segmentation

dellytools · Feb 7, 2024 · c2e2b9b · c2e2b9b
1 parent fe57dd8
commit c2e2b9b
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 10 deletions.
diff --git a/R/rd.R b/R/rd.R
@@ -26,20 +26,27 @@ x$pos = (x$start + x$end) / 2
 baseCN = median(x[,6])
 x$logratio = log2(x[,6] / baseCN)
 
-# Segment logR
-cnaData = CNA(x$logratio, maploc=x$pos, chrom=x$chr, sampleid="Sample", presorted=T)
-cnaSegments = segment(smooth.CNA(cnaData), undo.splits="sdundo", undo.SD=sdUNDO)
-seg = data.frame(segments.summary(cnaSegments))
-colnames(seg) = c("ID", "chr", "start", "end", "num.mark", "seg.mean", "seg.sd", "seg.median", "seg.mad")
-seg$cn = 2^seg$seg.median * baseCN
+# Segmentation
+if (length(args)>1) {
+ # Provided on the command-line
+ seg = read.table(args[2], header=F, sep="\t")
+ colnames(seg) = c("chr", "start", "end", "id", "cn")
+} else {
+ # Segment logR
+ cnaData = CNA(x$logratio, maploc=x$pos, chrom=x$chr, sampleid="Sample", presorted=T)
+ cnaSegments = segment(smooth.CNA(cnaData), undo.splits="sdundo", undo.SD=sdUNDO)
+ seg = data.frame(segments.summary(cnaSegments))
+ colnames(seg) = c("ID", "chr", "start", "end", "num.mark", "seg.mean", "seg.sd", "seg.median", "seg.mad")
+ seg$cn = 2^seg$seg.median * baseCN
+}
 
 # Whole genome
 p = ggplot(data=x, aes(x=start, y=x[,6]))
 p = p + geom_point(pch=21, color="black", fill="black", size=0.5)
 p = p + xlab("Chromosome")
 p = p + ylab("Copy-number")
 p = p + scale_x_continuous(labels=comma)
-p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2);
+if (nrow(seg)) { p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
 p = p + facet_grid(. ~ chr, scales="free_x", space="free_x")
 p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN))
 p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
@@ -57,7 +64,7 @@ for(chrname in unique(x$chr)) {
  p = p + ylab("Copy-number") + xlab(chrname)
  p = p + scale_x_continuous(labels=comma, breaks = scales::pretty_breaks(n=20))
  p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN))
- p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2);
+ if (nrow(sl)) { p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
  p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
  ggsave(p, file=paste0("plot.", chrname, ".png"), width=24, height=6)
  print(warnings())

diff --git a/README.md b/README.md
@@ -124,8 +124,8 @@ Delly also supports long-reads for SV discovery.
 `delly lr -y pb -o delly.bcf -g hg38.fa input.bam`
 
 
-Read-depth profiles
--------------------
+Read-depth profiles and copy-number variant calling
+---------------------------------------------------
 
 You can generate read-depth profiles with delly. This requires a mappability map which can be downloaded here:
 
@@ -139,6 +139,12 @@ The output file `out.cov.gz` can be plotted using [R](https://www.r-project.org/
 
 `Rscript R/rd.R out.cov.gz`
 
+Instead of segmenting the read-depth information, you can also visualize the CNV calls.
+
+`bcftools query -f "%CHROM\t%POS\t%INFO/END\t%ID[\t%RDCN]\n" out.bcf > seg.bed`
+
+`Rscript R/rd.R out.cov.gz seg.bed`
+
 With `-s` you can output a statistics file with GC bias information.
 
 `delly cnv -g hg38.fa -m hg38.map -c out.cov.gz -o out.bcf -s stats.gz input.bam`