From c2e2b9b8736e24d32f02d62eada0f80d77da32ee Mon Sep 17 00:00:00 2001 From: Tobias Rausch Date: Wed, 7 Feb 2024 14:40:03 +0100 Subject: [PATCH] segmentation --- R/rd.R | 23 +++++++++++++++-------- README.md | 10 ++++++++-- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/R/rd.R b/R/rd.R index 4cb15c9..df55e44 100644 --- a/R/rd.R +++ b/R/rd.R @@ -26,12 +26,19 @@ x$pos = (x$start + x$end) / 2 baseCN = median(x[,6]) x$logratio = log2(x[,6] / baseCN) -# Segment logR -cnaData = CNA(x$logratio, maploc=x$pos, chrom=x$chr, sampleid="Sample", presorted=T) -cnaSegments = segment(smooth.CNA(cnaData), undo.splits="sdundo", undo.SD=sdUNDO) -seg = data.frame(segments.summary(cnaSegments)) -colnames(seg) = c("ID", "chr", "start", "end", "num.mark", "seg.mean", "seg.sd", "seg.median", "seg.mad") -seg$cn = 2^seg$seg.median * baseCN +# Segmentation +if (length(args)>1) { + # Provided on the command-line + seg = read.table(args[2], header=F, sep="\t") + colnames(seg) = c("chr", "start", "end", "id", "cn") +} else { + # Segment logR + cnaData = CNA(x$logratio, maploc=x$pos, chrom=x$chr, sampleid="Sample", presorted=T) + cnaSegments = segment(smooth.CNA(cnaData), undo.splits="sdundo", undo.SD=sdUNDO) + seg = data.frame(segments.summary(cnaSegments)) + colnames(seg) = c("ID", "chr", "start", "end", "num.mark", "seg.mean", "seg.sd", "seg.median", "seg.mad") + seg$cn = 2^seg$seg.median * baseCN +} # Whole genome p = ggplot(data=x, aes(x=start, y=x[,6])) @@ -39,7 +46,7 @@ p = p + geom_point(pch=21, color="black", fill="black", size=0.5) p = p + xlab("Chromosome") p = p + ylab("Copy-number") p = p + scale_x_continuous(labels=comma) -p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); +if (nrow(seg)) { p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); } p = p + facet_grid(. ~ chr, scales="free_x", space="free_x") p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN)) p = p + theme(axis.text.x = element_text(angle=45, hjust=1)) @@ -57,7 +64,7 @@ for(chrname in unique(x$chr)) { p = p + ylab("Copy-number") + xlab(chrname) p = p + scale_x_continuous(labels=comma, breaks = scales::pretty_breaks(n=20)) p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN)) - p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); + if (nrow(sl)) { p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); } p = p + theme(axis.text.x = element_text(angle=45, hjust=1)) ggsave(p, file=paste0("plot.", chrname, ".png"), width=24, height=6) print(warnings()) diff --git a/README.md b/README.md index e77d2e1..96e1b51 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,8 @@ Delly also supports long-reads for SV discovery. `delly lr -y pb -o delly.bcf -g hg38.fa input.bam` -Read-depth profiles -------------------- +Read-depth profiles and copy-number variant calling +--------------------------------------------------- You can generate read-depth profiles with delly. This requires a mappability map which can be downloaded here: @@ -139,6 +139,12 @@ The output file `out.cov.gz` can be plotted using [R](https://www.r-project.org/ `Rscript R/rd.R out.cov.gz` +Instead of segmenting the read-depth information, you can also visualize the CNV calls. + +`bcftools query -f "%CHROM\t%POS\t%INFO/END\t%ID[\t%RDCN]\n" out.bcf > seg.bed` + +`Rscript R/rd.R out.cov.gz seg.bed` + With `-s` you can output a statistics file with GC bias information. `delly cnv -g hg38.fa -m hg38.map -c out.cov.gz -o out.bcf -s stats.gz input.bam`