Skip to content

Commit

Permalink
segmentation
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasrausch committed Feb 7, 2024
1 parent fe57dd8 commit c2e2b9b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
23 changes: 15 additions & 8 deletions R/rd.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,27 @@ x$pos = (x$start + x$end) / 2
baseCN = median(x[,6])
x$logratio = log2(x[,6] / baseCN)

# Segment logR
cnaData = CNA(x$logratio, maploc=x$pos, chrom=x$chr, sampleid="Sample", presorted=T)
cnaSegments = segment(smooth.CNA(cnaData), undo.splits="sdundo", undo.SD=sdUNDO)
seg = data.frame(segments.summary(cnaSegments))
colnames(seg) = c("ID", "chr", "start", "end", "num.mark", "seg.mean", "seg.sd", "seg.median", "seg.mad")
seg$cn = 2^seg$seg.median * baseCN
# Segmentation
if (length(args)>1) {
# Provided on the command-line
seg = read.table(args[2], header=F, sep="\t")
colnames(seg) = c("chr", "start", "end", "id", "cn")
} else {
# Segment logR
cnaData = CNA(x$logratio, maploc=x$pos, chrom=x$chr, sampleid="Sample", presorted=T)
cnaSegments = segment(smooth.CNA(cnaData), undo.splits="sdundo", undo.SD=sdUNDO)
seg = data.frame(segments.summary(cnaSegments))
colnames(seg) = c("ID", "chr", "start", "end", "num.mark", "seg.mean", "seg.sd", "seg.median", "seg.mad")
seg$cn = 2^seg$seg.median * baseCN
}

# Whole genome
p = ggplot(data=x, aes(x=start, y=x[,6]))
p = p + geom_point(pch=21, color="black", fill="black", size=0.5)
p = p + xlab("Chromosome")
p = p + ylab("Copy-number")
p = p + scale_x_continuous(labels=comma)
p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2);
if (nrow(seg)) { p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
p = p + facet_grid(. ~ chr, scales="free_x", space="free_x")
p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN))
p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
Expand All @@ -57,7 +64,7 @@ for(chrname in unique(x$chr)) {
p = p + ylab("Copy-number") + xlab(chrname)
p = p + scale_x_continuous(labels=comma, breaks = scales::pretty_breaks(n=20))
p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN))
p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2);
if (nrow(sl)) { p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
ggsave(p, file=paste0("plot.", chrname, ".png"), width=24, height=6)
print(warnings())
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ Delly also supports long-reads for SV discovery.
`delly lr -y pb -o delly.bcf -g hg38.fa input.bam`


Read-depth profiles
-------------------
Read-depth profiles and copy-number variant calling
---------------------------------------------------

You can generate read-depth profiles with delly. This requires a mappability map which can be downloaded here:

Expand All @@ -139,6 +139,12 @@ The output file `out.cov.gz` can be plotted using [R](https://www.r-project.org/

`Rscript R/rd.R out.cov.gz`

Instead of segmenting the read-depth information, you can also visualize the CNV calls.

`bcftools query -f "%CHROM\t%POS\t%INFO/END\t%ID[\t%RDCN]\n" out.bcf > seg.bed`

`Rscript R/rd.R out.cov.gz seg.bed`

With `-s` you can output a statistics file with GC bias information.

`delly cnv -g hg38.fa -m hg38.map -c out.cov.gz -o out.bcf -s stats.gz input.bam`
Expand Down

0 comments on commit c2e2b9b

Please sign in to comment.