Skip to content

Commit

Permalink
Update ASM scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
NMNS93 committed Jan 24, 2024
1 parent 3e62584 commit 1a86442
Show file tree
Hide file tree
Showing 6 changed files with 362 additions and 13 deletions.
16 changes: 11 additions & 5 deletions R/asm_allele_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ cwrap_asm_get_allele_counts <- function(
# Overlap with SNP loci
bam_dt <- phase_reads_to_snps(bam_dt, snps_gr)
bam_dt <- select_read_snp_pair(bam_dt)
# If both R1 and R2 overlap the same SNP, this code selects one
# This throws away information at CpGs where one read informs of CpGs the other does not.
# As such, we should still avoid double-counting downstream

# Assign alleles using CAMDAC rules
bam_dt[, hap_is_ref := assign_het_allele(hap_bsseq, hap_ref, hap_alt, "ref")]
Expand All @@ -30,9 +33,12 @@ cwrap_asm_get_allele_counts <- function(
# Get haplotype stats
hap_stats <- asm_hap_stats(bam_dt)

# Annotate BAM with loci
# Annotate BAM with CpG and SNP loci
bam_dt <- annotate_bam_with_loci_asm(bam_dt, loci_dt, drop_ccgg, paired_end)

# For each CpG site, only one read can be counted
bam_dt <- fix_pe_overlap_at_loci(bam_dt)

# Get qname to cpg mapping
qname_hap_cg <- unique(bam_dt[, .(qname, hap_id, chrom=chrom, start=start, end=end)])
qname_hap_cg$chrom = gsub("chr", "", qname_hap_cg$chrom)
Expand Down Expand Up @@ -91,12 +97,12 @@ haps_as_numeric <- function(v) {
# FUTURE: Select based on counts
select_read_snp_pair <- function(bam_dt) {
# Reads may map to multiple SNPs.
# Ensure each read is represented only once.
unique(bam_dt, by = "qname")
# Ensure each read (R1 and R2) is represented only once, selecting the SNP with the highest coverage.
unique(bam_dt, by = c("qname", "flag"))
}

phase_reads_to_snps <- function(bam_dt, snps_gr) {
# Find overlapping read pairs between BAM and SNPs
# Find overlap between reads and SNP pairs
snps_dt <- data.table(data.frame(snps_gr))
setnames("seqnames", "chrom", x = snps_dt)
setkey(bam_dt, chrom, start, end)
Expand Down Expand Up @@ -181,7 +187,7 @@ asm_hap_stats <- function(bam_dt) {
bam_dt <- bam_dt[hap_is_ref == T | hap_is_alt == T]

# Count reads aligned to input haplotype/SNP
stats <- bam_dt[, .(hap_BAF = sum(hap_allele == hap_alt) / .N, hap_reads = .N), by = c("chrom", "hap_ref", "hap_alt", "hap_POS", "hap_id")]
stats <- bam_dt[, .(hap_BAF = sum(hap_is_alt) / .N, hap_reads = .N), by = c("chrom", "hap_ref", "hap_alt", "hap_POS", "hap_id")]
stats <- merge(stats, unexp_dt, all.x = T)

# Ensure BAM chrom field fits expected format for downstream joins
Expand Down
9 changes: 9 additions & 0 deletions R/asm_cmain.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ cmain_asm_make_methylation <- function(sample, config) {
}


fread_chrom_if_char = function(x) {
if (is.character(x)) {
fread_chrom(x)
} else {
x
}
}


cmain_asm_make_snps <- function(tumor, germline, infiltrates, origin, config) {
# Check that ASM snps file is availabe for tumor. If so, return NULL
asm_snps_file <- get_fpath(tumor, config, "asm_snps")
Expand Down
1 change: 0 additions & 1 deletion R/asm_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ asm_pipeline <- function(tumor, germline = NULL, infiltrates = NULL, origin = NU
# Run ASM deconvolution
cmain_asm_deconvolve(tumor, infiltrates, config)

# TODO: How are CG-SNPs handled at allele counting stage for ASM?
# Run ASM differential methylation within-sample
for (s in sample_list) {
if (!is.null(s)) {
Expand Down
15 changes: 8 additions & 7 deletions R/cmain.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,9 @@ cmain_run_ascat <- function(tumour, config) {
cna_settings <- config$cna_settings

# Set Rho and Psi to NA if not given (required by ASCAT)
if (!is.null(cna_settings$ascat_purity) & !is.null(cna_settings$ascat_ploidy)) {
preset_rho <- cna_settings$ascat_purity
preset_psi <- cna_settings$ascat_ploidy
if (!is.null(cna_settings$rho) & !is.null(cna_settings$psi)) {
preset_rho <- cna_settings$rho
preset_psi <- cna_settings$psi
} else {
preset_rho <- NA
preset_psi <- NA
Expand Down Expand Up @@ -329,11 +329,12 @@ cmain_run_battenberg <- function(tumour, config) {
# Set beagle software path. CAMDAC config creation fits by default.
beaglejar <- config$beaglejar

# Set default RHO and PSI based on config
if (!is.null(config$ascat_rho_manual) & !is.null(config$ascat_psi_manual)) {
# Set default RHO (purity) and PSI (ploidy) based on config
cna_settings = config$cna_settings
if (!is.null(cna_settings$rho) & !is.null(cna_settings$psi)) {
use_preset_rho_psi <- T
preset_rho <- config$ascat_rho_manual
preset_psi <- config$ascat_psi_manual
preset_rho <- cna_settings$rho
preset_psi <- cna_settings$psi
} else {
use_preset_rho_psi <- F
preset_rho <- NA
Expand Down
Loading

0 comments on commit 1a86442

Please sign in to comment.