Skip to content

Commit

Permalink
Fixes with local subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
pinin4fjords committed Dec 10, 2024
1 parent f4b76b1 commit 9603f3d
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 79 deletions.
78 changes: 49 additions & 29 deletions subworkflows/local/bam_dedup_umi/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,23 @@
// BAM deduplication with UMI processing
//

include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse'
include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
include { BAM_SORT_STATS_SAMTOOLS } from '../../../subworkflows/nf-core/bam_sort_stats_samtools'
include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem'
include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse'
include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse'
include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
include { BAM_SORT_STATS_SAMTOOLS } from '../../../subworkflows/nf-core/bam_sort_stats_samtools'
include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem'
include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'

workflow BAM_DEDUP_UMI {
take:
ch_genome_bam // channel: [ val(meta), path(bam), path(bai) ]
ch_fasta // channel: [ path(fasta) ]
ch_fasta // channel: [ val(meta), path(fasta) ]
umi_dedup_tool // string: 'umicollapse' or 'umitools'
umitools_dedup_stats // boolean: whether to generate UMI-tools dedup stats
bam_csi_index // boolean: whether to generate CSI index
ch_transcriptome_bam //
ch_transcript_fasta
ch_transcriptome_bam // channel: [ val(meta), path(bam) ]
ch_transcript_fasta // channel: [ val(meta), path(fasta) ]

main:
ch_versions = Channel.empty()
Expand All @@ -27,18 +29,18 @@ workflow BAM_DEDUP_UMI {

// Genome BAM deduplication
if (umi_dedup_tool == "umicollapse") {
BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE (
BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME (
ch_genome_bam
)
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME
ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats

} else if (umi_dedup_tool == "umitools") {
BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS (
BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
ch_genome_bam,
umitools_dedup_stats
)
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME
ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog
}

Expand All @@ -51,32 +53,32 @@ workflow BAM_DEDUP_UMI {

BAM_SORT_STATS_SAMTOOLS (
ch_transcriptome_bam,
ch_transcript_fasta.map { [ [:], it ] }
ch_transcript_fasta
)
ch_sorted_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam
.join(BAM_SORT_STATS_SAMTOOLS.out.bai)

// 2. Transcriptome BAM deduplication
if (umi_dedup_tool == "umicollapse") {
BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE (
BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME (
ch_sorted_transcriptome_bam
)
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE
ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats
UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME
// Append the transcriptome dedup stats to the log channel already holding the genome stats.
// (`dedup_log` was never defined, and the genome output was being mixed in a second time.)
ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_TRANSCRIPTOME.out.dedup_stats)

} else if (umi_dedup_tool == "umitools") {
BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS (
BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME (
ch_sorted_transcriptome_bam,
umitools_dedup_stats
)
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS
ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog
UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME
// Append the transcriptome dedup log; the genome log is already in ch_dedup_log,
// so mixing UMI_DEDUP_GENOME again would duplicate it and drop the transcriptome log.
ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_TRANSCRIPTOME.out.deduplog)
}

// 3. Restore name sorting
SAMTOOLS_SORT (
UMI_DEDUP_TRANSCRIPTOME.out.bam,
ch_fasta.map { [ [:], it ] }
ch_fasta
)

// 4. Run prepare_for_rsem.py on paired-end BAM files
Expand All @@ -91,28 +93,46 @@ workflow BAM_DEDUP_UMI {
return [ meta, bam ]
}

UMITOOLS_PREPAREFORSALMON (
UMITOOLS_PREPAREFORRSEM (
ended_transcriptome_dedup_bam.paired_end
.map { meta, bam -> [ meta, bam, [] ] }
)

ch_dedup_transcriptome_bam = ch_transcriptome_bam
.single_end
.mix(UMITOOLS_PREPAREFORSALMON.out.bam)
ch_dedup_transcriptome_bam = ended_transcriptome_dedup_bam.single_end
.mix(UMITOOLS_PREPAREFORRSEM.out.bam)

// Collect files useful for MultiQC into one helpful emission

ch_stats = UMI_DEDUP_GENOME.out.stats
.mix(UMI_DEDUP_TRANSCRIPTOME.out.stats)

ch_flagstat = UMI_DEDUP_GENOME.out.flagstat
.mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat)

ch_idxstats = UMI_DEDUP_GENOME.out.idxstats
.mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats)

ch_multiqc_files = ch_dedup_log
.mix(ch_stats)
.mix(ch_flagstat)
.mix(ch_idxstats)
.transpose()
.map{it[1]}

// Record versions

ch_versions = UMI_DEDUP_GENOME.out.versions
.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
.mix(UMITOOLS_PREPAREFORSALMON.out.versions)
.mix(UMITOOLS_PREPAREFORRSEM.out.versions)
.mix(UMI_DEDUP_TRANSCRIPTOME.out.versions) // transcriptome dedup tools were not being recorded
.mix(SAMTOOLS_SORT.out.versions) // name-sort step was not being recorded

emit:
bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ]
bam_index = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ]
bai = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ]
dedup_log = ch_dedup_log // channel: [ val(meta), path(log) ]
stats = UMI_DEDUP_GENOME.out.stats
flagstat = UMI_DEDUP_GENOME.out.flagstat
idxstats = UMI_DEDUP_GENOME.out.idxstats
stats = ch_stats
flagstat = ch_flagstat
idxstats = ch_idxstats
multiqc_files = ch_multiqc_files
transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}
79 changes: 34 additions & 45 deletions workflows/rnaseq/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ include { MULTIQC_CUSTOM_BIOTYPE } from '../../modules/local/multiqc
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { ALIGN_STAR } from '../../subworkflows/local/align_star'
include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem'
include { BAM_DEDUP_UMI } from '../../subworkflows/local/bam_dedup_umi'
include { ALIGN_STAR } from '../../subworkflows/local/align_star'
include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem'
include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_STAR } from '../../subworkflows/local/bam_dedup_umi'
// Alias must be BAM_DEDUP_UMI_HISAT2: that is the name invoked in the HISAT2 branch of this
// workflow and matched by the `BAM_DEDUP_UMI_(STAR|HISAT2)` selectors in nextflow.config.
include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_HISAT2 } from '../../subworkflows/local/bam_dedup_umi'

include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
include { multiqcTsvFromList } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness'
include { getStarPercentMapped } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
Expand All @@ -35,15 +37,13 @@ include { methodsDescriptionText } from '../../subworkflows/local/utils_
// MODULE: Installed directly from nf-core/modules
//
include { DUPRADAR } from '../../modules/nf-core/dupradar'
include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort'
include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap'
include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq'
include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie'
include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts'
include { KRAKEN2_KRAKEN2 as KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main'
include { BRACKEN_BRACKEN as BRACKEN } from '../../modules/nf-core/bracken/bracken/main'
include { MULTIQC } from '../../modules/nf-core/multiqc'
include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../../modules/nf-core/umitools/prepareforrsem'
include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_FW } from '../../modules/nf-core/bedtools/genomecov'
include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_REV } from '../../modules/nf-core/bedtools/genomecov'

Expand Down Expand Up @@ -220,27 +220,23 @@ workflow RNASEQ {
//
if (params.with_umi) {

BAM_DEDUP_UMI(
BAM_DEDUP_UMI_STAR(
ch_genome_bam.join(ch_genome_bam_index, by: [0]),
ch_fasta,
ch_fasta.map { [ [:], it ] },
params.umi_dedup_tool,
params.umitools_dedup_stats,
params.bam_csi_index,
BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai, by: [0])
ch_transcriptome_bam,
ch_transcript_fasta.map { [ [:], it ] }
)

ch_genome_bam = BAM_DEDUP_UMI.out.bam
ch_transcriptome_bam = BAM_DEDUP_UMI.out.bam
ch_genome_bam_index = BAM_DEDUP_UMI.out.bai
ch_versions = BAM_DEDUP_UMI.out.versions

ch_multiqc_files = ch_multiqc_files
.mix(
BAM_DEDUP_UMI.dedup_log
.concat(BAM_DEDUP_UMI.out.stats)
.concat(BAM_DEDUP_UMI.out.flagstat)
.concat(BAM_DEDUP_UMI.out.idxstats)
)
ch_genome_bam = BAM_DEDUP_UMI_STAR.out.bam
ch_transcriptome_bam = BAM_DEDUP_UMI_STAR.out.transcriptome_bam
ch_genome_bam_index = BAM_DEDUP_UMI_STAR.out.bai
// Accumulate instead of overwriting, so versions collected earlier in the workflow are kept
ch_versions = ch_versions.mix(BAM_DEDUP_UMI_STAR.out.versions)

ch_multiqc_files = ch_multiqc_files
.mix(BAM_DEDUP_UMI_STAR.out.multiqc_files)
}

//
Expand Down Expand Up @@ -335,32 +331,25 @@ workflow RNASEQ {
//
// SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
//

if (params.with_umi) {
if (params.umi_dedup_tool == "umicollapse") {
BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME (
ch_genome_bam.join(ch_genome_bam_index, by: [0]),
)
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME
ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.dedup_stats.collect{it[1]}.ifEmpty([]))
} else if (params.umi_dedup_tool == "umitools") {
BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
ch_genome_bam.join(ch_genome_bam_index, by: [0]),
params.umitools_dedup_stats
)
UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME
ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.deduplog.collect{it[1]})
} else {
error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'")
}
ch_genome_bam = UMI_DEDUP_GENOME.out.bam
ch_genome_bam_index = UMI_DEDUP_GENOME.out.bai
ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.stats.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.flagstat.collect{it[1]})
ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.idxstats.collect{it[1]})
if (params.bam_csi_index) {
ch_genome_bam_index = UMI_DEDUP_GENOME.out.csi
}
ch_versions = ch_versions.mix(UMI_DEDUP_GENOME.out.versions)

BAM_DEDUP_UMI_HISAT2(
ch_genome_bam.join(ch_genome_bam_index, by: [0]),
ch_fasta.map { [ [:], it ] },
params.umi_dedup_tool,
params.umitools_dedup_stats,
params.bam_csi_index,
[[],[]],
[[],[]]
)

ch_genome_bam = BAM_DEDUP_UMI_HISAT2.out.bam
ch_genome_bam_index = BAM_DEDUP_UMI_HISAT2.out.bai
// Accumulate instead of overwriting, so versions collected earlier in the workflow are kept
ch_versions = ch_versions.mix(BAM_DEDUP_UMI_HISAT2.out.versions)

ch_multiqc_files = ch_multiqc_files
.mix(BAM_DEDUP_UMI_HISAT2.out.multiqc_files)
}
}

Expand Down
10 changes: 5 additions & 5 deletions workflows/rnaseq/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {

if (params.with_umi) {
process {
withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:SAMTOOLS_SORT' {
withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):SAMTOOLS_SORT' {
ext.args = '-n'
ext.prefix = { "${meta.id}.umi_dedup.transcriptome" }
publishDir = [
Expand All @@ -113,7 +113,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}

withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:UMITOOLS_PREPAREFORSALMON' {
withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):UMITOOLS_PREPAREFORRSEM' {
ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" }
publishDir = [
[
Expand All @@ -130,7 +130,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}

withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.transcriptome.sorted" }
publishDir = [
path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir },
Expand All @@ -140,7 +140,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}

withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
publishDir = [
path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir },
mode: params.publish_dir_mode,
Expand All @@ -149,7 +149,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
]
}

withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
ext.prefix = { "${meta.id}.transcriptome.sorted.bam" }
publishDir = [
path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}/samtools_stats" : params.outdir },
Expand Down

0 comments on commit 9603f3d

Please sign in to comment.