From 9603f3da79fd8cc63da18d31f8479e304c8b8819 Mon Sep 17 00:00:00 2001
From: Jonathan Manning
Date: Tue, 10 Dec 2024 16:32:20 +0000
Subject: [PATCH] Fixes with local subworkflow

---
Review notes (text in this section is not part of the commit message):
 * bam_dedup_umi: the transcriptome branches now extend ch_dedup_log with the
   UMI_DEDUP_TRANSCRIPTOME log. The umicollapse branch previously read an
   undefined `dedup_log`, and both branches mixed the genome log in a second
   time instead of the transcriptome log.
 * workflows/rnaseq/main.nf: the second include alias is BAM_DEDUP_UMI_HISAT2,
   matching the call site and the withName selectors in nextflow.config.
 * workflows/rnaseq/main.nf: subworkflow versions are mixed into ch_versions
   rather than replacing what was collected earlier.
 * Line breaks and indentation of this patch were reconstructed; if context
   lines do not match the tree, try `git apply --ignore-whitespace`.

 subworkflows/local/bam_dedup_umi/main.nf | 78 ++++++++++++++---------
 workflows/rnaseq/main.nf                 | 79 ++++++++++--------------
 workflows/rnaseq/nextflow.config         | 10 +--
 3 files changed, 88 insertions(+), 79 deletions(-)

diff --git a/subworkflows/local/bam_dedup_umi/main.nf b/subworkflows/local/bam_dedup_umi/main.nf
index 3359fa7b6..a7f76f9a3 100644
--- a/subworkflows/local/bam_dedup_umi/main.nf
+++ b/subworkflows/local/bam_dedup_umi/main.nf
@@ -2,21 +2,23 @@
 // BAM deduplication with UMI processing
 //
 
-include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse'
-include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
-include { BAM_SORT_STATS_SAMTOOLS } from '../../../subworkflows/nf-core/bam_sort_stats_samtools'
-include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem'
-include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse'
+include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools'
+include { BAM_SORT_STATS_SAMTOOLS } from '../../../subworkflows/nf-core/bam_sort_stats_samtools'
+include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem'
+include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
 
 workflow BAM_DEDUP_UMI {
     take:
     ch_genome_bam // channel: [ val(meta), path(bam), path(bai) ]
-    ch_fasta // channel: [ path(fasta) ]
+    ch_fasta // channel: [ val(meta), path(fasta) ]
     umi_dedup_tool // string: 'umicollapse' or 'umitools'
     umitools_dedup_stats // boolean: whether to generate UMI-tools dedup stats
     bam_csi_index // boolean: whether to generate CSI index
-    ch_transcriptome_bam //
-    ch_transcript_fasta
+    ch_transcriptome_bam // channel: [ val(meta), path(bam) ]
+    ch_transcript_fasta // channel: [ val(meta), path(fasta) ]
 
     main:
     ch_versions = Channel.empty()
@@ -27,18 +29,18 @@ workflow BAM_DEDUP_UMI {
 
     // Genome BAM deduplication
     if (umi_dedup_tool == "umicollapse") {
-        BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE (
+        BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME (
             ch_genome_bam
         )
-        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE
+        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME
         ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats
 
     } else if (umi_dedup_tool == "umitools") {
-        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS (
+        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
             ch_genome_bam,
             umitools_dedup_stats
         )
-        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS
+        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME
         ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog
     }
 
@@ -51,32 +53,32 @@ workflow BAM_DEDUP_UMI {
 
     BAM_SORT_STATS_SAMTOOLS (
         ch_transcriptome_bam,
-        ch_transcript_fasta.map { [ [:], it ] }
+        ch_transcript_fasta
     )
     ch_sorted_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam
         .join(BAM_SORT_STATS_SAMTOOLS.out.bai)
 
     // 2. Transcriptome BAM deduplication
     if (umi_dedup_tool == "umicollapse") {
-        BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE (
+        BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME (
             ch_sorted_transcriptome_bam
         )
-        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE
-        ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats
+        UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME
+        ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_TRANSCRIPTOME.out.dedup_stats)
 
     } else if (umi_dedup_tool == "umitools") {
-        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS (
+        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME (
             ch_sorted_transcriptome_bam,
             umitools_dedup_stats
         )
-        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS
-        ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog
+        UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME
+        ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_TRANSCRIPTOME.out.deduplog)
     }
 
     // 3. Restore name sorting
     SAMTOOLS_SORT (
         UMI_DEDUP_TRANSCRIPTOME.out.bam,
-        ch_fasta.map { [ [:], it ] }
+        ch_fasta
     )
 
     // 4. Run prepare_for_rsem.py on paired-end BAM files
@@ -91,28 +93,46 @@ workflow BAM_DEDUP_UMI {
                 return [ meta, bam ]
         }
 
-    UMITOOLS_PREPAREFORSALMON (
+    UMITOOLS_PREPAREFORRSEM (
         ended_transcriptome_dedup_bam.paired_end
             .map { meta, bam -> [ meta, bam, [] ] }
     )
 
-    ch_dedup_transcriptome_bam = ch_transcriptome_bam
-        .single_end
-        .mix(UMITOOLS_PREPAREFORSALMON.out.bam)
+    ch_dedup_transcriptome_bam = ended_transcriptome_dedup_bam.single_end
+        .mix(UMITOOLS_PREPAREFORRSEM.out.bam)
+
+    // Collect files useful for MultiQC into one helpful emission
+
+    ch_stats = UMI_DEDUP_GENOME.out.stats
+        .mix(UMI_DEDUP_TRANSCRIPTOME.out.stats)
+
+    ch_flagstat = UMI_DEDUP_GENOME.out.flagstat
+        .mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat)
+
+    ch_idxstats = UMI_DEDUP_GENOME.out.idxstats
+        .mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats)
+
+    ch_multiqc_files = ch_dedup_log
+        .mix(ch_stats)
+        .mix(ch_flagstat)
+        .mix(ch_idxstats)
+        .transpose()
+        .map{it[1]}
 
     // Record versions
 
     ch_versions = UMI_DEDUP_GENOME.out.versions
         .mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
-        .mix(UMITOOLS_PREPAREFORSALMON.out.versions)
+        .mix(UMITOOLS_PREPAREFORRSEM.out.versions)
 
     emit:
     bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ]
-    bam_index = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ]
+    bai = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ]
     dedup_log = ch_dedup_log // channel: [ val(meta), path(log) ]
-    stats = UMI_DEDUP_GENOME.out.stats
-    flagstat = UMI_DEDUP_GENOME.out.flagstat
-    idxstats = UMI_DEDUP_GENOME.out.idxstats
+    stats = ch_stats
+    flagstat = ch_flagstat
+    idxstats = ch_idxstats
+    multiqc_files = ch_multiqc_files
     transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ]
     versions = ch_versions // channel: [ path(versions.yml) ]
 }
diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf
index 4328ec3a7..6db5dfa15 100755
--- a/workflows/rnaseq/main.nf
+++ b/workflows/rnaseq/main.nf
@@ -15,9 +15,11 @@ include { MULTIQC_CUSTOM_BIOTYPE } from '../../modules/local/multiqc
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
-include { ALIGN_STAR } from '../../subworkflows/local/align_star'
-include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem'
-include { BAM_DEDUP_UMI } from '../../subworkflows/local/bam_dedup_umi'
+include { ALIGN_STAR } from '../../subworkflows/local/align_star'
+include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem'
+include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_STAR } from '../../subworkflows/local/bam_dedup_umi'
+include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_HISAT2 } from '../../subworkflows/local/bam_dedup_umi'
+
 include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
 include { multiqcTsvFromList } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness'
 include { getStarPercentMapped } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
@@ -35,7 +37,6 @@ include { methodsDescriptionText } from '../../subworkflows/local/utils_
 // MODULE: Installed directly from nf-core/modules
 //
 include { DUPRADAR } from '../../modules/nf-core/dupradar'
-include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort'
 include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap'
 include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq'
 include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie'
@@ -43,7 +44,6 @@ include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featu
 include { KRAKEN2_KRAKEN2 as KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main'
 include { BRACKEN_BRACKEN as BRACKEN } from '../../modules/nf-core/bracken/bracken/main'
 include { MULTIQC } from '../../modules/nf-core/multiqc'
-include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../../modules/nf-core/umitools/prepareforrsem'
 include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_FW } from '../../modules/nf-core/bedtools/genomecov'
 include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_REV } from '../../modules/nf-core/bedtools/genomecov'
 
@@ -220,27 +220,23 @@ workflow RNASEQ {
         //
         if (params.with_umi) {
 
-            BAM_DEDUP_UMI(
+            BAM_DEDUP_UMI_STAR(
                 ch_genome_bam.join(ch_genome_bam_index, by: [0]),
-                ch_fasta,
+                ch_fasta.map { [ [:], it ] },
                 params.umi_dedup_tool,
                 params.umitools_dedup_stats,
                 params.bam_csi_index,
-                BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai, by: [0])
+                ch_transcriptome_bam,
+                ch_transcript_fasta.map { [ [:], it ] }
             )
 
-            ch_genome_bam = BAM_DEDUP_UMI.out.bam
-            ch_transcriptome_bam = BAM_DEDUP_UMI.out.bam
-            ch_genome_bam_index = BAM_DEDUP_UMI.out.bai
-            ch_versions = BAM_DEDUP_UMI.out.versions
-
-            ch_multiqc_files = ch_multiqc_files
-                .mix(
-                    BAM_DEDUP_UMI.dedup_log
-                        .concat(BAM_DEDUP_UMI.out.stats)
-                        .concat(BAM_DEDUP_UMI.out.flagstat)
-                        .concat(BAM_DEDUP_UMI.out.idxstats)
-                )
+            ch_genome_bam = BAM_DEDUP_UMI_STAR.out.bam
+            ch_transcriptome_bam = BAM_DEDUP_UMI_STAR.out.transcriptome_bam
+            ch_genome_bam_index = BAM_DEDUP_UMI_STAR.out.bai
+            ch_versions = ch_versions.mix(BAM_DEDUP_UMI_STAR.out.versions)
+
+            ch_multiqc_files = ch_multiqc_files
+                .mix(BAM_DEDUP_UMI_STAR.out.multiqc_files)
         }
 
         //
@@ -335,32 +331,25 @@ workflow RNASEQ {
         //
         // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs
         //
+
         if (params.with_umi) {
-            if (params.umi_dedup_tool == "umicollapse") {
-                BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME (
-                    ch_genome_bam.join(ch_genome_bam_index, by: [0]),
-                )
-                UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME
-                ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.dedup_stats.collect{it[1]}.ifEmpty([]))
-            } else if (params.umi_dedup_tool == "umitools") {
-                BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
-                    ch_genome_bam.join(ch_genome_bam_index, by: [0]),
-                    params.umitools_dedup_stats
-                )
-                UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME
-                ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.deduplog.collect{it[1]})
-            } else {
-                error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'")
-            }
-            ch_genome_bam = UMI_DEDUP_GENOME.out.bam
-            ch_genome_bam_index = UMI_DEDUP_GENOME.out.bai
-            ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.stats.collect{it[1]})
-            ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.flagstat.collect{it[1]})
-            ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.idxstats.collect{it[1]})
-            if (params.bam_csi_index) {
-                ch_genome_bam_index = UMI_DEDUP_GENOME.out.csi
-            }
-            ch_versions = ch_versions.mix(UMI_DEDUP_GENOME.out.versions)
+
+            BAM_DEDUP_UMI_HISAT2(
+                ch_genome_bam.join(ch_genome_bam_index, by: [0]),
+                ch_fasta.map { [ [:], it ] },
+                params.umi_dedup_tool,
+                params.umitools_dedup_stats,
+                params.bam_csi_index,
+                [[],[]],
+                [[],[]]
+            )
+
+            ch_genome_bam = BAM_DEDUP_UMI_HISAT2.out.bam
+            ch_genome_bam_index = BAM_DEDUP_UMI_HISAT2.out.bai
+            ch_versions = ch_versions.mix(BAM_DEDUP_UMI_HISAT2.out.versions)
+
+            ch_multiqc_files = ch_multiqc_files
+                .mix(BAM_DEDUP_UMI_HISAT2.out.multiqc_files)
         }
     }
 
diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config
index 02d60e7e3..2eef40849 100644
--- a/workflows/rnaseq/nextflow.config
+++ b/workflows/rnaseq/nextflow.config
@@ -102,7 +102,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
 
     if (params.with_umi) {
         process {
-            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:SAMTOOLS_SORT' {
+            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):SAMTOOLS_SORT' {
                 ext.args = '-n'
                 ext.prefix = { "${meta.id}.umi_dedup.transcriptome" }
                 publishDir = [
@@ -113,7 +113,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
                 ]
             }
 
-            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:UMITOOLS_PREPAREFORSALMON' {
+            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):UMITOOLS_PREPAREFORRSEM' {
                 ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" }
                 publishDir = [
                     [
@@ -130,7 +130,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
                 ]
             }
 
-            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
+            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
                 ext.prefix = { "${meta.id}.transcriptome.sorted" }
                 publishDir = [
                     path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir },
@@ -140,7 +140,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
                 ]
             }
 
-            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
+            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
                 publishDir = [
                     path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir },
                     mode: params.publish_dir_mode,
@@ -149,7 +149,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
                 ]
             }
 
-            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
+            withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
                 ext.prefix = { "${meta.id}.transcriptome.sorted.bam" }
                 publishDir = [
                     path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}/samtools_stats" : params.outdir },