diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d94d52ef..1fa69957a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Special thanks to the following for their contributions to the release: - [Caitlin Winkler](https://github.com/oligomyeggo) +- [Siddhartha Bagaria](https://github.com/siddharthab) ### Enhancements & fixes +- [PR #1369](https://github.com/nf-core/rnaseq/pull/1369) - Add umicollapse as an alternative to umi-tools - [PR #1461](https://github.com/nf-core/rnaseq/pull/1461) - Add FASTQ linting during preprocessing +### Software dependencies + +| Dependency | Old version | New version | +| ------------- | ----------- | ----------- | +| `UMICollapse` | | 1.1.0 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. 
+ ## [[3.17.0](https://github.com/nf-core/rnaseq/releases/tag/3.17.0)] - 2024-10-23 ### Credits @@ -1019,14 +1033,14 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi ### Parameters -| Old parameter | New parameter | -| --------------------------- | -------------------------------------- | -| `--fc_extra_attributes` | `--gtf_extra_attributes` | -|  `--fc_group_features` |  `--gtf_group_features` | -|  `--fc_count_type` |  `--gtf_count_type` | -|  `--fc_group_features_type` |  `--gtf_group_features_type` | -|   |  `--singularity_pull_docker_container` | -|  `--skip_featurecounts` |   | +| Old parameter | New parameter | +| -------------------------- | ------------------------------------- | +| `--fc_extra_attributes` | `--gtf_extra_attributes` | +| `--fc_group_features` | `--gtf_group_features` | +| `--fc_count_type` | `--gtf_count_type` | +| `--fc_group_features_type` | `--gtf_group_features_type` | +| | `--singularity_pull_docker_container` | +| `--skip_featurecounts` | | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. 
@@ -1104,28 +1118,28 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi #### Updated -| Old parameter | New parameter | -| ----------------------------- | --------------------------- | -| `--reads` | `--input` | -|  `--igenomesIgnore` |  `--igenomes_ignore` | -|  `--removeRiboRNA` |  `--remove_ribo_rna` | -|  `--rRNA_database_manifest` |  `--ribo_database_manifest` | -|  `--save_nonrRNA_reads` |  `--save_non_ribo_reads` | -|  `--saveAlignedIntermediates` |  `--save_align_intermeds` | -|  `--saveReference` |  `--save_reference` | -|  `--saveTrimmed` |  `--save_trimmed` | -|  `--saveUnaligned` |  `--save_unaligned` | -|  `--skipAlignment` |  `--skip_alignment` | -|  `--skipBiotypeQC` |  `--skip_biotype_qc` | -|  `--skipDupRadar` |  `--skip_dupradar` | -|  `--skipFastQC` |  `--skip_fastqc` | -|  `--skipMultiQC` |  `--skip_multiqc` | -|  `--skipPreseq` |  `--skip_preseq` | -|  `--skipQC` |  `--skip_qc` | -|  `--skipQualimap` |  `--skip_qualimap` | -|  `--skipRseQC` |  `--skip_rseqc` | -|  `--skipTrimming` |  `--skip_trimming` | -|  `--stringTieIgnoreGTF` |  `--stringtie_ignore_gtf` | +| Old parameter | New parameter | +| ---------------------------- | -------------------------- | +| `--reads` | `--input` | +| `--igenomesIgnore` | `--igenomes_ignore` | +| `--removeRiboRNA` | `--remove_ribo_rna` | +| `--rRNA_database_manifest` | `--ribo_database_manifest` | +| `--save_nonrRNA_reads` | `--save_non_ribo_reads` | +| `--saveAlignedIntermediates` | `--save_align_intermeds` | +| `--saveReference` | `--save_reference` | +| `--saveTrimmed` | `--save_trimmed` | +| `--saveUnaligned` | `--save_unaligned` | +| `--skipAlignment` | `--skip_alignment` | +| `--skipBiotypeQC` | `--skip_biotype_qc` | +| `--skipDupRadar` | `--skip_dupradar` | +| `--skipFastQC` | `--skip_fastqc` | +| `--skipMultiQC` | `--skip_multiqc` | +| `--skipPreseq` | `--skip_preseq` | +| `--skipQC` | `--skip_qc` | +| `--skipQualimap` | `--skip_qualimap` | +| `--skipRseQC` | 
`--skip_rseqc` | +| `--skipTrimming` | `--skip_trimming` | +| `--stringTieIgnoreGTF` | `--stringtie_ignore_gtf` | #### Added diff --git a/modules.json b/modules.json index d03e252c6..43fdc1f9a 100644 --- a/modules.json +++ b/modules.json @@ -203,6 +203,7 @@ "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", "installed_by": [ + "bam_dedup_stats_samtools_umicollapse", "bam_dedup_stats_samtools_umitools", "bam_markduplicates_picard", "bam_sort_stats_samtools" @@ -271,9 +272,14 @@ "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", "installed_by": ["bedgraph_bedclip_bedgraphtobigwig"] }, + "umicollapse": { + "branch": "master", + "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", + "installed_by": ["bam_dedup_stats_samtools_umicollapse"] + }, "umitools/dedup": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", "installed_by": ["bam_dedup_stats_samtools_umitools"] }, "umitools/extract": { @@ -295,9 +301,14 @@ }, "subworkflows": { "nf-core": { + "bam_dedup_stats_samtools_umicollapse": { + "branch": "master", + "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", + "installed_by": ["subworkflows"] + }, "bam_dedup_stats_samtools_umitools": { "branch": "master", - "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", + "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", "installed_by": ["subworkflows"] }, "bam_markduplicates_picard": { @@ -319,6 +330,7 @@ "branch": "master", "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", "installed_by": [ + "bam_dedup_stats_samtools_umicollapse", "bam_dedup_stats_samtools_umitools", "bam_markduplicates_picard", "bam_sort_stats_samtools" diff --git a/modules/nf-core/umicollapse/environment.yml b/modules/nf-core/umicollapse/environment.yml new file mode 100644 index 000000000..066f55eef --- /dev/null +++ b/modules/nf-core/umicollapse/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda 
+dependencies:
+  - bioconda::umicollapse=1.1.0
diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
new file mode 100644
index 000000000..aec10e851
--- /dev/null
+++ b/modules/nf-core/umicollapse/main.nf
@@ -0,0 +1,76 @@
+// UMICOLLAPSE: deduplicate UMI-tagged reads by running the UMICollapse jar through java.
+// `mode` selects the UMICollapse sub-command and must be 'bam' or 'fastq'; any other value aborts the task.
+// The tool's stdout is tee'd into ${prefix}_UMICollapse.log (emitted as `log`); versions.yml is always written.
+process UMICOLLAPSE {
+    tag "$meta.id"
+    label "process_high"
+    label "process_high_memory"
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/umicollapse:1.1.0--hdfd78af_0' :
+        'biocontainers/umicollapse:1.1.0--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(input), path(bai)
+    val(mode)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam, optional: true
+    tuple val(meta), path("*dedup*fastq.gz"), emit: fastq, optional: true // NOTE(review): script writes ${prefix}.fastq.gz, so this only matches when ext.prefix contains "dedup"
+    tuple val(meta), path("*_UMICollapse.log"), emit: log
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.1.0-0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    // Memory allocation: both heap and stack size must be sufficiently large for umicollapse.
+    // The heap (Xmx) is set to 90% of the task memory; the stack (Xss) is pinned to 999M (see below).
+    // The remaining ~10% is left for everything outside the Java heap so the scheduler does not kill the process.
+    def max_heap_size_mega = (task.memory.toMega() * 0.9).intValue()
+    def max_stack_size_mega = 999 //most java jdks will not allow Xss > 1GB, so fixing this to the allowed max
+    if ( mode !in [ 'fastq', 'bam' ] ) {
+        error "Mode must be one of 'fastq' or 'bam'."
+    }
+    def extension = mode.contains("fastq") ? "fastq.gz" : "bam" // declared with def: only used inside this script string
+    """
+    # Getting the umicollapse jar file like this because `umicollapse` is a Python wrapper script generated
+    # by conda that allows to set the heap size (Xmx), but not the stack size (Xss).
+    # `which` allows us to get the directory that contains `umicollapse`, independent of whether we
+    # are in a container or conda environment.
+    UMICOLLAPSE_JAR=\$(dirname \$(which umicollapse))/../share/umicollapse-${VERSION}/umicollapse.jar
+    java \\
+        -Xmx${max_heap_size_mega}M \\
+        -Xss${max_stack_size_mega}M \\
+        -jar \$UMICOLLAPSE_JAR \\
+        $mode \\
+        -i ${input} \\
+        -o ${prefix}.${extension} \\
+        $args | tee ${prefix}_UMICollapse.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umicollapse: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.0.0-1' // NOTE(review): differs from the '1.1.0-0' reported by script:; aligning it changes the stub versions.yml, so regenerate the stub snapshots when doing so
+    if ( mode !in [ 'fastq', 'bam' ] ) {
+        error "Mode must be one of 'fastq' or 'bam'."
+    }
+    def extension = mode.contains("fastq") ? "fastq.gz" : "bam" // NOTE(review): the stub touches ${prefix}.dedup.<ext> whereas script: writes ${prefix}.<ext>
+    """
+    touch ${prefix}.dedup.${extension}
+    touch ${prefix}_UMICollapse.log
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umicollapse: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/umicollapse/meta.yml b/modules/nf-core/umicollapse/meta.yml
new file mode 100644
index 000000000..7c4ebc5ec
--- /dev/null
+++ b/modules/nf-core/umicollapse/meta.yml
@@ -0,0 +1,82 @@
+name: "umicollapse"
+description: Deduplicate reads based on the mapping co-ordinate and the UMI attached
+  to the read.
+keywords:
+  - umicollapse
+  - deduplication
+  - genomics
+tools:
+  - "umicollapse":
+      description: "UMICollapse contains tools for dealing with Unique Molecular Identifiers
+        (UMIs)/Random Molecular Tags (RMTs)."
+ homepage: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + documentation: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + tool_dev_url: "https://github.com/siddharthab/UMICollapse" + doi: "10.7717/peerj.8275" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Input bam file + pattern: "*.bam" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. Optionally can be skipped using [] when using FastQ input. + pattern: "*.{bai}" + - - mode: + type: string + description: | + Selects the mode of Umicollapse - either fastq or bam need to be provided. + pattern: "{fastq,bam}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*dedup*fastq.gz": + type: file + description: FASTQ file with deduplicated UMIs. + pattern: "*dedup*fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_UMICollapse.log": + type: file + description: A log file with the deduplication statistics. 
+ pattern: "*_{UMICollapse.log}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@CharlotteAnne" + - "@chris-cheshire" +maintainers: + - "@CharlotteAnne" + - "@chris-cheshire" + - "@apeltzer" + - "@siddharthab" + - "@MatthiasZepper" diff --git a/modules/nf-core/umicollapse/tests/main.nf.test b/modules/nf-core/umicollapse/tests/main.nf.test new file mode 100644 index 000000000..db578775e --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test @@ -0,0 +1,177 @@ +nextflow_process { + + name "Test Process UMICOLLAPSE" + script "../main.nf" + process "UMICOLLAPSE" + + tag "modules" + tag "modules_nfcore" + tag "umicollapse" + + test("umicollapse single end test") { + + when { + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getSamLinesMD5(), + process.out.versions).match() } + ) + } + + } + + test("umicollapse paired tests") { + + when { + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getSamLinesMD5(), + process.out.versions).match() } + ) + } + + } + + 
test("umicollapse fastq test (single-end)") { + + when { + config "./nextflow.config" + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_single.fastq.gz', checkIfExists: true), + [] + ] + input[1] = 'fastq' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.fastq, + process.out.versions).match() } + ) + } + } + + test("umicollapse fastq test (paired-end)") { + + when { + config "./nextflow.config" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_2.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.fastq, + process.out.versions).match() } + ) + } + } + + // Stub tests + + test("umicollapse single end test - stub") { + + options "-stub" + + when { + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + + test("umicollapse paired tests - stub") { + + options "-stub" + + when { + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = 'bam' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/main.nf.test.snap b/modules/nf-core/umicollapse/tests/main.nf.test.snap new file mode 100644 index 000000000..e903c0ce0 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test.snap @@ -0,0 +1,110 @@ +{ + "umicollapse single end test": { + "content": [ + "9158ea6e7a0e54819e25cbac5fbc5cc0", + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:31:45.024306" + }, + "umicollapse paired tests": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495", + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:31:52.072799" + }, + "umicollapse fastq test (paired-end)": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.dedup.fastq.gz:md5,721a84a2accac988d636e837c60e47bc" + ] + ], + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T13:57:36.968147" + }, + "umicollapse fastq test (single-end)": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.fastq.gz:md5,2e602ed23eb87f434e4f0a9e491c0310" + ] + ], + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T13:57:28.328682" + }, + "umicollapse single end test - stub": { + "content": [ + [ + [ + { + 
"id": "test", + "single_end": true + }, + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T14:09:40.277318" + }, + "umicollapse paired tests - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T14:09:44.224965" + } +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow.config b/modules/nf-core/umicollapse/tests/nextflow.config new file mode 100644 index 000000000..105d8e132 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umicollapse/tests/tags.yml b/modules/nf-core/umicollapse/tests/tags.yml new file mode 100644 index 000000000..912879c4d --- /dev/null +++ b/modules/nf-core/umicollapse/tests/tags.yml @@ -0,0 +1,2 @@ +umicollapse: + - "modules/nf-core/umicollapse/**" diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test b/modules/nf-core/umitools/dedup/tests/main.nf.test index 883e2d9d7..f00a8cbed 100644 --- a/modules/nf-core/umitools/dedup/tests/main.nf.test +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test @@ -4,6 +4,11 @@ nextflow_process { script "../main.nf" process "UMITOOLS_DEDUP" + tag "modules" + tag "modules_nfcore" + tag "umitools" + tag "umitools/dedup" + test("se - no stats") { config "./nextflow.config" @@ -14,8 +19,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam", checkIfExists: true), - 
file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -43,8 +48,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -56,7 +61,7 @@ nextflow_process { { assert process.success }, { assert path("${process.out.log[0][1]}").exists() }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.versions).match() } ) } @@ -72,8 +77,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -85,7 +90,7 @@ nextflow_process { { 
assert process.success }, { assert path("${process.out.log[0][1]}").exists() }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.tsv_edit_distance, process.out.tsv_per_umi, process.out.tsv_umi_per_position, @@ -107,8 +112,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -136,8 +141,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -165,8 +170,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + 
"genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test.snap b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap index f7f4e94f1..04b81692a 100644 --- a/modules/nf-core/umitools/dedup/tests/main.nf.test.snap +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap @@ -37,13 +37,14 @@ }, "pe - with stats": { "content": [ + "b7be15ac7aae194b04bdbb56f3534495", [ [ { "id": "test", "single_end": false }, - "test.dedup.bam:md5,350e942a0d45e8356fa24bc8c47dc1ed" + "test.dedup_edit_distance.tsv:md5,c247a49b58768e6e2e86a6c08483e612" ] ], [ @@ -52,7 +53,7 @@ "id": "test", "single_end": false }, - "test.dedup_edit_distance.tsv:md5,65186b0964e2f8d970cc04d736d8b119" + "test.dedup_per_umi.tsv:md5,ced75f7bdbf38bf78f3137d5325a8773" ] ], [ @@ -61,16 +62,7 @@ "id": "test", "single_end": false }, - "test.dedup_per_umi.tsv:md5,8e6783a4a79437b095f095f2aefe7c01" - ] - ], - [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup_per_umi_per_position.tsv:md5,9386db4a104b8e4e32f3ca4a84efa4ac" + "test.dedup_per_umi_per_position.tsv:md5,2e1a12e6f720510880068deddeefe063" ] ], [ @@ -79,9 +71,9 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T11:27:24.231325" + "timestamp": "2024-11-25T17:25:28.939957" }, "se - no stats - stub": { "content": [ @@ -103,36 +95,28 @@ }, "se - no stats": { "content": [ - "a114abd9fccce6fe2869852b5cd18964", + "9158ea6e7a0e54819e25cbac5fbc5cc0", [ "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T13:45:48.553561" + "timestamp": "2024-11-23T09:06:54.373171" }, "pe - no stats": { "content": [ - [ - [ - { - "id": 
"test", - "single_end": false - }, - "test.dedup.bam:md5,350e942a0d45e8356fa24bc8c47dc1ed" - ] - ], + "b7be15ac7aae194b04bdbb56f3534495", [ "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T11:27:06.957467" + "timestamp": "2024-11-25T17:24:51.423637" } } \ No newline at end of file diff --git a/modules/nf-core/umitools/dedup/tests/tags.yml b/modules/nf-core/umitools/dedup/tests/tags.yml new file mode 100644 index 000000000..5934c5cb7 --- /dev/null +++ b/modules/nf-core/umitools/dedup/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/dedup: + - "modules/nf-core/umitools/dedup/**" diff --git a/nextflow.config b/nextflow.config index 0af7db8c2..bc528efed 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,6 +30,7 @@ params { with_umi = false skip_umi_extract = false umitools_extract_method = 'string' + umi_dedup_tool = 'umitools' umitools_grouping_method = 'directional' umitools_dedup_stats = false umitools_bc_pattern = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 39dc46d32..5bd25ca0d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -305,6 +305,13 @@ "fa_icon": "fas fa-barcode", "description": "Enable UMI-based read deduplication." 
}, + "umi_dedup_tool": { + "type": "string", + "default": "umitools", + "description": "Specifies the tool to use for UMI deduplication - available options are 'umitools' and 'umicollapse'.", + "fa_icon": "fas fa-barcode", + "enum": ["umitools", "umicollapse"] + }, "umitools_extract_method": { "type": "string", "default": "string", diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf index aa0dd4ed7..1745f8be6 100644 --- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf @@ -216,6 +216,10 @@ def validateInputParameters() { } } + if (params.with_umi && params.umi_dedup_tool == "umicollapse" && params.umitools_grouping_method !in ['directional', 'adjacency', 'cluster']) { + error("UMI grouping method '${params.umitools_grouping_method}' unsupported for umicollapse, supported methods are 'cluster', 'adjacency' and 'directional'") + } + if (params.skip_alignment) { skipAlignmentWarn() } diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/main.nf b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/main.nf new file mode 100644 index 000000000..54c42b986 --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/main.nf @@ -0,0 +1,55 @@ +// +// umicollapse, index BAM file and run samtools stats, flagstat and idxstats +// + +include { UMICOLLAPSE } from '../../../modules/nf-core/umicollapse/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai/csi) ] + + main: + + ch_versions = Channel.empty() + + // + // umicollapse in bam mode (thus hardcode mode input channel to 'bam') + // + UMICOLLAPSE ( ch_bam_bai, channel.value( 'bam' )) + ch_versions = 
ch_versions.mix(UMICOLLAPSE.out.versions.first()) + + // + // Index BAM file and run samtools stats, flagstat and idxstats + // + SAMTOOLS_INDEX ( UMICOLLAPSE.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + ch_bam_bai_dedup = UMICOLLAPSE.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) // remainder: true keeps BAMs that have no .bai; the missing index then arrives as a falsy value (see map below) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) // same for .csi, so each tuple is [ meta, bam, bai-or-null, csi-or-null ] + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + + BAM_STATS_SAMTOOLS ( ch_bam_bai_dedup, [ [:], [] ] ) // NOTE(review): second argument is an empty [ meta, file ] placeholder — presumably the optional reference FASTA; confirm against bam_stats_samtools + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = UMICOLLAPSE.out.bam // channel: [ val(meta), path(bam) ] + + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + dedup_stats = UMICOLLAPSE.out.log // channel: [ val(meta), path(log) ] - the UMICollapse log, not a samtools stats file + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/meta.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/meta.yml new file mode 100644 index 000000000..a24e0448d --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/meta.yml @@ -0,0 +1,59 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_dedup_stats_samtools_umicollapse" +description: umicollapse, index BAM file and run samtools stats, flagstat and idxstats +keywords: + - umi + - dedup + - index + - bam + - sam + - cram +components: + - umicollapse + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - 
ch_bam_bai: + description: | + input BAM file + Structure: [ val(meta), path(bam), path(bai) ] +output: + - bam: + description: | + Umi deduplicated BAM/SAM file + Structure: [ val(meta), path(bam) ] + - bai: + description: | + Umi deduplicated BAM/SAM samtools index + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI samtools index + Structure: [ val(meta), path(csi) ] + - dedup_stats: + description: | + File containing umicollapse deduplication stats + Structure: [ val(meta), path(stats) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@MatthiasZepper" +maintainers: + - "@MatthiasZepper" diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test new file mode 100644 index 000000000..f4f14c71b --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test @@ -0,0 +1,57 @@ +// nf-core subworkflows test bam_dedup_stats_samtools_umicollapse +nextflow_workflow { + + name "Test Subworkflow BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE" + script "../main.nf" + workflow "BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_dedup_stats_samtools_umicollapse" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "bwa/index" + tag "bwa/mem" + tag "samtools" + tag "samtools/index" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + tag "umicollapse" + tag "umitools/extract" + + test("sarscov2_bam_bai") { + + when { + 
config "./paired-end-umis.config" + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(bam(workflow.out.bam[0][1]).getSamLinesMD5()).match("test_bam_dedup_stats_samtools_umicollapse_bam")}, // separate, because of linting error otherwise + { assert snapshot(workflow.out.versions).match("test_bam_dedup_stats_samtools_umicollapse_versions") }, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert workflow.out.dedup_stats.get(0).get(1) ==~ ".*_UMICollapse.log"}, + { assert snapshot(workflow.out.stats).match("test_bam_dedup_stats_samtools_umicollapse_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_dedup_stats_samtools_umicollapse_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_dedup_stats_samtools_umicollapse_idxstats") } + ) + } + +} +} + diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap new file mode 100644 index 000000000..9694c6d5f --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap @@ -0,0 +1,79 @@ +{ + "test_bam_dedup_stats_samtools_umicollapse_stats": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.stats:md5,498621f92e86d55e4f7ae93170e6e733" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T13:57:02.323104" + }, + "test_bam_dedup_stats_samtools_umicollapse_versions": { + "content": [ + [ + 
"versions.yml:md5,20605eb79c410c0ed179ba660d82f75b", + "versions.yml:md5,23617661d2c899996bee2b05db027e25", + "versions.yml:md5,657bce03545b4c57f9c5fc4314bf85f7", + "versions.yml:md5,e02a62a393a833778e16542eeed0d148", + "versions.yml:md5,ef00762e264b99ac45713dc0dedf4060" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T18:39:15.637444" + }, + "test_bam_dedup_stats_samtools_umicollapse_flagstats": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T13:57:02.366866" + }, + "test_bam_dedup_stats_samtools_umicollapse_idxstats": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.idxstats:md5,85d20a901eef23ca50c323638a2eb602" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T13:57:02.410712" + }, + "test_bam_dedup_stats_samtools_umicollapse_bam": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T18:39:15.613319" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config new file mode 100644 index 000000000..8d58a861f --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config @@ -0,0 +1,6 @@ +process { + + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/tags.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/tags.yml new file mode 100644 index 000000000..a3ba5b726 --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/tags.yml @@ -0,0 +1,2 @@ 
+subworkflows/bam_dedup_stats_samtools_umicollapse: + - subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/** diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test index ab541cb88..93e624857 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test @@ -3,6 +3,19 @@ nextflow_workflow { name "Test Workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS" script "../main.nf" workflow "BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_dedup_stats_samtools_umitools" + tag "subworkflows/bam_stats_samtools" + tag "bam_dedup_stats_samtools_umitools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + tag "umitools" + tag "umitools/dedup" test("sarscov2_bam_bai") { @@ -13,8 +26,8 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test'], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) ]) input[1] = val_get_dedup_stats """ @@ -28,6 +41,7 @@ nextflow_workflow { { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, { assert snapshot( + bam(workflow.out.bam[0][1]).getSamLinesMD5(), workflow.out.stats, workflow.out.flagstat, workflow.out.idxstats, @@ -48,8 +62,8 @@ nextflow_workflow { input[0] = 
Channel.of([ [ id:'test'], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) ]) input[1] = val_get_dedup_stats """ diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap index d39f9129b..3b3613572 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap @@ -1,12 +1,13 @@ { "sarscov2_bam_bai": { "content": [ + "b7be15ac7aae194b04bdbb56f3534495", [ [ { "id": "test" }, - "test.stats:md5,84891a894010aeb882c4092db9248d2c" + "test.stats:md5,41ba57a9b90b54587e7d154e5405ea5e" ] ], [ @@ -14,7 +15,7 @@ { "id": "test" }, - "test.flagstat:md5,0bb716e40fae381b97484b58e0b16efe" + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" ] ], [ @@ -22,7 +23,7 @@ { "id": "test" }, - "test.idxstats:md5,1adb27b52d4d64b826f48b59d61dcd4d" + "test.idxstats:md5,85d20a901eef23ca50c323638a2eb602" ] ], [ @@ -34,10 +35,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T08:04:23.444693448" + "timestamp": "2024-11-25T17:23:13.841219" }, "sarscov2_bam_bai - stub": { "content": [ diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/tags.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/tags.yml new file mode 100644 index 000000000..bfd5e023e --- /dev/null +++ 
b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_dedup_stats_samtools_umitools: + - subworkflows/nf-core/bam_dedup_stats_samtools_umitools/** diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index d65912df0..2e0d7d161 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -57,6 +57,8 @@ include { FASTQ_ALIGN_HISAT2 } from '../../subworkflows/nf-core/fa include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' include { BAM_MARKDUPLICATES_PICARD } from '../../subworkflows/nf-core/bam_markduplicates_picard' include { BAM_RSEQC } from '../../subworkflows/nf-core/bam_rseqc' +include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' +include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig' @@ -217,21 +219,32 @@ workflow RNASEQ { // if (params.with_umi) { // Deduplicate genome BAM file before downstream analysis - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( - ch_genome_bam.join(ch_genome_bam_index, by: [0]), - params.umitools_dedup_stats - ) - ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam - ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai - ch_multiqc_files = 
ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.deduplog.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats.collect{it[1]}) + if (params.umi_dedup_tool == "umicollapse") { + BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME ( + ch_genome_bam.join(ch_genome_bam_index, by: [0]) + ) + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.dedup_stats.collect{it[1]}.ifEmpty([])) + } else if (params.umi_dedup_tool == "umitools") { + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( + ch_genome_bam.join(ch_genome_bam_index, by: [0]), + params.umitools_dedup_stats + ) + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.deduplog.collect{it[1]}) + } else { + error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'") + } + ch_genome_bam = UMI_DEDUP_GENOME.out.bam + ch_genome_bam_index = UMI_DEDUP_GENOME.out.bai + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.stats.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.flagstat.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.idxstats.collect{it[1]}) if (params.bam_csi_index) { - ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi + ch_genome_bam_index = UMI_DEDUP_GENOME.out.csi } - ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions) + ch_versions = ch_versions.mix(UMI_DEDUP_GENOME.out.versions) // Co-ordinate sort, index and run stats on transcriptome BAM BAM_SORT_STATS_SAMTOOLS ( @@ -242,14 +255,24 @@ workflow RNASEQ { ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai // 
Deduplicate transcriptome BAM file before read counting with Salmon - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( - ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]), - params.umitools_dedup_stats - ) + if (params.umi_dedup_tool == "umicollapse") { + BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME ( + ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]) + ) + UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME + } else if (params.umi_dedup_tool == "umitools") { + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( + ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]), + params.umitools_dedup_stats + ) + UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME + } else { + error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'") + } // Name sort BAM before passing to Salmon SAMTOOLS_SORT ( - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam, + UMI_DEDUP_TRANSCRIPTOME.out.bam, ch_fasta.map { [ [:], it ] } ) @@ -264,16 +287,16 @@ workflow RNASEQ { paired_end: !meta.single_end return [ meta, bam ] } - .set { ch_umitools_dedup_bam } + .set { ch_dedup_bam } // Fix paired-end reads in name sorted BAM file // See: https://github.com/nf-core/rnaseq/issues/828 UMITOOLS_PREPAREFORSALMON ( - ch_umitools_dedup_bam.paired_end.map { meta, bam -> [ meta, bam, [] ] } + ch_dedup_bam.paired_end.map { meta, bam -> [ meta, bam, [] ] } ) ch_versions = ch_versions.mix(UMITOOLS_PREPAREFORSALMON.out.versions.first()) - ch_umitools_dedup_bam + ch_dedup_bam .single_end .mix(UMITOOLS_PREPAREFORSALMON.out.bam) .set { ch_transcriptome_bam } @@ -372,20 +395,31 @@ workflow RNASEQ { // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs // if (params.with_umi) { - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( - ch_genome_bam.join(ch_genome_bam_index, by: [0]), - params.umitools_dedup_stats - ) - ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam - 
ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai - ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.deduplog.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.stats.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats.collect{it[1]}) + if (params.umi_dedup_tool == "umicollapse") { + BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME ( + ch_genome_bam.join(ch_genome_bam_index, by: [0]), + ) + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.dedup_stats.collect{it[1]}.ifEmpty([])) + } else if (params.umi_dedup_tool == "umitools") { + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( + ch_genome_bam.join(ch_genome_bam_index, by: [0]), + params.umitools_dedup_stats + ) + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.deduplog.collect{it[1]}) + } else { + error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'") + } + ch_genome_bam = UMI_DEDUP_GENOME.out.bam + ch_genome_bam_index = UMI_DEDUP_GENOME.out.bai + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.stats.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.flagstat.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.idxstats.collect{it[1]}) if (params.bam_csi_index) { - ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.csi + ch_genome_bam_index = UMI_DEDUP_GENOME.out.csi } - ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions) + ch_versions = ch_versions.mix(UMI_DEDUP_GENOME.out.versions) } } diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index 
726586d32..5c51aa2a9 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -21,6 +21,31 @@ includeConfig "../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/nextf includeConfig "../../subworkflows/nf-core/fastq_subsample_fq_salmon/nextflow.config" includeConfig "../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/nextflow.config" +def umi_dedup_args() { + /* Build the ext.args closure for whichever UMI deduplication tool is selected. */ + /* The closure is returned unevaluated; meta and params inside it are read when it runs. */ + if (params.umi_dedup_tool != "umicollapse") { + /* Every value other than "umicollapse" gets the umi-tools dedup flags. */ + return { [ + meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', + params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', + params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() } + } + /* Translate the umi-tools grouping method into the value given to --algo. */ + /* Methods without an entry in the table are forwarded unchanged. */ + def short_names = [directional: 'dir', adjacency: 'adj', cluster: 'cc'] + def requested = params.umitools_grouping_method + def algo = short_names[requested?.toString()] ?: requested + /* algo is fixed here, when the function is called, not when the closure runs. */ + return { [ + '--two-pass', + meta.single_end ? '' : '--paired --remove-unpaired --remove-chimeric', + params.umitools_grouping_method ? "--algo '${algo}'" : '', + params.umitools_umi_separator ? "--umi-sep '${params.umitools_umi_separator}'" : '' + ].join(' ').trim() } +} + // // STAR Salmon alignment options // @@ -134,12 +159,8 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:UMITOOLS_DEDUP' { - ext.args = { [ - meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', - params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', - params.umitools_umi_separator ?
"--umi-separator='${params.umitools_umi_separator}'" : '' - ].join(' ').trim() } + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:UMI(COLLAPSE|TOOLS_DEDUP)' { + ext.args = umi_dedup_args() ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } publishDir = [ [ @@ -161,7 +182,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:SAMTOOLS_INDEX' { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:SAMTOOLS_INDEX' { publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, @@ -170,7 +191,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/samtools_stats" }, @@ -228,12 +249,8 @@ if (!params.skip_alignment) { if (params.with_umi && ['star_salmon','hisat2'].contains(params.aligner)) { process { - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:UMITOOLS_DEDUP' { - ext.args = { [ - meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', - params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', - params.umitools_umi_separator ? 
"--umi-separator='${params.umitools_umi_separator}'" : '' - ].join(' ').trim() } + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:UMI(COLLAPSE|TOOLS_DEDUP)' { + ext.args = umi_dedup_args() ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ [ @@ -255,7 +272,7 @@ if (!params.skip_alignment) { ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:SAMTOOLS_INDEX' { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:SAMTOOLS_INDEX' { ext.args = { params.bam_csi_index ? '-c' : '' } ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ @@ -266,7 +283,7 @@ if (!params.skip_alignment) { ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.umi_dedup.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/samtools_stats" },