From 1d00a7ed381756ff019371af4695cf63157f953c Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Thu, 25 Apr 2024 19:43:58 +0300 Subject: [PATCH 01/17] Added minimap2 as aligner --- CITATIONS.md | 4 + conf/base.config | 2 +- conf/modules/aligner.config | 7 +- conf/modules/prepare_genome.config | 11 + docs/output.md | 5 + docs/usage.md | 3 +- main.nf | 103 ++-- modules.json | 10 + .../nf-core/minimap2/align/environment.yml | 9 + modules/nf-core/minimap2/align/main.nf | 63 +++ modules/nf-core/minimap2/align/meta.yml | 75 +++ .../nf-core/minimap2/align/tests/main.nf.test | 181 +++++++ .../minimap2/align/tests/main.nf.test.snap | 69 +++ modules/nf-core/minimap2/align/tests/tags.yml | 2 + .../nf-core/minimap2/index/environment.yml | 7 + modules/nf-core/minimap2/index/main.nf | 44 ++ modules/nf-core/minimap2/index/meta.yml | 43 ++ .../nf-core/minimap2/index/tests/main.nf.test | 32 ++ .../minimap2/index/tests/main.nf.test.snap | 68 +++ modules/nf-core/minimap2/index/tests/tags.yml | 2 + nextflow_schema.json | 9 +- .../main.nf | 9 +- workflows/sarek/main.nf | 451 +++++++++++++++--- 23 files changed, 1078 insertions(+), 131 deletions(-) create mode 100644 modules/nf-core/minimap2/align/environment.yml create mode 100644 modules/nf-core/minimap2/align/main.nf create mode 100644 modules/nf-core/minimap2/align/meta.yml create mode 100644 modules/nf-core/minimap2/align/tests/main.nf.test create mode 100644 modules/nf-core/minimap2/align/tests/main.nf.test.snap create mode 100644 modules/nf-core/minimap2/align/tests/tags.yml create mode 100644 modules/nf-core/minimap2/index/environment.yml create mode 100644 modules/nf-core/minimap2/index/main.nf create mode 100644 modules/nf-core/minimap2/index/meta.yml create mode 100644 modules/nf-core/minimap2/index/tests/main.nf.test create mode 100644 modules/nf-core/minimap2/index/tests/main.nf.test.snap create mode 100644 modules/nf-core/minimap2/index/tests/tags.yml rename subworkflows/local/{fastq_align_bwamem_mem2_dragmap_sentieon => 
fastq_align_bwamem_mem2_dragmap_minimap2_sentieon}/main.nf (83%) diff --git a/CITATIONS.md b/CITATIONS.md index 632a65b795..e2570438f6 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -92,6 +92,10 @@ > Chen X, Schulz-Trieglaff O, Shaw R, et al.: Manta: rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics. 2016 Apr 15;32(8):1220-2. doi: 10.1093/bioinformatics/btv710. PubMed PMID: 26647377. +- [Minimap2](https://github.com/lh3/minimap2) + + > Li, H. Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics. 2018 May 10;34:3094-3100. doi:10.1093/bioinformatics/bty191. PubMed PMID: 29750242; PubMed Central PMCID: PMC6137996. + - [Mosdepth](https://academic.oup.com/bioinformatics/article/34/5/867/4583630) > Brent S Pedersen, Aaron R Quinlan, Mosdepth: quick coverage calculation for genomes and exomes, Bioinformatics, Volume 34, Issue 5, 01 March 2018, Pages 867–868. doi: 10.1093/bioinformatics/btx699. PubMed PMID: 29096012. PubMed Central PMCID: PMC6030888. 
diff --git a/conf/base.config b/conf/base.config index d371e9407f..bec2b779f4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -66,7 +66,7 @@ process { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } } - withName: 'BWAMEM1_MEM|BWAMEM2_MEM' { + withName: 'BWAMEM1_MEM|BWAMEM2_MEM|MINIMAP2_ALIGN' { cpus = { check_max( 24 * task.attempt, 'cpus' ) } memory = { check_max( 30.GB * task.attempt, 'memory' ) } } diff --git a/conf/modules/aligner.config b/conf/modules/aligner.config index 5f44e199b0..6d4a5c36a6 100644 --- a/conf/modules/aligner.config +++ b/conf/modules/aligner.config @@ -29,11 +29,16 @@ process { ext.when = { params.aligner == 'dragmap' } } + withName: 'MINIMAP2_ALIGN' { + ext.args = { "-ax sr -R ${meta.read_group}" } + ext.when = { params.aligner == 'minimap2' } + } + withName: 'SENTIEON_BWAMEM' { ext.when = { params.aligner == 'sentieon-bwamem' } } - withName: 'BWAMEM.*_MEM|DRAGMAP_ALIGN|SENTIEON_BWAMEM' { + withName: 'BWAMEM.*_MEM|DRAGMAP_ALIGN|MINIMAP2_ALIGN|SENTIEON_BWAMEM' { ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index e54138538c..aad4c982ad 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -76,6 +76,17 @@ process { ] } + withName: 'MINIMAP2_INDEX' { + ext.args = { "-x sr" } + ext.when = { !params.minimap2 && params.step == "mapping" && params.aligner == "minimap2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/minimap2" }, + pattern: "*mmi", + saveAs: { params.save_reference || params.build_only_index ? 
it : null } + ] + } + withName: 'MSISENSORPRO_SCAN' { ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') } publishDir = [ diff --git a/docs/output.md b/docs/output.md index ff6445e89b..a272ddb71a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -20,6 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [BWA](#bwa) - [BWA-mem2](#bwa-mem2) - [DragMap](#dragmap) + - [Minimap2](#minimap2) - [Sentieon BWA mem](#sentieon-bwa-mem) - [Mark Duplicates](#mark-duplicates) - [GATK MarkDuplicates (Spark)](#gatk-markduplicates-spark) @@ -174,6 +175,10 @@ These files are intermediate and by default not placed in the output-folder kept These files are intermediate and by default not placed in the output-folder kept in the final files delivered to users. Set `--save_mapped` to enable publishing, furthermore add the flag `save_output_as_bam` for publishing in BAM format. +#### Minimap2 + +[Minimap2](https://github.com/lh3/minimap2) is a versatile pairwise aligner for genomic and spliced nucleotide sequences. The aligned reads are then coordinate-sorted (or name-sorted if [`GATK MarkDuplicatesSpark`](https://gatk.broadinstitute.org/hc/en-us/articles/5358833264411-MarkDuplicatesSpark) is used for duplicate marking) with [samtools](https://www.htslib.org/doc/samtools.html). + #### Sentieon BWA mem Sentieon [bwa mem](https://support.sentieon.com/manual/usages/general/#bwa-mem-syntax) is a subroutine for mapping low-divergent sequences against a large reference genome. It is part of the proprietary software package [DNAseq](https://www.sentieon.com/detailed-description-of-pipelines/#dnaseq) from [Sentieon](https://www.sentieon.com/). 
diff --git a/docs/usage.md b/docs/usage.md index fbd3e8d9a6..337d168a3c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -843,8 +843,9 @@ For GATK.GRCh38 the links for each reference file and the corresponding processe | known_indels | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | | known_indels_tbi | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | | known_snps | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | -| known_snps_tbi | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | +| known_snps_tbi | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | | mappability | ControlFREEC | http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip | http://boevalab.inf.ethz.ch/FREEC/tutorial.html | +| minimap2 | Minimap2 | minimap2 -x sr -d ${fasta.baseName} $fasta | | | pon | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | | pon_tbi | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | diff --git a/main.nf b/main.nf index 8b7940a558..342b7c980f 100755 --- a/main.nf +++ b/main.nf @@ -20,6 
+20,19 @@ */ nextflow.enable.dsl = 2 + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { SAREK } from './workflows/sarek' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' + +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_sarek_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES @@ -51,6 +64,7 @@ params.known_snps = getGenomeAttribute('known_snps') params.known_snps_tbi = getGenomeAttribute('known_snps_tbi') params.known_snps_vqsr = getGenomeAttribute('known_snps_vqsr') params.mappability = getGenomeAttribute('mappability') +params.minimap2 = getGenomeAttribute('minimap2') params.ngscheckmate_bed = getGenomeAttribute('ngscheckmate_bed') params.pon = getGenomeAttribute('pon') params.pon_tbi = getGenomeAttribute('pon_tbi') @@ -105,6 +119,25 @@ vep_cache_version = params.vep_cache_version ?: Channel.empty() vep_genome = params.vep_genome ?: Channel.empty() vep_species = params.vep_species ?: Channel.empty() +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GATK.GRCh38 -profile docker --outdir results" + log.info logo + 
+ paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters + vep_extra_files = [] if (params.dbnsfp && params.dbnsfp_tbi) { @@ -125,10 +158,13 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { SAREK } from './workflows/sarek' + // WORKFLOW: Run main nf-core/sarek analysis pipeline workflow NFCORE_SAREK { + take: - samplesheet + samplesheet // channel: samplesheet read in from --input main: versions = Channel.empty() @@ -160,11 +196,12 @@ workflow NFCORE_SAREK { : PREPARE_GENOME.out.bwamem2 dragmap = params.dragmap ? Channel.fromPath(params.dragmap).map{ it -> [ [id:'dragmap'], it ] }.collect() : PREPARE_GENOME.out.hashtable + minimap2 = params.minimap2 ? Channel.fromPath(params.minimap2).map{ it -> [ [id:'minimap2'], it ] }.collect() + : PREPARE_GENOME.out.minimap2 // Gather index for mapping given the chosen aligner - index_alignement = (aligner == "bwa-mem" || aligner == "sentieon-bwamem") ? bwa : - aligner == "bwa-mem2" ? bwamem2 : - dragmap + index_alignement = (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem") ? bwa : + params.aligner == "bwa-mem2" ? bwamem2 : params.aligner == "dragmap" ? 
dragmap : minimap2 // TODO: add a params for msisensorpro_scan msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan @@ -262,55 +299,13 @@ workflow NFCORE_SAREK { // // WORKFLOW: Run pipeline // - SAREK(samplesheet, - allele_files, - bcftools_annotations, - bcftools_annotations_tbi, - bcftools_header_lines, - cf_chrom_len, - chr_files, - cnvkit_reference, - dbsnp, - dbsnp_tbi, - dbsnp_vqsr, - dict, - fasta, - fasta_fai, - gc_file, - germline_resource, - germline_resource_tbi, - index_alignement, - intervals_and_num_intervals, - intervals_bed_combined, - intervals_bed_combined_for_variant_calling, - intervals_bed_gz_tbi_and_num_intervals, - intervals_bed_gz_tbi_combined, - intervals_for_preprocessing, - known_indels_vqsr, - known_sites_indels, - known_sites_indels_tbi, - known_sites_snps, - known_sites_snps_tbi, - known_snps_vqsr, - loci_files, - mappability, - msisensorpro_scan, - ngscheckmate_bed, - pon, - pon_tbi, - rt_file, - sentieon_dnascope_model, - snpeff_cache, - vep_cache, - vep_cache_version, - vep_extra_files, - vep_fasta, - vep_genome, - vep_species + SAREK ( + samplesheet ) emit: multiqc_report = SAREK.out.multiqc_report // channel: /path/to/multiqc_report.html + } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -318,6 +313,8 @@ workflow NFCORE_SAREK { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 workflow { main: @@ -325,7 +322,7 @@ workflow { // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION( + PIPELINE_INITIALISATION ( params.version, params.help, params.validate_params, @@ -338,12 +335,14 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet) + NFCORE_SAREK ( + PIPELINE_INITIALISATION.out.samplesheet + ) // // SUBWORKFLOW: Run completion tasks // - 
PIPELINE_COMPLETION( + PIPELINE_COMPLETION ( params.email, params.email_on_fail, params.plaintext_email, diff --git a/modules.json b/modules.json index a3f3a8fd65..f815caf248 100644 --- a/modules.json +++ b/modules.json @@ -314,6 +314,16 @@ "git_sha": "8731a6221dd10fd9039e18518b390b43e14ef9ae", "installed_by": ["modules"] }, + "minimap2/align": { + "branch": "master", + "git_sha": "72e277acfd9e61a9f1368eafb4a9e83f5bcaa9f5", + "installed_by": ["modules"] + }, + "minimap2/index": { + "branch": "master", + "git_sha": "72e277acfd9e61a9f1368eafb4a9e83f5bcaa9f5", + "installed_by": ["modules"] + }, "mosdepth": { "branch": "master", "git_sha": "30d3ca4346ae38f0de821c57a9c517b8b0b135d6", diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 0000000000..051ca8efba --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,9 @@ +name: minimap2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::minimap2=2.28 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf new file mode 100644 index 0000000000..62349edc22 --- /dev/null +++ b/modules/nf-core/minimap2/align/main.nf @@ -0,0 +1,63 @@ +process MINIMAP2_ALIGN { + tag "$meta.id" + label 'process_high' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) + val bam_format + val cigar_paf_format + val cigar_bam + + output: + tuple val(meta), path("*.paf"), optional: true, emit: paf + tuple val(meta), path("*.bam"), optional: true, emit: bam + tuple val(meta), path("*.csi"), optional: true, emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + """ + minimap2 \\ + $args \\ + -t $task.cpus \\ + ${reference ?: reads} \\ + $reads \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_file = bam_format ? 
"${prefix}.bam" : "${prefix}.paf" + """ + touch $output_file + touch ${prefix}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml new file mode 100644 index 0000000000..408522d5c9 --- /dev/null +++ b/modules/nf-core/minimap2/align/meta.yml @@ -0,0 +1,75 @@ +name: minimap2_align +description: A versatile pairwise aligner for genomic and spliced nucleotide sequences +keywords: + - align + - fasta + - fastq + - genome + - paf + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" + - bam: + type: file + description: Alignment in BAM format + pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 0000000000..83cceeab19 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,181 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true 
+ input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.paf[0][1]).name, 
+ file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 0000000000..19a8f20412 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,69 @@ +{ + "sarscov2 - fastq, fasta, true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:18.939731126" + }, + "sarscov2 - fastq, fasta, true, false, false - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:34.275879844" + }, + "sarscov2 - fastq, fasta, false, false, false - stub": { + "content": [ + "test.paf", + "test.csi", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:39.227958138" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:24.265054877" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:29.27901773" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 0000000000..39dba37441 --- /dev/null +++ 
b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml new file mode 100644 index 0000000000..8a912a1213 --- /dev/null +++ b/modules/nf-core/minimap2/index/environment.yml @@ -0,0 +1,7 @@ +name: minimap2_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::minimap2=2.28 diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf new file mode 100644 index 0000000000..383202142c --- /dev/null +++ b/modules/nf-core/minimap2/index/main.nf @@ -0,0 +1,44 @@ +process MINIMAP2_INDEX { + label 'process_low' + + // Note: the versions here need to match the versions used in minimap2/align + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/minimap2:2.28--he4a0461_0' : + 'biocontainers/minimap2:2.28--he4a0461_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.mmi"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + minimap2 \\ + -t $task.cpus \\ + -d ${fasta.baseName}.mmi \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.mmi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml new file mode 100644 index 0000000000..1d29e3f2d6 --- /dev/null +++ b/modules/nf-core/minimap2/index/meta.yml @@ -0,0 +1,43 @@ +name: minimap2_index +description: Provides fasta index 
required by minimap2 alignment. +keywords: + - index + - fasta + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + Reference database in FASTA format. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: Minimap2 fasta index. + pattern: "*.mmi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" + - "@drpatelh" +maintainers: + - "@yuukiiwa" + - "@drpatelh" diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test b/modules/nf-core/minimap2/index/tests/main.nf.test new file mode 100644 index 0000000000..97840ff75d --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process MINIMAP2_INDEX" + script "../main.nf" + process "MINIMAP2_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/index" + + test("minimap2 index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test.snap b/modules/nf-core/minimap2/index/tests/main.nf.test.snap new file mode 100644 index 0000000000..0b0988283e --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "Should run without failures": { + 
"content": [ + { + "0": [ + [ + { + "id": "test_ref" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "1": [ + "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + ], + "index": [ + [ + { + "id": "test_ref" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "versions": [ + "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T11:46:30.000058092" + }, + "minimap2 index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "1": [ + "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + ], + "index": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "versions": [ + "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:58:29.828187662" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/tests/tags.yml b/modules/nf-core/minimap2/index/tests/tags.yml new file mode 100644 index 0000000000..e5ef8e19f3 --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/index: + - modules/nf-core/minimap2/index/** diff --git a/nextflow_schema.json b/nextflow_schema.json index 0c58c57655..aabcce9057 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -216,7 +216,7 @@ "type": "string", "default": "bwa-mem", "fa_icon": "fas fa-puzzle-piece", - "enum": ["bwa-mem", "bwa-mem2", "dragmap", "sentieon-bwamem"], + "enum": ["bwa-mem", "bwa-mem2", "dragmap", "minimap2", "sentieon-bwamem"], "description": "Specify aligner to be used to map reads to reference genome.", "help_text": "Sarek will build missing indices automatically if not provided. 
Set `--bwa false` if indices should be (re-)built.\nIf DragMap is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. For more info see [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode)." }, @@ -733,6 +733,13 @@ "hidden": true, "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, + "minimap2": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to minimap2 genome index file.", + "hidden": true, + "help_text": "If you use AWS iGenomes, this has NOT already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the known FASTA file. Combine with `--save_reference` to save for future runs." + }, "ngscheckmate_bed": { "type": "string", "fa_icon": "fas fa-file", diff --git a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf similarity index 83% rename from subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf rename to subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf index 0a711afc5e..81be7750c7 100644 --- a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf +++ b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf @@ -7,9 +7,10 @@ include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/nf-core/bwa/mem/main' include { DRAGMAP_ALIGN } from '../../../modules/nf-core/dragmap/align/main' +include { MINIMAP2_ALIGN } from '../../../modules/nf-core/minimap2/align/main' include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' -workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON { +workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON { take: reads // 
channel: [mandatory] meta, reads index // channel: [mandatory] index @@ -22,10 +23,14 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON { versions = Channel.empty() reports = Channel.empty() + cigar_paf_format = false + cigar_bam = false + // Only one of the following should be run BWAMEM1_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem BWAMEM2_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem2 DRAGMAP_ALIGN(reads, index, [[id:'no_fasta'], []], sort) // If aligner is dragmap + MINIMAP2_ALIGN(reads, index, [[id:'no_fasta'], []], sort, cigar_paf_format, cigar_bam) // If aligner is minimap2 // The sentieon-bwamem-module does sorting as part of the conversion from sam to bam. SENTIEON_BWAMEM(reads, index, fasta, fasta_fai) // If aligner is sentieon-bwamem @@ -35,6 +40,7 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON { bam = bam.mix(BWAMEM1_MEM.out.bam) bam = bam.mix(BWAMEM2_MEM.out.bam) bam = bam.mix(DRAGMAP_ALIGN.out.bam) + bam = bam.mix(MINIMAP2_ALIGN.out.bam) bam = bam.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bam ] }) bai = SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bai ] } @@ -46,6 +52,7 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON { versions = versions.mix(BWAMEM1_MEM.out.versions) versions = versions.mix(BWAMEM2_MEM.out.versions) versions = versions.mix(DRAGMAP_ALIGN.out.versions) + versions = versions.mix(MINIMAP2_ALIGN.out.versions) versions = versions.mix(SENTIEON_BWAMEM.out.versions) emit: diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index 5062470373..e95bc5e809 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -4,82 +4,206 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from 
'../../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_sarek_pipeline' +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_gc, + params.ascat_loci_rt, + params.bwa, + params.bwamem2, + params.bcftools_annotations, + params.bcftools_annotations_tbi, + params.bcftools_header_lines, + params.cf_chrom_len, + params.chr_dir, + params.cnvkit_reference, + params.dbnsfp, + params.dbnsfp_tbi, + params.dbsnp, + params.dbsnp_tbi, + params.dict, + params.dragmap, + params.fasta, + params.fasta_fai, + params.germline_resource, + params.germline_resource_tbi, + params.input, + params.intervals, + params.known_indels, + params.known_indels_tbi, + params.known_snps, + params.known_snps_tbi, + params.mappability, + params.minimap2, + params.multiqc_config, + params.ngscheckmate_bed, + params.pon, + params.pon_tbi, + params.sentieon_dnascope_model, + params.spliceai_indel, + params.spliceai_indel_tbi, + params.spliceai_snv, + params.spliceai_snv_tbi +] + +// only check if we are using the tools +if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache) +if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + 
+// Initialize file channels based on params, defined in the params.genomes[params.genome] scope +bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty() +bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty() +cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : [] +dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) +fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() +fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() +germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input +known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) +known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) +mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) +pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) +sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]) + +// Initialize value channels based on params, defined in the params.genomes[params.genome] scope +ascat_genome = params.ascat_genome ?: Channel.empty() +dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty() +known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty() +known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty() +ngscheckmate_bed = params.ngscheckmate_bed ? 
Channel.value(params.ngscheckmate_bed) : Channel.empty() +snpeff_db = params.snpeff_db ?: Channel.empty() +vep_cache_version = params.vep_cache_version ?: Channel.empty() +vep_genome = params.vep_genome ?: Channel.empty() +vep_species = params.vep_species ?: Channel.empty() + + +vep_extra_files = [] + +if (params.dbnsfp && params.dbnsfp_tbi) { + vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) + vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) +} + +if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { + vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL/NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ // Create samplesheets to restart from different steps -include { CHANNEL_ALIGN_CREATE_CSV } from '../../subworkflows/local/channel_align_create_csv/main' -include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../../subworkflows/local/channel_markduplicates_create_csv/main' -include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../../subworkflows/local/channel_baserecalibrator_create_csv/main' -include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../../subworkflows/local/channel_applybqsr_create_csv/main' -include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../../subworkflows/local/channel_variant_calling_create_csv/main' +include { SAMPLESHEET_TO_CHANNEL } from '../subworkflows/local/samplesheet_to_channel/main' +include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main' +include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from 
'../subworkflows/local/channel_markduplicates_create_csv/main' +include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main' +include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../subworkflows/local/channel_applybqsr_create_csv/main' +include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main' + +// Download cache for SnpEff/VEP if needed +include { DOWNLOAD_CACHE_SNPEFF_VEP } from '../subworkflows/local/download_cache_snpeff_vep/main' + +// Initialize annotation cache +include { INITIALIZE_ANNOTATION_CACHE } from '../subworkflows/local/initialize_annotation_cache/main' + +// Build indices if needed +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main' + +// Build intervals if needed +include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main' + +// Build CNVkit reference if needed +include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main' // Convert BAM files to FASTQ files -include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../../subworkflows/local/bam_convert_samtools/main' -include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../../subworkflows/local/bam_convert_samtools/main' +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main' +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../subworkflows/local/bam_convert_samtools/main' // Run FASTQC -include { FASTQC } from '../../modules/nf-core/fastqc/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' // TRIM/SPLIT FASTQ Files -include { FASTP } from '../../modules/nf-core/fastp/main' +include { FASTP } from '../modules/nf-core/fastp/main' // Create umi consensus bams from fastq -include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../../subworkflows/local/fastq_create_umi_consensus_fgbio/main' +include { 
FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main' // Map input reads to reference genome -include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main' +include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main' // Merge and index BAM files (optional) -include { BAM_MERGE_INDEX_SAMTOOLS } from '../../subworkflows/local/bam_merge_index_samtools/main' +include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main' // Convert BAM files -include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main' // Convert CRAM files (optional) -include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main' // Mark Duplicates (+QC) -include { BAM_MARKDUPLICATES } from '../../subworkflows/local/bam_markduplicates/main' -include { BAM_MARKDUPLICATES_SPARK } from '../../subworkflows/local/bam_markduplicates_spark/main' -include { BAM_SENTIEON_DEDUP } from '../../subworkflows/local/bam_sentieon_dedup/main' +include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main' +include { BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main' +include { 
BAM_SENTIEON_DEDUP } from '../subworkflows/local/bam_sentieon_dedup/main' // QC on CRAM -include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../../subworkflows/local/cram_qc_mosdepth_samtools/main' -include { CRAM_SAMPLEQC } from '../../subworkflows/local/cram_sampleqc/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' // Create recalibration tables -include { BAM_BASERECALIBRATOR } from '../../subworkflows/local/bam_baserecalibrator/main' -include { BAM_BASERECALIBRATOR_SPARK } from '../../subworkflows/local/bam_baserecalibrator_spark/main' +include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main' +include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main' // Create recalibrated cram files to use for variant calling (+QC) -include { BAM_APPLYBQSR } from '../../subworkflows/local/bam_applybqsr/main' -include { BAM_APPLYBQSR_SPARK } from '../../subworkflows/local/bam_applybqsr_spark/main' +include { BAM_APPLYBQSR } from '../subworkflows/local/bam_applybqsr/main' +include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main' // Variant calling on a single normal sample -include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../../subworkflows/local/bam_variant_calling_germline_all/main' +include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main' // Variant calling on a single tumor sample -include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../../subworkflows/local/bam_variant_calling_tumor_only_all/main' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main' // Variant calling on tumor/normal pair -include { BAM_VARIANT_CALLING_SOMATIC_ALL } from 
'../../subworkflows/local/bam_variant_calling_somatic_all/main' +include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main' // POST VARIANTCALLING: e.g. merging -include { POST_VARIANTCALLING } from '../../subworkflows/local/post_variantcalling/main' +include { POST_VARIANTCALLING } from '../subworkflows/local/post_variantcalling/main' // QC on VCF files -include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../../subworkflows/local/vcf_qc_bcftools_vcftools/main' +include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main' + +// Sample QC on CRAM files +include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main' // Annotation -include { VCF_ANNOTATE_ALL } from '../../subworkflows/local/vcf_annotate_all/main' +include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main' + +// REPORTING VERSIONS OF SOFTWARE USED +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' // MULTIQC -include { MULTIQC } from '../../modules/nf-core/multiqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -137,11 +261,183 @@ workflow SAREK { main: + // Parse samplesheet + // Set input, can either be from --input or from automatic retrieval in WorkflowSarek.groovy + ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? 
Channel.fromSamplesheet("input") : Channel.fromSamplesheet("input_restart") + SAMPLESHEET_TO_CHANNEL( + ch_from_samplesheet, + params.aligner, + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_rt, + params.bcftools_annotations, + params.bcftools_annotations_tbi, + params.bcftools_header_lines, + params.build_only_index, + params.dbsnp, + params.fasta, + params.germline_resource, + params.intervals, + params.joint_germline, + params.joint_mutect2, + params.known_indels, + params.known_snps, + params.no_intervals, + params.pon, + params.sentieon_dnascope_emit_mode, + params.sentieon_haplotyper_emit_mode, + params.seq_center, + params.seq_platform, + params.skip_tools, + params.step, + params.tools, + params.umi_read_structure, + params.wes) + + input_sample = SAMPLESHEET_TO_CHANNEL.out.input_sample + + // MULTIQC + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + // To gather all QC reports for MultiQC - ch_multiqc_files = Channel.empty() - multiqc_report = Channel.empty() - reports = Channel.empty() - versions = Channel.empty() + reports = Channel.empty() + // To gather used softwares versions for MultiQC + versions = Channel.empty() + + // Download cache + if (params.download_cache) { + // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache + ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) + snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) + DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) + snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache + vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } + + versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions) + } else { + // Looks for cache information either locally or on the cloud + INITIALIZE_ANNOTATION_CACHE( + (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), + params.snpeff_cache, + params.snpeff_genome, + params.snpeff_db, + (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), + params.vep_cache, + params.vep_species, + params.vep_cache_version, + params.vep_genome, + "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") + + snpeff_cache = INITIALIZE_ANNOTATION_CACHE.out.snpeff_cache + vep_cache = INITIALIZE_ANNOTATION_CACHE.out.ensemblvep_cache + } + + // Build indices if needed + PREPARE_GENOME( + 
params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_gc, + params.ascat_loci_rt, + bcftools_annotations, + params.chr_dir, + dbsnp, + fasta, + fasta_fai, + germline_resource, + known_indels, + known_snps, + pon) + + // Gather built indices or get them from the params + // Built from the fasta file: + dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect() + : PREPARE_GENOME.out.dict + fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).first() + : PREPARE_GENOME.out.fasta_fai + bwa = params.bwa ? Channel.fromPath(params.bwa).collect() + : PREPARE_GENOME.out.bwa + bwamem2 = params.bwamem2 ? Channel.fromPath(params.bwamem2).collect() + : PREPARE_GENOME.out.bwamem2 + dragmap = params.dragmap ? Channel.fromPath(params.dragmap).collect() + : PREPARE_GENOME.out.hashtable + minimap2 = params.minimap2 ? Channel.fromPath(params.minimap2).collect() + : PREPARE_GENOME.out.minimap2 + + // Gather index for mapping given the chosen aligner + index_alignement = (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem") ? bwa : + params.aligner == "bwa-mem2" ? bwamem2 : params.aligner == "dragmap" ? dragmap : minimap2 + + // TODO: add a params for msisensorpro_scan + msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan + + // For ASCAT, extracted from zip or tar.gz files: + allele_files = PREPARE_GENOME.out.allele_files + chr_files = PREPARE_GENOME.out.chr_files + gc_file = PREPARE_GENOME.out.gc_file + loci_files = PREPARE_GENOME.out.loci_files + rt_file = PREPARE_GENOME.out.rt_file + + // Tabix indexed vcf files: + bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty([]) + dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? 
Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) + germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries + known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) + known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi).collect() : PREPARE_GENOME.out.known_snps_tbi : Channel.value([]) + pon_tbi = params.pon ? params.pon_tbi ? Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : Channel.value([]) + + // known_sites is made by grouping both the dbsnp and the known snps/indels resources + // Which can either or both be optional + known_sites_indels = dbsnp.concat(known_indels).collect() + known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() + + known_sites_snps = dbsnp.concat(known_snps).collect() + known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() + + // Build intervals if needed + PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) + + // Intervals for speed up preprocessing/variant calling by spread/gather + // [interval.bed] all intervals in one file + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined + + // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) + intervals_for_preprocessing = params.wes ? 
+ intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : + Channel.value([ [ id:'null' ], [] ]) + + intervals = PREPARE_INTERVALS.out.intervals_bed // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + + intervals_and_num_intervals = intervals.map{ interval, num_intervals -> + if ( num_intervals < 1 ) [ [], num_intervals ] + else [ interval, num_intervals ] + } + + intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals -> + if ( num_intervals < 1 ) [ [], [], num_intervals ] + else [ intervals[0], intervals[1], num_intervals ] + } + + if (params.tools && params.tools.split(',').contains('cnvkit')) { + if (params.cnvkit_reference) { + cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect() + } else { + PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined) + cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference + + versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions) + } + } else { + cnvkit_reference = Channel.value([]) + } + + // Gather used softwares versions + versions = versions.mix(PREPARE_GENOME.out.versions) + versions = versions.mix(PREPARE_INTERVALS.out.versions) // PREPROCESSING @@ -255,12 +551,12 @@ workflow SAREK { // reads will be sorted sort_bam = true - FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON(reads_for_alignment, index_alignement, sort_bam, fasta, fasta_fai) + FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON(reads_for_alignment, index_alignement, sort_bam, fasta, fasta_fai) // Grouping the bams from the same samples not to stall the workflow // Use groupKey to make sure that the correct group can advance as soon as it is complete // and not stall the workflow until all reads from all channels are mapped - bam_mapped = 
FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bam + bam_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON.out.bam .combine(reads_grouping_key) // Creates a tuple of [ meta, bam, reads_grouping_key ] .filter { meta1, bam, meta2 -> meta1.sample == meta2.sample } // Add n_fastq and other variables to meta @@ -278,7 +574,7 @@ workflow SAREK { // Group .groupTuple() - bai_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bai + bai_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON.out.bai .combine(reads_grouping_key) // Creates a tuple of [ meta, bai, reads_grouping_key ] .filter { meta1, bai, meta2 -> meta1.sample == meta2.sample } // Add n_fastq and other variables to meta @@ -321,7 +617,7 @@ workflow SAREK { // Gather used softwares versions versions = versions.mix(CONVERT_FASTQ_INPUT.out.versions) - versions = versions.mix(FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.versions) + versions = versions.mix(FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON.out.versions) } if (params.step in ['mapping', 'markduplicates']) { @@ -333,7 +629,7 @@ workflow SAREK { // STEP 2: markduplicates (+QC) + convert to CRAM - // ch_bam_for_markduplicates will contain bam mapped with FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON when step is mapping + // ch_bam_for_markduplicates will contain bam mapped with FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON when step is mapping // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration cram_for_markduplicates = params.step == 'mapping' ? 
bam_mapped : input_sample.map{ meta, input, index -> [ meta, input ] } // if no MD is done, then run QC on mapped & converted CRAM files @@ -855,45 +1151,52 @@ workflow SAREK { } } - // - // Collate and save software versions - // version_yaml = Channel.empty() if (!(params.skip_tools && params.skip_tools.split(',').contains('versions'))) { - version_yaml = softwareVersionsToYAML(versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_sarek_software_mqc_versions.yml', sort: true, newLine: true) + CUSTOM_DUMPSOFTWAREVERSIONS(versions.unique().collectFile(name: 'collated_versions.yml')) + version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() } - // - // MODULE: MultiQC - // if (!(params.skip_tools && params.skip_tools.split(',').contains('multiqc'))) { + workflow_summary = WorkflowSarek.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + methods_description = WorkflowSarek.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) + + multiqc_files = Channel.empty() + multiqc_files = multiqc_files.mix(version_yaml) + multiqc_files = multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + multiqc_files = multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + multiqc_files = multiqc_files.mix(reports.collect().ifEmpty([])) + + MULTIQC(multiqc_files.collect(), ch_multiqc_config.collect().ifEmpty([]), ch_multiqc_custom_config.collect().ifEmpty([]), ch_multiqc_logo.collect().ifEmpty([])) - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(version_yaml) - ch_multiqc_files = ch_multiqc_files.mix(reports) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) multiqc_report = MULTIQC.out.report.toList() + versions = versions.mix(MULTIQC.out.versions) + } +} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } + NfcoreTemplate.dump_parameters(workflow, params) + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) +} - emit: - multiqc_report // channel: /path/to/multiqc_report.html - versions // channel: [ path(versions.yml) ] +workflow.onError { + if (workflow.errorReport.contains("Process requirement exceeds available memory")) { + println("🛑 Default resources 
exceed availability 🛑 ") + println("💡 See here on how to configure pipeline: https://nf-co.re/docs/usage/configuration#tuning-workflow-resources 💡") + } } /* From 85eb59ee652b37233c274ba1acd7b8a60cfbe9e0 Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Thu, 25 Apr 2024 19:56:08 +0300 Subject: [PATCH 02/17] updated pyproject.toml --- pyproject.toml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 56110621e7..7d08e1c8ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,13 +3,11 @@ [tool.ruff] line-length = 120 target-version = "py38" -cache-dir = "~/.cache/ruff" - -[tool.ruff.lint] select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] +cache-dir = "~/.cache/ruff" -[tool.ruff.lint.isort] +[tool.ruff.isort] known-first-party = ["nf_core"] -[tool.ruff.lint.per-file-ignores] +[tool.ruff.per-file-ignores] "__init__.py" = ["E402", "F401"] From b2346e35ea13ab2932f2adbb20e38d08e1618125 Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Thu, 25 Apr 2024 20:13:43 +0300 Subject: [PATCH 03/17] fixed workflows/sarek/main.nf --- workflows/sarek/main.nf | 443 +++++++--------------------------------- 1 file changed, 70 insertions(+), 373 deletions(-) diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index e95bc5e809..eed71e6977 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -4,206 +4,82 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, - params.bwa, - params.bwamem2, - 
params.bcftools_annotations, - params.bcftools_annotations_tbi, - params.bcftools_header_lines, - params.cf_chrom_len, - params.chr_dir, - params.cnvkit_reference, - params.dbnsfp, - params.dbnsfp_tbi, - params.dbsnp, - params.dbsnp_tbi, - params.dict, - params.dragmap, - params.fasta, - params.fasta_fai, - params.germline_resource, - params.germline_resource_tbi, - params.input, - params.intervals, - params.known_indels, - params.known_indels_tbi, - params.known_snps, - params.known_snps_tbi, - params.mappability, - params.minimap2, - params.multiqc_config, - params.ngscheckmate_bed, - params.pon, - params.pon_tbi, - params.sentieon_dnascope_model, - params.spliceai_indel, - params.spliceai_indel_tbi, - params.spliceai_snv, - params.spliceai_snv_tbi -] - -// only check if we are using the tools -if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache) -if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Initialize file channels based on params, defined in the params.genomes[params.genome] scope -bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty() -bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty() -cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : [] -dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) -fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() -fasta_fai = params.fasta_fai ? 
Channel.fromPath(params.fasta_fai).collect() : Channel.empty() -germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input -known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) -known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) -mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) -pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) -sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]) - -// Initialize value channels based on params, defined in the params.genomes[params.genome] scope -ascat_genome = params.ascat_genome ?: Channel.empty() -dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty() -known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty() -known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty() -ngscheckmate_bed = params.ngscheckmate_bed ? 
Channel.value(params.ngscheckmate_bed) : Channel.empty() -snpeff_db = params.snpeff_db ?: Channel.empty() -vep_cache_version = params.vep_cache_version ?: Channel.empty() -vep_genome = params.vep_genome ?: Channel.empty() -vep_species = params.vep_species ?: Channel.empty() - - -vep_extra_files = [] - -if (params.dbnsfp && params.dbnsfp_tbi) { - vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) - vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) -} - -if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { - vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL/NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_sarek_pipeline' // Create samplesheets to restart from different steps -include { SAMPLESHEET_TO_CHANNEL } from '../subworkflows/local/samplesheet_to_channel/main' -include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main' -include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../subworkflows/local/channel_markduplicates_create_csv/main' -include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main' -include { CHANNEL_APPLYBQSR_CREATE_CSV } from 
'../subworkflows/local/channel_applybqsr_create_csv/main' -include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main' - -// Download cache for SnpEff/VEP if needed -include { DOWNLOAD_CACHE_SNPEFF_VEP } from '../subworkflows/local/download_cache_snpeff_vep/main' - -// Initialize annotation cache -include { INITIALIZE_ANNOTATION_CACHE } from '../subworkflows/local/initialize_annotation_cache/main' - -// Build indices if needed -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main' - -// Build intervals if needed -include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main' - -// Build CNVkit reference if needed -include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main' +include { CHANNEL_ALIGN_CREATE_CSV } from '../../subworkflows/local/channel_align_create_csv/main' +include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../../subworkflows/local/channel_markduplicates_create_csv/main' +include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../../subworkflows/local/channel_baserecalibrator_create_csv/main' +include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../../subworkflows/local/channel_applybqsr_create_csv/main' +include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../../subworkflows/local/channel_variant_calling_create_csv/main' // Convert BAM files to FASTQ files -include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main' -include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../subworkflows/local/bam_convert_samtools/main' +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../../subworkflows/local/bam_convert_samtools/main' +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../../subworkflows/local/bam_convert_samtools/main' // Run FASTQC -include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FASTQC } from 
'../../modules/nf-core/fastqc/main' // TRIM/SPLIT FASTQ Files -include { FASTP } from '../modules/nf-core/fastp/main' +include { FASTP } from '../../modules/nf-core/fastp/main' // Create umi consensus bams from fastq -include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main' +include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../../subworkflows/local/fastq_create_umi_consensus_fgbio/main' // Map input reads to reference genome -include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main' +include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON } from '../../subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main' // Merge and index BAM files (optional) -include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main' +include { BAM_MERGE_INDEX_SAMTOOLS } from '../../subworkflows/local/bam_merge_index_samtools/main' // Convert BAM files -include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../../modules/nf-core/samtools/convert/main' // Convert CRAM files (optional) -include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../../modules/nf-core/samtools/convert/main' // Mark Duplicates (+QC) -include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main' -include { 
BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main' -include { BAM_SENTIEON_DEDUP } from '../subworkflows/local/bam_sentieon_dedup/main' +include { BAM_MARKDUPLICATES } from '../../subworkflows/local/bam_markduplicates/main' +include { BAM_MARKDUPLICATES_SPARK } from '../../subworkflows/local/bam_markduplicates_spark/main' +include { BAM_SENTIEON_DEDUP } from '../../subworkflows/local/bam_sentieon_dedup/main' // QC on CRAM -include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' -include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../../subworkflows/local/cram_qc_mosdepth_samtools/main' +include { CRAM_SAMPLEQC } from '../../subworkflows/local/cram_sampleqc/main' // Create recalibration tables -include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main' -include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main' +include { BAM_BASERECALIBRATOR } from '../../subworkflows/local/bam_baserecalibrator/main' +include { BAM_BASERECALIBRATOR_SPARK } from '../../subworkflows/local/bam_baserecalibrator_spark/main' // Create recalibrated cram files to use for variant calling (+QC) -include { BAM_APPLYBQSR } from '../subworkflows/local/bam_applybqsr/main' -include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main' +include { BAM_APPLYBQSR } from '../../subworkflows/local/bam_applybqsr/main' +include { BAM_APPLYBQSR_SPARK } from '../../subworkflows/local/bam_applybqsr_spark/main' // Variant calling on a single normal sample -include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main' +include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../../subworkflows/local/bam_variant_calling_germline_all/main' // Variant calling on 
a single tumor sample -include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../../subworkflows/local/bam_variant_calling_tumor_only_all/main' // Variant calling on tumor/normal pair -include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main' +include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../../subworkflows/local/bam_variant_calling_somatic_all/main' // POST VARIANTCALLING: e.g. merging -include { POST_VARIANTCALLING } from '../subworkflows/local/post_variantcalling/main' +include { POST_VARIANTCALLING } from '../../subworkflows/local/post_variantcalling/main' // QC on VCF files -include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main' - -// Sample QC on CRAM files -include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main' +include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../../subworkflows/local/vcf_qc_bcftools_vcftools/main' // Annotation -include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main' - -// REPORTING VERSIONS OF SOFTWARE USED -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { VCF_ANNOTATE_ALL } from '../../subworkflows/local/vcf_annotate_all/main' // MULTIQC -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC } from '../../modules/nf-core/multiqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -261,183 +137,11 @@ workflow SAREK { main: - // Parse samplesheet - // Set input, can either be from --input or from automatic retrieval in WorkflowSarek.groovy - ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? 
Channel.fromSamplesheet("input") : Channel.fromSamplesheet("input_restart") - SAMPLESHEET_TO_CHANNEL( - ch_from_samplesheet, - params.aligner, - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_rt, - params.bcftools_annotations, - params.bcftools_annotations_tbi, - params.bcftools_header_lines, - params.build_only_index, - params.dbsnp, - params.fasta, - params.germline_resource, - params.intervals, - params.joint_germline, - params.joint_mutect2, - params.known_indels, - params.known_snps, - params.no_intervals, - params.pon, - params.sentieon_dnascope_emit_mode, - params.sentieon_haplotyper_emit_mode, - params.seq_center, - params.seq_platform, - params.skip_tools, - params.step, - params.tools, - params.umi_read_structure, - params.wes) - - input_sample = SAMPLESHEET_TO_CHANNEL.out.input_sample - - // MULTIQC - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - // To gather all QC reports for MultiQC - reports = Channel.empty() - // To gather used softwares versions for MultiQC - versions = Channel.empty() - - // Download cache - if (params.download_cache) { - // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache - ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) - snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) - DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) - snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache - vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } - - versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions) - } else { - // Looks for cache information either locally or on the cloud - INITIALIZE_ANNOTATION_CACHE( - (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), - params.snpeff_cache, - params.snpeff_genome, - params.snpeff_db, - (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), - params.vep_cache, - params.vep_species, - params.vep_cache_version, - params.vep_genome, - "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") - - snpeff_cache = INITIALIZE_ANNOTATION_CACHE.out.snpeff_cache - vep_cache = INITIALIZE_ANNOTATION_CACHE.out.ensemblvep_cache - } - - // Build indices if needed - PREPARE_GENOME( - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, - bcftools_annotations, - params.chr_dir, - dbsnp, - 
fasta, - fasta_fai, - germline_resource, - known_indels, - known_snps, - pon) - - // Gather built indices or get them from the params - // Built from the fasta file: - dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect() - : PREPARE_GENOME.out.dict - fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).first() - : PREPARE_GENOME.out.fasta_fai - bwa = params.bwa ? Channel.fromPath(params.bwa).collect() - : PREPARE_GENOME.out.bwa - bwamem2 = params.bwamem2 ? Channel.fromPath(params.bwamem2).collect() - : PREPARE_GENOME.out.bwamem2 - dragmap = params.dragmap ? Channel.fromPath(params.dragmap).collect() - : PREPARE_GENOME.out.hashtable - minimap2 = params.minimap2 ? Channel.fromPath(params.minimap2).collect() - : PREPARE_GENOME.out.minimap2 - - // Gather index for mapping given the chosen aligner - index_alignement = (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem") ? bwa : - params.aligner == "bwa-mem2" ? bwamem2 : params.aligner == "dragmap" ? dragmap : minimap2 - - // TODO: add a params for msisensorpro_scan - msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan - - // For ASCAT, extracted from zip or tar.gz files: - allele_files = PREPARE_GENOME.out.allele_files - chr_files = PREPARE_GENOME.out.chr_files - gc_file = PREPARE_GENOME.out.gc_file - loci_files = PREPARE_GENOME.out.loci_files - rt_file = PREPARE_GENOME.out.rt_file - - // Tabix indexed vcf files: - bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty([]) - dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) - germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? 
Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries - known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) - known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi).collect() : PREPARE_GENOME.out.known_snps_tbi : Channel.value([]) - pon_tbi = params.pon ? params.pon_tbi ? Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : Channel.value([]) - - // known_sites is made by grouping both the dbsnp and the known snps/indels resources - // Which can either or both be optional - known_sites_indels = dbsnp.concat(known_indels).collect() - known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() - - known_sites_snps = dbsnp.concat(known_snps).collect() - known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() - - // Build intervals if needed - PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) - - // Intervals for speed up preprocessing/variant calling by spread/gather - // [interval.bed] all intervals in one file - intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined - intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined - - // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) - intervals_for_preprocessing = params.wes ? 
- intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : - Channel.value([ [ id:'null' ], [] ]) - - intervals = PREPARE_INTERVALS.out.intervals_bed // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather - intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather - - intervals_and_num_intervals = intervals.map{ interval, num_intervals -> - if ( num_intervals < 1 ) [ [], num_intervals ] - else [ interval, num_intervals ] - } - - intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals -> - if ( num_intervals < 1 ) [ [], [], num_intervals ] - else [ intervals[0], intervals[1], num_intervals ] - } - - if (params.tools && params.tools.split(',').contains('cnvkit')) { - if (params.cnvkit_reference) { - cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect() - } else { - PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined) - cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference - - versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions) - } - } else { - cnvkit_reference = Channel.value([]) - } - - // Gather used softwares versions - versions = versions.mix(PREPARE_GENOME.out.versions) - versions = versions.mix(PREPARE_INTERVALS.out.versions) + ch_multiqc_files = Channel.empty() + multiqc_report = Channel.empty() + reports = Channel.empty() + versions = Channel.empty() // PREPROCESSING @@ -1151,56 +855,49 @@ workflow SAREK { } } + // + // Collate and save software versions + // version_yaml = Channel.empty() if (!(params.skip_tools && params.skip_tools.split(',').contains('versions'))) { - CUSTOM_DUMPSOFTWAREVERSIONS(versions.unique().collectFile(name: 'collated_versions.yml')) - version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() + version_yaml = softwareVersionsToYAML(versions) + 
.collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_sarek_software_mqc_versions.yml', sort: true, newLine: true) } + // + // MODULE: MultiQC + // if (!(params.skip_tools && params.skip_tools.split(',').contains('multiqc'))) { - workflow_summary = WorkflowSarek.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowSarek.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) - - multiqc_files = Channel.empty() - multiqc_files = multiqc_files.mix(version_yaml) - multiqc_files = multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - multiqc_files = multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - multiqc_files = multiqc_files.mix(reports.collect().ifEmpty([])) - - MULTIQC(multiqc_files.collect(), ch_multiqc_config.collect().ifEmpty([]), ch_multiqc_custom_config.collect().ifEmpty([]), ch_multiqc_logo.collect().ifEmpty([])) + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(version_yaml) + ch_multiqc_files = ch_multiqc_files.mix(reports) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) multiqc_report = MULTIQC.out.report.toList() - versions = versions.mix(MULTIQC.out.versions) - } -} -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) -} -workflow.onError { - if (workflow.errorReport.contains("Process requirement exceeds available memory")) { - println("🛑 Default resources exceed availability 🛑 ") - println("💡 See here on how to configure pipeline: https://nf-co.re/docs/usage/configuration#tuning-workflow-resources 💡") - } + emit: + multiqc_report // channel: /path/to/multiqc_report.html + versions // channel: [ path(versions.yml) ] } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +*/ \ No newline at end of file From d902d1bf7dbf20bae29ab1e836c1d957054ea89b Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Thu, 25 Apr 2024 20:17:08 +0300 Subject: [PATCH 04/17] fixed import --- subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf | 2 +- workflows/sarek/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf index c237e64014..a51a2cc6ba 100644 --- a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf +++ b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf @@ -9,7 +9,7 @@ include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf' include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/fgbio/fastqtobam/main' include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main' -include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap_sentieon/main' +include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main' include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main' include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf' diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index eed71e6977..3acee756db 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -900,4 +900,4 @@ workflow SAREK { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ \ No newline at end of file +*/ From e49497a4c8ce54d394bd2feb794c7dc2c011ac4b Mon Sep 
17 00:00:00 2001 From: EgorGuga Date: Thu, 25 Apr 2024 20:18:58 +0300 Subject: [PATCH 05/17] fixed import 2 --- .../local/fastq_create_umi_consensus_fgbio/main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf index a51a2cc6ba..4f28d90ab8 100644 --- a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf +++ b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf @@ -6,12 +6,12 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf' -include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/fgbio/fastqtobam/main' -include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main' -include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main' -include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main' -include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf' +include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf' +include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/fgbio/fastqtobam/main' +include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main' +include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main' +include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main' +include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf' 
workflow FASTQ_CREATE_UMI_CONSENSUS_FGBIO { take: From 4dbf79b3307cfe108009cc35062e9db38d606d0d Mon Sep 17 00:00:00 2001 From: Egor Guguchkin <56133096+EgorGuga@users.noreply.github.com> Date: Fri, 26 Apr 2024 02:06:57 +0300 Subject: [PATCH 06/17] Removed redundancies in main.nf --- main.nf | 82 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/main.nf b/main.nf index 342b7c980f..798b11368c 100755 --- a/main.nf +++ b/main.nf @@ -119,25 +119,6 @@ vep_cache_version = params.vep_cache_version ?: Channel.empty() vep_genome = params.vep_genome ?: Channel.empty() vep_species = params.vep_species ?: Channel.empty() -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GATK.GRCh38 -profile docker --outdir results" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters - vep_extra_files = [] if (params.dbnsfp && params.dbnsfp_tbi) { @@ -158,13 +139,10 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SAREK } from './workflows/sarek' - // WORKFLOW: Run main nf-core/sarek analysis pipeline workflow NFCORE_SAREK { - take: - samplesheet // channel: samplesheet read in from --input + samplesheet main: versions = Channel.empty() @@ -299,13 +277,55 @@ workflow 
NFCORE_SAREK { // // WORKFLOW: Run pipeline // - SAREK ( - samplesheet + SAREK(samplesheet, + allele_files, + bcftools_annotations, + bcftools_annotations_tbi, + bcftools_header_lines, + cf_chrom_len, + chr_files, + cnvkit_reference, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + dict, + fasta, + fasta_fai, + gc_file, + germline_resource, + germline_resource_tbi, + index_alignement, + intervals_and_num_intervals, + intervals_bed_combined, + intervals_bed_combined_for_variant_calling, + intervals_bed_gz_tbi_and_num_intervals, + intervals_bed_gz_tbi_combined, + intervals_for_preprocessing, + known_indels_vqsr, + known_sites_indels, + known_sites_indels_tbi, + known_sites_snps, + known_sites_snps_tbi, + known_snps_vqsr, + loci_files, + mappability, + msisensorpro_scan, + ngscheckmate_bed, + pon, + pon_tbi, + rt_file, + sentieon_dnascope_model, + snpeff_cache, + vep_cache, + vep_cache_version, + vep_extra_files, + vep_fasta, + vep_genome, + vep_species ) emit: multiqc_report = SAREK.out.multiqc_report // channel: /path/to/multiqc_report.html - } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -313,8 +333,6 @@ workflow NFCORE_SAREK { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 workflow { main: @@ -322,7 +340,7 @@ workflow { // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION( params.version, params.help, params.validate_params, @@ -335,14 +353,12 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_SAREK ( - PIPELINE_INITIALISATION.out.samplesheet - ) + NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet) // // SUBWORKFLOW: Run completion tasks // - PIPELINE_COMPLETION ( + PIPELINE_COMPLETION( params.email, params.email_on_fail, params.plaintext_email, From 09fed401a5cc0c7ac7556d5c8559a49721f3d692 Mon Sep 17 
00:00:00 2001 From: Egor Guguchkin <56133096+EgorGuga@users.noreply.github.com> Date: Fri, 26 Apr 2024 02:08:43 +0300 Subject: [PATCH 07/17] Removed redundant includes in main.nf --- main.nf | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/main.nf b/main.nf index 798b11368c..77c40c741b 100755 --- a/main.nf +++ b/main.nf @@ -20,19 +20,6 @@ */ nextflow.enable.dsl = 2 - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { SAREK } from './workflows/sarek' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' - -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_sarek_pipeline' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES From 550434e70c1cc3aefde53f5d373c4204b3214d65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=95=D0=B3=D0=BE=D1=80=20=D0=93=D1=83=D0=B3=D1=83=D1=87?= =?UTF-8?q?=D0=BA=D0=B8=D0=BD?= Date: Fri, 26 Apr 2024 02:15:12 +0300 Subject: [PATCH 08/17] Revert "updated pyproject.toml" This reverts commit 85eb59ee652b37233c274ba1acd7b8a60cfbe9e0. 
--- pyproject.toml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7d08e1c8ef..56110621e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,11 +3,13 @@ [tool.ruff] line-length = 120 target-version = "py38" -select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] cache-dir = "~/.cache/ruff" -[tool.ruff.isort] +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] known-first-party = ["nf_core"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401"] From 21d76706867cb21b9f6c6d946def583aa8395547 Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Sat, 27 Apr 2024 17:12:04 +0300 Subject: [PATCH 09/17] restored tests --- .github/workflows/ci.yml | 2 ++ main.nf | 2 +- nextflow.config | 2 +- .../fastq_create_umi_consensus_fgbio/main.nf | 1 + subworkflows/local/prepare_genome/main.nf | 4 ++++ .../local/utils_nfcore_sarek_pipeline/main.nf | 1 + tests/config/pytesttags.yml | 19 ++++++++++++++----- 7 files changed, 24 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 48fbb4b6b1..3c37a3c79e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -243,6 +243,8 @@ jobs: - tags: "gatk4/mutect2" - tags: "gatk4spark/applybqsr" - tags: "gatk4spark/markduplicates" + - tags: "minimap2/index" + - tags: "minimap2/align" - tags: "mosdepth" - tags: "multiqc" - tags: "samblaster" diff --git a/main.nf b/main.nf index 77c40c741b..c3d6b64ae4 100755 --- a/main.nf +++ b/main.nf @@ -161,7 +161,7 @@ workflow NFCORE_SAREK { : PREPARE_GENOME.out.bwamem2 dragmap = params.dragmap ? Channel.fromPath(params.dragmap).map{ it -> [ [id:'dragmap'], it ] }.collect() : PREPARE_GENOME.out.hashtable - minimap2 = params.dragmap ? Channel.fromPath(params.minimap2).collect() + minimap2 = params.minimap2 ? 
Channel.fromPath(params.minimap2).collect() : PREPARE_GENOME.out.minimap2 // Gather index for mapping given the chosen aligner diff --git a/nextflow.config b/nextflow.config index e32da2cb94..a56f4b64ef 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,7 +46,7 @@ params { group_by_umi_strategy = 'Adjacency' // default strategy when running with UMI for GROUPREADSBYUMI // Preprocessing - aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too + aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2, dragmap and minimap2 can be used too use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default save_mapped = false // Mapped BAMs not saved save_output_as_bam = false // Output files from preprocessing are saved as bam and not as cram files diff --git a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf index 4f28d90ab8..8c3d90c453 100644 --- a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf +++ b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf @@ -37,6 +37,7 @@ workflow FASTQ_CREATE_UMI_CONSENSUS_FGBIO { // appropriately tagged interleaved FASTQ reads are mapped to the reference // bams will not be sorted (hence, sort = false) + // TODO minimap2 in current implementation wouldn't work correctly sort = false ALIGN_UMI(BAM2FASTQ.out.reads, map_index, sort, fasta, fai) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 772af47b37..49b2700985 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -12,6 +12,7 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf- include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main' include { DRAGMAP_HASHTABLE } from '../../../modules/nf-core/dragmap/hashtable/main' include { GATK4_CREATESEQUENCEDICTIONARY } from 
'../../../modules/nf-core/gatk4/createsequencedictionary/main' +include { MINIMAP2_INDEX } from '../../../modules/nf-core/minimap2/index/main' include { MSISENSORPRO_SCAN } from '../../../modules/nf-core/msisensorpro/scan/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS } from '../../../modules/nf-core/tabix/tabix/main' @@ -48,6 +49,7 @@ workflow PREPARE_GENOME { BWAMEM1_INDEX(fasta) // If aligner is bwa-mem BWAMEM2_INDEX(fasta) // If aligner is bwa-mem2 DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap + MINIMAP2_INDEX(fasta) // If aligner is minimap2 GATK4_CREATESEQUENCEDICTIONARY(fasta) MSISENSORPRO_SCAN(fasta) @@ -106,6 +108,7 @@ workflow PREPARE_GENOME { versions = versions.mix(DRAGMAP_HASHTABLE.out.versions) versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) versions = versions.mix(MSISENSORPRO_SCAN.out.versions) + versions = versions.mix(MINIMAP2_INDEX.out.versions) versions = versions.mix(SAMTOOLS_FAIDX.out.versions) versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions) versions = versions.mix(TABIX_DBSNP.out.versions) @@ -125,6 +128,7 @@ workflow PREPARE_GENOME { germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi + minimap2 = MINIMAP2_INDEX.out.index.collect() // path: genome.mmi msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf index afd58a8cc2..9a06707b19 100644 --- 
a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf @@ -109,6 +109,7 @@ workflow PIPELINE_INITIALISATION { params.known_snps, params.known_snps_tbi, params.mappability, + params.minimap2, params.multiqc_config, params.ngscheckmate_bed, params.pon, diff --git a/tests/config/pytesttags.yml b/tests/config/pytesttags.yml index 0c3c05c554..023e9ac423 100644 --- a/tests/config/pytesttags.yml +++ b/tests/config/pytesttags.yml @@ -92,9 +92,10 @@ umi: - modules/nf-core/fgbio/callmolecularconsensusreads/** - modules/nf-core/fgbio/fastqtobam/** - modules/nf-core/fgbio/groupreadsbyumi/** + - modules/nf-core/minimap2/align/** - modules/nf-core/samblaster/** - modules/nf-core/samtools/bam2fq/** - - subworkflows/local/fastq_align_bwamem_mem2_dragmap/** + - subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/** - subworkflows/local/fastq_create_umi_consensus_fgbio/** - tests/csv/3.0/fastq_umi.csv - tests/test_umi.yml @@ -112,7 +113,7 @@ fastp: bwamem: - conf/modules/aligner.config - modules/nf-core/bwa/mem/** - - subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/** + - subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/** - tests/csv/3.0/fastq_single.csv - tests/test_aligner_bwamem.yml @@ -120,7 +121,7 @@ bwamem: bwamem2: - conf/modules/aligner.config - modules/nf-core/bwamem2/mem/** - - subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/** + - subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/** - tests/csv/3.0/fastq_single.csv - tests/test_aligner_bwamem2.yml @@ -128,15 +129,23 @@ bwamem2: dragmap: - conf/modules/aligner.config - modules/nf-core/dragmap/align/** - - subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/** + - subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/** - tests/csv/3.0/fastq_single.csv - tests/test_aligner_dragmap.yml +### minimap2 +minimap2: + - conf/modules/aligner.config + - 
modules/nf-core/minimap2/align/** + - subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/** + - tests/csv/3.0/fastq_single.csv + - tests/test_aligner_minimap2.yml + ### sentieon/bwamem sentieon/bwamem: - conf/modules/aligner.config - modules/nf-core/sentieon/bwamem/** - - subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/** + - subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/** - tests/csv/3.0/fastq_single.csv - tests/test_sentieon_aligner_bwamem.yml From a02d15d50929117f7beeae48cba3f8a5976e50f3 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 7 May 2024 13:41:44 +0000 Subject: [PATCH 10/17] [automated] Fix code linting --- tests/config/pytesttags.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/pytesttags.yml b/tests/config/pytesttags.yml index 7bcaaa6263..d48cf6496f 100644 --- a/tests/config/pytesttags.yml +++ b/tests/config/pytesttags.yml @@ -93,7 +93,7 @@ umi: - modules/nf-core/fgbio/callmolecularconsensusreads/** - modules/nf-core/fgbio/fastqtobam/** - modules/nf-core/fgbio/groupreadsbyumi/** - - modules/nf-core/minimap2/align/** + - modules/nf-core/minimap2/align/** - modules/nf-core/mosdepth/** - modules/nf-core/samblaster/** - modules/nf-core/samtools/bam2fq/** From af4e486155bbf3a3a3e7383860f1a1a2e371755f Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Mon, 27 May 2024 15:13:40 +0300 Subject: [PATCH 11/17] fixed error; added test_aligner_minimap2.yml --- .../main.nf | 2 +- tests/test_aligner_minimap2.yml | 91 +++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 tests/test_aligner_minimap2.yml diff --git a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf index 81be7750c7..17f95ed6ee 100644 --- a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf +++ 
b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf @@ -30,7 +30,7 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON { BWAMEM1_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem BWAMEM2_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem2 DRAGMAP_ALIGN(reads, index, [[id:'no_fasta'], []], sort) // If aligner is dragmap - MINIMAP2_ALIGN(reads, index, [[id:'no_fasta'], []], sort, cigar_paf_format, cigar_bam) // If aligner is minimap2 + MINIMAP2_ALIGN(reads, index, sort, cigar_paf_format, cigar_bam) // If aligner is minimap2 // The sentieon-bwamem-module does sorting as part of the conversion from sam to bam. SENTIEON_BWAMEM(reads, index, fasta, fasta_fai) // If aligner is sentieon-bwamem diff --git a/tests/test_aligner_minimap2.yml b/tests/test_aligner_minimap2.yml new file mode 100644 index 0000000000..bf017116bc --- /dev/null +++ b/tests/test_aligner_minimap2.yml @@ -0,0 +1,91 @@ +- name: Run minimap2 + command: nextflow run main.nf -profile test --aligner minimap2 --save_reference --outdir results + tags: + - aligner + - minimap2 + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recal_table/test/test.recal.table + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns + - path: 
results/reference/minimap2/genome.mmi + md5sum: 26530ed9abe1f1318efe2b2cf3c84eb8 + - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi + # conda changes md5sums for test + - path: results/reference/dict/genome.dict + md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 + - path: results/reference/fai/genome.fasta.fai + md5sum: 3520cd30e1b100e55f578db9c855f685 + - path: results/reference/intervals/chr22_1-40001.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: results/reference/intervals/chr22_1-40001.bed.gz + md5sum: d3341fa28986c40b24fcc10a079dbb80 + - path: results/reference/intervals/genome.bed + md5sum: a87dc7d20ebca626f65cc16ff6c97a3e + - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi + # conda changes md5sums for test + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 5324 142 162 1061336 3324 2 0 0.593438 4993", "1.0 0.999998 138"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 0a9284c1496efd6fc7ce79f12c0ec7a0 + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: f2dae85c8e57dd3c00fc6be0dc860a4d + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: 0714422bef83371c37087bddbdae2485 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: e8cee27a1207c13ee7b54a330a593d04 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: 99573817f440d12374d6eadd0241ee07 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 0a9284c1496efd6fc7ce79f12c0ec7a0 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: f2dae85c8e57dd3c00fc6be0dc860a4d + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0714422bef83371c37087bddbdae2485 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: e8cee27a1207c13ee7b54a330a593d04 + - path: 
results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: 99573817f440d12374d6eadd0241ee07 + - path: results/reports/samtools/test/test.md.cram.stats + # conda changes md5sums for test + - path: results/reports/samtools/test/test.recal.cram.stats + # conda changes md5sums for test +- name: Build only index with minimap2 + command: nextflow run main.nf -profile test_cache --build_only_index --aligner minimap2 --input false --outdir results + tags: + - aligner + - build_only_index + - minimap2 + files: + - path: results/multiqc + - path: results/reference/minimap2/genome.mmi + md5sum: 26530ed9abe1f1318efe2b2cf3c84eb8 + - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi + # conda changes md5sums for test + - path: results/reference/dict/genome.dict + md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 + - path: results/reference/fai/genome.fasta.fai + md5sum: 3520cd30e1b100e55f578db9c855f685 + - path: results/reference/intervals/chr22_1-40001.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: results/reference/intervals/chr22_1-40001.bed.gz + md5sum: d3341fa28986c40b24fcc10a079dbb80 + - path: results/reference/intervals/genome.bed + md5sum: a87dc7d20ebca626f65cc16ff6c97a3e + - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi + # conda changes md5sums for test From 7f628f51fcf4e18fd3ef5a60ca7ef298642e287d Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Thu, 30 May 2024 13:45:55 +0300 Subject: [PATCH 12/17] updated test_aligner_minimap2.yml to match others --- tests/test_aligner_minimap2.yml | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/tests/test_aligner_minimap2.yml b/tests/test_aligner_minimap2.yml index bf017116bc..45fded9585 100644 --- a/tests/test_aligner_minimap2.yml +++ b/tests/test_aligner_minimap2.yml @@ -1,4 +1,4 @@ -- name: Run minimap2 +name: Run minimap2 command: nextflow run main.nf -profile test --aligner minimap2 --save_reference --outdir results tags: - aligner @@ 
-24,20 +24,12 @@ # binary changes md5sums on reruns - path: results/reference/minimap2/genome.mmi md5sum: 26530ed9abe1f1318efe2b2cf3c84eb8 - - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi - # conda changes md5sums for test - - path: results/reference/dict/genome.dict - md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 - - path: results/reference/fai/genome.fasta.fai - md5sum: 3520cd30e1b100e55f578db9c855f685 - path: results/reference/intervals/chr22_1-40001.bed md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - path: results/reference/intervals/chr22_1-40001.bed.gz md5sum: d3341fa28986c40b24fcc10a079dbb80 - path: results/reference/intervals/genome.bed md5sum: a87dc7d20ebca626f65cc16ff6c97a3e - - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi - # conda changes md5sums for test - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 5324 142 162 1061336 3324 2 0 0.593438 4993", "1.0 0.999998 138"] @@ -52,21 +44,21 @@ - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi md5sum: 99573817f440d12374d6eadd0241ee07 - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 0a9284c1496efd6fc7ce79f12c0ec7a0 - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: f2dae85c8e57dd3c00fc6be0dc860a4d - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0714422bef83371c37087bddbdae2485 - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: e8cee27a1207c13ee7b54a330a593d04 - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: 99573817f440d12374d6eadd0241ee07 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + - path: 
results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - path: results/reports/samtools/test/test.md.cram.stats # conda changes md5sums for test - path: results/reports/samtools/test/test.recal.cram.stats # conda changes md5sums for test - name: Build only index with minimap2 - command: nextflow run main.nf -profile test_cache --build_only_index --aligner minimap2 --input false --outdir results + command: nextflow run main.nf -profile test --build_only_index --aligner minimap2 --input false --outdir results tags: - aligner - build_only_index @@ -75,17 +67,9 @@ - path: results/multiqc - path: results/reference/minimap2/genome.mmi md5sum: 26530ed9abe1f1318efe2b2cf3c84eb8 - - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi - # conda changes md5sums for test - - path: results/reference/dict/genome.dict - md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 - - path: results/reference/fai/genome.fasta.fai - md5sum: 3520cd30e1b100e55f578db9c855f685 - path: results/reference/intervals/chr22_1-40001.bed md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - path: results/reference/intervals/chr22_1-40001.bed.gz md5sum: d3341fa28986c40b24fcc10a079dbb80 - path: results/reference/intervals/genome.bed md5sum: a87dc7d20ebca626f65cc16ff6c97a3e - - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi - # conda changes md5sums for test From e91e9723061fecbfcca5f1277ebfac1cefb9f912 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Mon, 10 Jun 2024 16:11:21 +0200 Subject: [PATCH 13/17] Update tests/test_aligner_minimap2.yml --- tests/test_aligner_minimap2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_aligner_minimap2.yml b/tests/test_aligner_minimap2.yml index 45fded9585..a84bd8e309 100644 --- a/tests/test_aligner_minimap2.yml +++ b/tests/test_aligner_minimap2.yml @@ -1,4 +1,4 @@ -name: Run minimap2 +- name: Run minimap2 command: nextflow run main.nf -profile test --aligner minimap2 --save_reference --outdir results tags: - aligner From 
ecf0416f8a91042dc2490b99e72c6903e95a60cf Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Fri, 6 Dec 2024 15:30:45 +0300 Subject: [PATCH 14/17] restored minimap2 reference in nextflow_schema.json --- nextflow_schema.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index ab6de52704..f34281d243 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -758,6 +758,15 @@ "description": "Path to Control-FREEC mappability file.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, + "minimap2": { + "type": "string", + "fa_icon": "fas fa-file", + "format": "file-path", + "exists": false, + "mimetype": "text/plain", + "description": "Path to minimap2 genome index file.", + "help_text": "If you use AWS iGenomes, this has NOT already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the known FASTA file. Combine with `--save_reference` to save for future runs." 
+ }, "ngscheckmate_bed": { "type": "string", "fa_icon": "fas fa-file", From ae682a2dff6312e7c464c55fd91dabc5afb7a01b Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Fri, 6 Dec 2024 16:02:38 +0300 Subject: [PATCH 15/17] updated minimap2 module --- .../nf-core/minimap2/align/environment.yml | 7 +- modules/nf-core/minimap2/align/main.nf | 36 +- modules/nf-core/minimap2/align/meta.yml | 112 +++-- .../nf-core/minimap2/align/tests/main.nf.test | 318 ++++++++++-- .../minimap2/align/tests/main.nf.test.snap | 459 +++++++++++++++++- .../nf-core/minimap2/index/environment.yml | 4 +- modules/nf-core/minimap2/index/main.nf | 2 +- modules/nf-core/minimap2/index/meta.yml | 45 +- modules/nf-core/minimap2/index/tests/tags.yml | 2 +- .../main.nf | 5 +- 10 files changed, 847 insertions(+), 143 deletions(-) diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index 051ca8efba..696fd427a6 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -1,9 +1,8 @@ -name: minimap2_align channels: - conda-forge - bioconda - - defaults + dependencies: + - bioconda::htslib=1.20 - bioconda::minimap2=2.28 - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + - bioconda::samtools=1.20 \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 62349edc22..e8852e1f6e 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -5,38 +5,49 @@ process MINIMAP2_ALIGN { // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" input: tuple val(meta), path(reads) tuple val(meta2), path(reference) val bam_format + val bam_index_extension val cigar_paf_format val cigar_bam output: - tuple val(meta), path("*.paf"), optional: true, emit: paf - tuple val(meta), path("*.bam"), optional: true, emit: bam - tuple val(meta), path("*.csi"), optional: true, emit: csi - path "versions.yml" , emit: versions + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? 
"samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ + $samtools_reset_fastq \\ minimap2 \\ $args \\ -t $task.cpus \\ - ${reference ?: reads} \\ - $reads \\ + $target \\ + $query \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output @@ -45,15 +56,20 @@ process MINIMAP2_ALIGN { cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ touch $output_file - touch ${prefix}.csi + ${bam_index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index 408522d5c9..a4cfc891a5 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -14,62 +14,86 @@ tools: homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FASTA or FASTQ files of size 1 and 2 for single-end - and paired-end data, respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test_ref'] - - reference: - type: file - description: | - Reference database in FASTA format. 
- - bam_format: - type: boolean - description: Specify that output should be in BAM format - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - - bam_format: + type: boolean + description: Specify that output should be in BAM format + - - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - paf: - type: file - description: Alignment in PAF format - pattern: "*.paf" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" - bam: - type: file - description: Alignment in BAM format - pattern: "*.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@heuermh" - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" maintainers: - "@heuermh" - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index 83cceeab19..4072c17197 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -9,22 +9,23 @@ nextflow_process { tag "minimap2" tag "minimap2/align" - test("sarscov2 - fastq, fasta, true, false, false") { + test("sarscov2 - fastq, fasta, true, [], false, false") { when { process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -33,7 +34,43 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + 
bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, process.out.versions ).match() } ) @@ -49,17 +86,18 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -68,7 +106,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -83,15 +122,16 @@ nextflow_process { """ input[0] 
= [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), ] input[1] = [ [ id:'test_ref' ], // meta map [] ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -100,7 +140,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -108,24 +149,57 @@ nextflow_process { } - test("sarscov2 - fastq, fasta, true, false, false - stub") { + test("sarscov2 - bam, fasta, true, [], false, false") { - options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { when { process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = 'bai' input[4] = false + input[5] = false """ } } @@ -134,8 +208,9 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, - file(process.out.csi[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, process.out.versions ).match() } ) @@ -143,7 +218,36 @@ nextflow_process { } - test("sarscov2 - fastq, fasta, false, false, false - stub") { + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, [], false, false - stub") { options "-stub" @@ -152,15 +256,80 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = false - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -168,11 +337,102 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.paf[0][1]).name, - file(process.out.csi[0][1]).name, - process.out.versions - ).match() } + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } ) } diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index 19a8f20412..b3d8af970b 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -1,69 +1,476 @@ { - "sarscov2 - fastq, fasta, true, false, false": { + "sarscov2 - bam, fasta, true, 'bai', false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + 
"@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + 
"versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - 
"nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-05T10:14:18.939731126" + "timestamp": "2024-07-23T11:18:18.964586894" }, - "sarscov2 - fastq, fasta, true, false, false - stub": { + "sarscov2 - fastq, fasta, true, [], false, false": { "content": [ - "test.bam", - "test.csi", [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-05T10:14:34.275879844" + "timestamp": "2024-07-23T11:17:48.667488325" }, - "sarscov2 - fastq, fasta, false, false, false - stub": { + "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ - "test.paf", - "test.csi", [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-05T10:14:39.227958138" + "timestamp": "2024-07-23T11:18:02.517416733" }, - "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "sarscov2 - bam, fasta, true, [], false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + 
"@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-05T10:14:24.265054877" + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" }, "sarscov2 - fastq, [], true, false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + 
"@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + 
"@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-05T10:14:29.27901773" + "timestamp": "2024-07-23T11:18:34.246998277" } } \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml index 8a912a1213..34235b9266 100644 --- a/modules/nf-core/minimap2/index/environment.yml +++ 
b/modules/nf-core/minimap2/index/environment.yml @@ -1,7 +1,5 @@ -name: minimap2_index channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::minimap2=2.28 + - bioconda::minimap2=2.28 \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf index 383202142c..7892362dde 100644 --- a/modules/nf-core/minimap2/index/main.nf +++ b/modules/nf-core/minimap2/index/main.nf @@ -41,4 +41,4 @@ process MINIMAP2_INDEX { minimap2: \$(minimap2 --version 2>&1) END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml index 1d29e3f2d6..6aeaf9853c 100644 --- a/modules/nf-core/minimap2/index/meta.yml +++ b/modules/nf-core/minimap2/index/meta.yml @@ -11,33 +11,36 @@ tools: homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: | - Reference database in FASTA format. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + Reference database in FASTA format. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - index: - type: file - description: Minimap2 fasta index. - pattern: "*.mmi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mmi": + type: file + description: Minimap2 fasta index. 
+ pattern: "*.mmi" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@yuukiiwa" - "@drpatelh" maintainers: - "@yuukiiwa" - - "@drpatelh" + - "@drpatelh" \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/tests/tags.yml b/modules/nf-core/minimap2/index/tests/tags.yml index e5ef8e19f3..ef5b96adb2 100644 --- a/modules/nf-core/minimap2/index/tests/tags.yml +++ b/modules/nf-core/minimap2/index/tests/tags.yml @@ -1,2 +1,2 @@ minimap2/index: - - modules/nf-core/minimap2/index/** + - modules/nf-core/minimap2/index/** \ No newline at end of file diff --git a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf index 17f95ed6ee..298dda22e9 100644 --- a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf +++ b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_minimap2_sentieon/main.nf @@ -23,14 +23,11 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_MINIMAP2_SENTIEON { versions = Channel.empty() reports = Channel.empty() - cigar_paf_format = false - cigar_bam = false - // Only one of the following should be run BWAMEM1_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem BWAMEM2_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem2 DRAGMAP_ALIGN(reads, index, [[id:'no_fasta'], []], sort) // If aligner is dragmap - MINIMAP2_ALIGN(reads, index, sort, cigar_paf_format, cigar_bam) // If aligner is minimap2 + MINIMAP2_ALIGN(reads, index, true, "bai", false, false) // If aligner is minimap2 // The sentieon-bwamem-module does sorting as part of the conversion from sam to bam. 
SENTIEON_BWAMEM(reads, index, fasta, fasta_fai) // If aligner is sentieon-bwamem From bcb4743579bc2beedebb537c5d8e375d18e9242f Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Fri, 6 Dec 2024 16:49:06 +0300 Subject: [PATCH 16/17] nf-test added --- tests/aligner-minimap2.nf.test | 81 ++++++++++++++++++++++++ tests/aligner-minimap2.nf.test.snap | 97 +++++++++++++++++++++++++++++ tests/test_aligner_minimap2.yml | 75 ---------------------- 3 files changed, 178 insertions(+), 75 deletions(-) create mode 100644 tests/aligner-minimap2.nf.test create mode 100644 tests/aligner-minimap2.nf.test.snap delete mode 100644 tests/test_aligner_minimap2.yml diff --git a/tests/aligner-minimap2.nf.test b/tests/aligner-minimap2.nf.test new file mode 100644 index 0000000000..b5d12c6cb7 --- /dev/null +++ b/tests/aligner-minimap2.nf.test @@ -0,0 +1,81 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + tag "pipeline_sarek" + + test("Run with profile test | --aligner minimap2 --save_reference | skip QC/recal/md") { + + when { + params { + aligner = 'minimap2' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + save_reference = true + skip_tools = 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools' + tools = '' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + def fasta = params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta' + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of 
successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path, + // All cram files + cram_files.collect{ file -> [ file.getName(), cram(file.toString(), fasta).getReadsMD5() ] } + ).match() } + ) + } + } + + test("Run with profile test | --aligner minimap2 --save_reference --build_only_index") { + + when { + params { + aligner = 'minimap2' + build_only_index = true + input = false + outdir = "$outputDir" + save_reference = true + skip_tools = 'multiqc' + tools = '' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/aligner-minimap2.nf.test.snap b/tests/aligner-minimap2.nf.test.snap new file mode 100644 index 0000000000..08013be4c8 --- /dev/null +++ b/tests/aligner-minimap2.nf.test.snap @@ -0,0 +1,97 @@ +{ + "Run with profile test | --aligner minimap2 --save_reference | skip QC/recal/md": { 
+ "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "MINIMAP2_ALIGN": { + "minimap2": "2.28-r1209", + "samtools": 1.2 + }, + "Workflow": { + "nf-core/sarek": "v3.5.0dev" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/minimap2", + "reference/minimap2/genome.mmi" + ], + [ + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.mmi:md5,26530ed9abe1f1318efe2b2cf3c84eb8" + ], + [ + [ + "test.sorted.cram", + "d2957735610ee79298a5c2a0c8adea19" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-06T16:42:52.26903741" + }, + "Run with profile test | --aligner minimap2 --save_reference --build_only_index": { + "content": [ + 5, + { + "Workflow": { + "nf-core/sarek": "v3.5.0dev" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/minimap2", + "reference/minimap2/genome.mmi" + ], + [ + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + 
"genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.mmi:md5,26530ed9abe1f1318efe2b2cf3c84eb8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-06T16:43:26.649350225" + } +} \ No newline at end of file diff --git a/tests/test_aligner_minimap2.yml b/tests/test_aligner_minimap2.yml deleted file mode 100644 index a84bd8e309..0000000000 --- a/tests/test_aligner_minimap2.yml +++ /dev/null @@ -1,75 +0,0 @@ -- name: Run minimap2 - command: nextflow run main.nf -profile test --aligner minimap2 --save_reference --outdir results - tags: - - aligner - - minimap2 - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns - - path: results/preprocessing/recal_table/test/test.recal.table - # binary changes md5sums on reruns - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns - - path: results/reference/minimap2/genome.mmi - md5sum: 26530ed9abe1f1318efe2b2cf3c84eb8 - - path: results/reference/intervals/chr22_1-40001.bed - md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - - path: results/reference/intervals/chr22_1-40001.bed.gz - md5sum: d3341fa28986c40b24fcc10a079dbb80 - - path: results/reference/intervals/genome.bed - md5sum: a87dc7d20ebca626f65cc16ff6c97a3e - - path: results/reports/fastqc/test-test_L1 - - path: 
results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 5324 142 162 1061336 3324 2 0 0.593438 4993", "1.0 0.999998 138"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 0a9284c1496efd6fc7ce79f12c0ec7a0 - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: f2dae85c8e57dd3c00fc6be0dc860a4d - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: 0714422bef83371c37087bddbdae2485 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: e8cee27a1207c13ee7b54a330a593d04 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: 99573817f440d12374d6eadd0241ee07 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - - path: results/reports/samtools/test/test.md.cram.stats - # conda changes md5sums for test - - path: results/reports/samtools/test/test.recal.cram.stats - # conda changes md5sums for test -- name: Build only index with minimap2 - command: nextflow run main.nf -profile test --build_only_index --aligner minimap2 --input false --outdir results - tags: - - aligner - - build_only_index - - minimap2 - files: - - path: results/multiqc - - path: results/reference/minimap2/genome.mmi - md5sum: 26530ed9abe1f1318efe2b2cf3c84eb8 - - path: 
results/reference/intervals/chr22_1-40001.bed - md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - - path: results/reference/intervals/chr22_1-40001.bed.gz - md5sum: d3341fa28986c40b24fcc10a079dbb80 - - path: results/reference/intervals/genome.bed - md5sum: a87dc7d20ebca626f65cc16ff6c97a3e From 8a73a062b2a4831686fa177e53a1ab15700567cb Mon Sep 17 00:00:00 2001 From: Egor Guguchkin <56133096+EgorGuga@users.noreply.github.com> Date: Fri, 6 Dec 2024 17:45:33 +0300 Subject: [PATCH 17/17] Apply suggestions from code review applied suggested change in docs/usage.md Co-authored-by: Anders Sune Pedersen <37172585+asp8200@users.noreply.github.com> --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 6142aa43b8..f95937e22c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -860,7 +860,7 @@ For GATK.GRCh38 the links for each reference file and the corresponding processe | known_snps | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | | known_snps_tbi | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | | mappability | ControlFREEC | http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip | http://boevalab.inf.ethz.ch/FREEC/tutorial.html | -| minimap2 | Minimap2 | minimap2 -x sr -d ${fasta.baseName} $fasta | | +| minimap2 | Minimap2 | `minimap2 -x sr -d ${fasta.baseName} $fasta` | | | pon | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | | pon_tbi | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | 
https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- |