From 049287ee069a9bb406fbc1e4d34b33c61565d7c7 Mon Sep 17 00:00:00 2001 From: Omer An Date: Tue, 27 Aug 2024 16:33:55 +0800 Subject: [PATCH 1/5] Implemented MSISENSORPRO tumor-only analysis --- conf/modules/msisensorpro.config | 10 +++ main.nf | 19 +++++- .../nf-core/msisensorpro/msisomatic/main.nf | 2 +- .../nf-core/msisensorpro/msisomatic/meta.yml | 6 +- .../msisensorpro/msitumoronly/environment.yml | 7 ++ .../nf-core/msisensorpro/msitumoronly/main.nf | 46 +++++++++++++ .../msisensorpro/msitumoronly/meta.yml | 68 +++++++++++++++++++ nextflow.config | 2 + nextflow_schema.json | 10 +++ .../bam_variant_calling_somatic_all/main.nf | 9 +-- .../main.nf | 23 +++++-- subworkflows/local/prepare_genome/main.nf | 4 -- .../local/samplesheet_to_channel/main.nf | 4 +- .../local/utils_nfcore_sarek_pipeline/main.nf | 2 + workflows/sarek/main.nf | 2 + 15 files changed, 186 insertions(+), 28 deletions(-) create mode 100644 modules/nf-core/msisensorpro/msitumoronly/environment.yml create mode 100644 modules/nf-core/msisensorpro/msitumoronly/main.nf create mode 100644 modules/nf-core/msisensorpro/msitumoronly/meta.yml diff --git a/conf/modules/msisensorpro.config b/conf/modules/msisensorpro.config index 8253cccc50..1526a61f13 100644 --- a/conf/modules/msisensorpro.config +++ b/conf/modules/msisensorpro.config @@ -22,4 +22,14 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + withName: 'MSISENSORPRO_MSITUMORONLY' { + ext.args = { params.wes ? '-c 20' : '-c 15' } // default values by MSIsensorpro + ext.prefix = { "${meta.id}.tumor_only" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/msisensorpro/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/main.nf b/main.nf index 92bcc47b2b..1830805b3f 100755 --- a/main.nf +++ b/main.nf @@ -77,6 +77,7 @@ include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nf include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' +include { MSISENSORPRO_SCAN } from './modules/nf-core/msisensorpro/scan/main' // Initialize fasta file with meta map: fasta = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() @@ -91,6 +92,7 @@ germline_resource = params.germline_resource ? Channel.fromPath(para known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) +msisensorpro_baseline = params.msisensorpro_baseline ? Channel.fromPath(params.msisensorpro_baseline).collect() : Channel.empty() pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]) @@ -166,8 +168,18 @@ workflow NFCORE_SAREK { aligner == "bwa-mem2" ? 
bwamem2 : dragmap - // TODO: add a params for msisensorpro_scan - msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan + // Reference msi list for MSIsensorpro + if (params.tools && params.tools.split(',').contains('msisensorpro')) { + if (params.msisensorpro_scan) { + msisensorpro_scan = Channel.fromPath(params.msisensorpro_scan).collect() + } else { + MSISENSORPRO_SCAN(fasta) + msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } + versions = versions.mix(MSISENSORPRO_SCAN.out.versions) + } + } else { + msisensorpro_scan = Channel.empty() + } // For ASCAT, extracted from zip or tar.gz files allele_files = PREPARE_GENOME.out.allele_files @@ -177,7 +189,7 @@ workflow NFCORE_SAREK { rt_file = PREPARE_GENOME.out.rt_file // Tabix indexed vcf files - bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.empty([]) + bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.value([]) dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries known_indels_tbi = params.known_indels ? params.known_indels_tbi ? 
Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) @@ -296,6 +308,7 @@ workflow NFCORE_SAREK { loci_files, mappability, msisensorpro_scan, + msisensorpro_baseline, ngscheckmate_bed, pon, pon_tbi, diff --git a/modules/nf-core/msisensorpro/msisomatic/main.nf b/modules/nf-core/msisensorpro/msisomatic/main.nf index 9b0084d949..52a33705a3 100644 --- a/modules/nf-core/msisensorpro/msisomatic/main.nf +++ b/modules/nf-core/msisensorpro/msisomatic/main.nf @@ -26,7 +26,7 @@ process MSISENSORPRO_MSISOMATIC { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def fasta = fasta ? "-g ${fasta}" : "" - def intervals = intervals ? " -e ${intervals} " : "" + def intervals = intervals ? "-e ${intervals} " : "" """ msisensor-pro \\ msi \\ diff --git a/modules/nf-core/msisensorpro/msisomatic/meta.yml b/modules/nf-core/msisensorpro/msisomatic/meta.yml index a6dda66ff2..7f7f4281e3 100644 --- a/modules/nf-core/msisensorpro/msisomatic/meta.yml +++ b/modules/nf-core/msisensorpro/msisomatic/meta.yml @@ -45,7 +45,7 @@ input: pattern: "*.{fasta}" - msisensor_scan: type: file - description: Output from msisensor-pro/scan, conaining list of msi regions + description: Output from msisensor-pro/scan, containing list of msi regions pattern: "*.list" output: - meta: @@ -69,10 +69,6 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - list: - type: file - description: File containing microsatellite list - pattern: "*.{list}" authors: - "@FriederikeHanssen" maintainers: diff --git a/modules/nf-core/msisensorpro/msitumoronly/environment.yml b/modules/nf-core/msisensorpro/msitumoronly/environment.yml new file mode 100644 index 0000000000..47842c75b3 --- /dev/null +++ b/modules/nf-core/msisensorpro/msitumoronly/environment.yml @@ -0,0 +1,7 @@ +name: msisensorpro_msitumoronly +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::msisensor-pro=1.2.0 diff 
--git a/modules/nf-core/msisensorpro/msitumoronly/main.nf b/modules/nf-core/msisensorpro/msitumoronly/main.nf new file mode 100644 index 0000000000..92dc3b6b56 --- /dev/null +++ b/modules/nf-core/msisensorpro/msitumoronly/main.nf @@ -0,0 +1,46 @@ +process MSISENSORPRO_MSITUMORONLY { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/msisensor-pro:1.2.0--hfc31af2_0' : + 'biocontainers/msisensor-pro:1.2.0--hfc31af2_0' }" + + input: + tuple val(meta), path(tumor), path(tumor_index), path(intervals) + path (fasta) + path (msisensor_baseline) + + output: + tuple val(meta), path("${prefix}") , emit: output_report + tuple val(meta), path("${prefix}_dis") , emit: output_dis + tuple val(meta), path("${prefix}_all") , emit: output_all + tuple val(meta), path("${prefix}_unstable"), emit: output_unstable + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def fasta = fasta ? "-g ${fasta}" : "" + def intervals = intervals ? 
"-e ${intervals} " : "" + """ + msisensor-pro \\ + pro \\ + -d ${msisensor_baseline} \\ + -t ${tumor} \\ + ${fasta} \\ + -o $prefix \\ + -b ${task.cpus} \\ + ${intervals} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/msisensorpro/msitumoronly/meta.yml b/modules/nf-core/msisensorpro/msitumoronly/meta.yml new file mode 100644 index 0000000000..d6b81a5913 --- /dev/null +++ b/modules/nf-core/msisensorpro/msitumoronly/meta.yml @@ -0,0 +1,68 @@ +name: msisensorpro_msitumoronly +description: MSIsensor-pro evaluates Microsatellite Instability (MSI) for cancer patients with next generation sequencing data. It accepts the whole genome sequencing, whole exome sequencing and target region (panel) sequencing data as input +keywords: + - micro-satellite-scan + - msisensor-pro + - msi + - somatic + - tumor-only +tools: + - msisensorpro: + description: Microsatellite Instability (MSI) detection using high-throughput sequencing data. + homepage: https://github.com/xjtu-omics/msisensor-pro + documentation: https://github.com/xjtu-omics/msisensor-pro/wiki + tool_dev_url: https://github.com/xjtu-omics/msisensor-pro + doi: "10.1016/j.gpb.2020.02.001" + licence: ["Custom Licence"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - tumor_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: bed file containing interval information, optional + pattern: "*.{bed}" + - fasta: + type: file + description: Reference genome + pattern: "*.{fasta}" + - msisensor_baseline: + type: file + description: File containing custom list of msi regions from a panel of normals to be used as a baseline +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output_report: + type: file + description: File containing final report with all detected microsatellites, unstable somatic microsatellites, msi score + - output_dis: + type: file + description: File containing distribution results + - output_all: + type: file + description: File containing all sites + - output_unstable: + type: file + description: File containing unstable sites + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@bounlu" +maintainers: + - "@FriederikeHanssen" diff --git a/nextflow.config b/nextflow.config index de95ae8c86..6a3d9552e3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,6 +71,8 @@ params { ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling + msisensorpro_scan = null // by default the reference is built from the fasta file + msisensorpro_baseline = null // by default tumor-only mode is not used in MSIsensorpro only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample 
sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1611d58f40..bd10acb70f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -733,6 +733,16 @@ "hidden": true, "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, + "msisensorpro_scan": { + "type": "string", + "fa_icon": "fas fa-file-alt", + "description": "Path to MSIsensorpro reference genome microsatellites information file." + }, + "msisensorpro_baseline": { + "type": "string", + "fa_icon": "fas fa-file-alt", + "description": "Path to MSIsensorpro custom baseline file for tumor-only analysis." + }, "ngscheckmate_bed": { "type": "string", "fa_icon": "fas fa-file", diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index cdfabfc3ac..21a7e0ba34 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -44,13 +44,12 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { wes // boolean: [mandatory] [default: false] whether targeted data is processed main: - versions = Channel.empty() + versions = Channel.empty() //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config vcf_freebayes = Channel.empty() vcf_manta = Channel.empty() vcf_strelka = Channel.empty() - out_msisensorpro = Channel.empty() vcf_mutect2 = Channel.empty() vcf_tiddit = Channel.empty() @@ -175,11 +174,10 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { } // MSISENSOR - if (tools.split(',').contains('msisensorpro')) { + if (tools.split(',').contains('msisensorpro') && msisensorpro_scan) { MSISENSORPRO_MSISOMATIC(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ fasta ] }, msisensorpro_scan) versions = 
versions.mix(MSISENSORPRO_MSISOMATIC.out.versions) - out_msisensorpro = out_msisensorpro.mix(MSISENSORPRO_MSISOMATIC.out.output_report) } // MUTECT2 @@ -190,7 +188,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { // joint_mutect2 mode needs different meta.map than regular mode cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> joint_mutect2 ? - //we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step + // we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step [ meta + [ id:meta.patient ], [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] : [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] }, @@ -232,7 +230,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) emit: - out_msisensorpro vcf_all vcf_freebayes vcf_manta diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf index 59b14ed898..97b4e6f54d 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -11,6 +11,7 @@ include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_cal include { BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC } from '../bam_variant_calling_tumor_only_controlfreec/main' include { BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA } from '../bam_variant_calling_tumor_only_manta/main' include { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 } from '../bam_variant_calling_tumor_only_mutect2/main' +include { MSISENSORPRO_MSITUMORONLY } from '../../../modules/nf-core/msisensorpro/msitumoronly/main' workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { take: @@ -32,21 +33,22 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file 
zipped mappability + msisensorpro_baseline // channel: [optional] msisensorpro_baseline panel_of_normals // channel: [optional] panel_of_normals panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode wes // boolean: [mandatory] [default: false] whether targeted data is processed main: - versions = Channel.empty() + versions = Channel.empty() //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config - vcf_freebayes = Channel.empty() - vcf_manta = Channel.empty() - vcf_mpileup = Channel.empty() - vcf_mutect2 = Channel.empty() - vcf_strelka = Channel.empty() - vcf_tiddit = Channel.empty() + vcf_freebayes = Channel.empty() + vcf_manta = Channel.empty() + vcf_mpileup = Channel.empty() + vcf_mutect2 = Channel.empty() + vcf_strelka = Channel.empty() + vcf_tiddit = Channel.empty() // MPILEUP if (tools.split(',').contains('mpileup') || tools.split(',').contains('controlfreec')) { @@ -109,6 +111,13 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) } + // MSISENSOR + if (tools.split(',').contains('msisensorpro') && msisensorpro_baseline) { + MSISENSORPRO_MSITUMORONLY(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ fasta ] }, msisensorpro_baseline) + + versions = versions.mix(MSISENSORPRO_MSITUMORONLY.out.versions) + } + // MUTECT2 if (tools.split(',').contains('mutect2')) { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2( diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 772af47b37..a7e7614310 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -12,7 +12,6 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf- include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main' include { DRAGMAP_HASHTABLE } from 
'../../../modules/nf-core/dragmap/hashtable/main' include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' -include { MSISENSORPRO_SCAN } from '../../../modules/nf-core/msisensorpro/scan/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS } from '../../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main' @@ -50,7 +49,6 @@ workflow PREPARE_GENOME { DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap GATK4_CREATESEQUENCEDICTIONARY(fasta) - MSISENSORPRO_SCAN(fasta) SAMTOOLS_FAIDX(fasta, [ [ id:'no_fai' ], [] ] ) // the following are flattened and mapped in case the user supplies more than one value for the param @@ -105,7 +103,6 @@ workflow PREPARE_GENOME { versions = versions.mix(BWAMEM2_INDEX.out.versions) versions = versions.mix(DRAGMAP_HASHTABLE.out.versions) versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - versions = versions.mix(MSISENSORPRO_SCAN.out.versions) versions = versions.mix(SAMTOOLS_FAIDX.out.versions) versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions) versions = versions.mix(TABIX_DBSNP.out.versions) @@ -125,7 +122,6 @@ workflow PREPARE_GENOME { germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi allele_files // path: allele_files diff --git a/subworkflows/local/samplesheet_to_channel/main.nf 
b/subworkflows/local/samplesheet_to_channel/main.nf index 245bfaec1a..1ca34be787 100644 --- a/subworkflows/local/samplesheet_to_channel/main.nf +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -153,7 +153,7 @@ workflow SAMPLESHEET_TO_CHANNEL{ } input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples - def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller'] def requested_tools_requiring_normal_samples = [] tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> if (tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) @@ -164,7 +164,7 @@ workflow SAMPLESHEET_TO_CHANNEL{ } } - // Fails when wrongfull extension for intervals file + // Fails when wrongful extension for intervals file if (wes && !step == 'annotate') { if (intervals && !intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf index 23415aed48..32f95c5524 100644 --- a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf @@ -109,6 +109,8 @@ workflow PIPELINE_INITIALISATION { params.known_snps, params.known_snps_tbi, params.mappability, + params.msisensorpro_scan, + params.msisensorpro_baseline, params.multiqc_config, params.ngscheckmate_bed, params.pon, diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index 90307f19c2..09c639d4e9 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -127,6 +127,7 @@ workflow SAREK { 
loci_files mappability msisensorpro_scan + msisensorpro_baseline ngscheckmate_bed pon pon_tbi @@ -792,6 +793,7 @@ workflow SAREK { intervals_bed_combined, intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi mappability, + msisensorpro_baseline, pon, pon_tbi, params.joint_mutect2, From 4a53e757466ec35339813c48bb8ee7121f29d950 Mon Sep 17 00:00:00 2001 From: Omer An Date: Mon, 2 Sep 2024 10:28:55 +0800 Subject: [PATCH 2/5] fix typo --- modules/nf-core/ascat/main.nf | 2 -- subworkflows/local/samplesheet_to_channel/main.nf | 2 +- tests/test_umi.yml | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/ascat/main.nf b/modules/nf-core/ascat/main.nf index 8aeb9847b5..d441b98e72 100644 --- a/modules/nf-core/ascat/main.nf +++ b/modules/nf-core/ascat/main.nf @@ -195,6 +195,4 @@ process ASCAT { echo ' ascat: 3.0.0' >> versions.yml """ - - } diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf index 1ca34be787..928c4f7593 100644 --- a/subworkflows/local/samplesheet_to_channel/main.nf +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -175,7 +175,7 @@ workflow SAMPLESHEET_TO_CHANNEL{ } if (step == 'mapping' && aligner.contains("sentieon-bwamem") && umi_read_structure) { - error("Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner.") + error("Sentieon BWA is currently not compatible with FGBio UMI handling. Please choose a different aligner.") } if (tools && tools.split(',').contains("sentieon_haplotyper") && joint_germline && (!sentieon_haplotyper_emit_mode || !(sentieon_haplotyper_emit_mode.contains('gvcf')))) { diff --git a/tests/test_umi.yml b/tests/test_umi.yml index 0c8392f40e..379222b756 100644 --- a/tests/test_umi.yml +++ b/tests/test_umi.yml @@ -49,7 +49,7 @@ exit_code: 1 stdout: contains: - - "Sentieon BWA is currently not compatible with FGBio UMI handeling. 
Please choose a different aligner." + - "Sentieon BWA is currently not compatible with FGBio UMI handling. Please choose a different aligner." # - name: Run UMI TSO test # command: nextflow run main.nf -profile test,umi_tso --outdir results # tags: From 3590dfb67709c987bf10ea658ab322fadb85a3ce Mon Sep 17 00:00:00 2001 From: Omer An Date: Tue, 3 Sep 2024 11:04:59 +0800 Subject: [PATCH 3/5] remove spaces --- modules/nf-core/msisensorpro/msisomatic/main.nf | 2 +- modules/nf-core/msisensorpro/msitumoronly/main.nf | 2 +- subworkflows/local/vcf_annotate_bcftools/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/msisensorpro/msisomatic/main.nf b/modules/nf-core/msisensorpro/msisomatic/main.nf index 52a33705a3..3fd51ad875 100644 --- a/modules/nf-core/msisensorpro/msisomatic/main.nf +++ b/modules/nf-core/msisensorpro/msisomatic/main.nf @@ -26,7 +26,7 @@ process MSISENSORPRO_MSISOMATIC { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def fasta = fasta ? "-g ${fasta}" : "" - def intervals = intervals ? "-e ${intervals} " : "" + def intervals = intervals ? "-e ${intervals}" : "" """ msisensor-pro \\ msi \\ diff --git a/modules/nf-core/msisensorpro/msitumoronly/main.nf b/modules/nf-core/msisensorpro/msitumoronly/main.nf index 92dc3b6b56..0ee0c124d5 100644 --- a/modules/nf-core/msisensorpro/msitumoronly/main.nf +++ b/modules/nf-core/msisensorpro/msitumoronly/main.nf @@ -26,7 +26,7 @@ process MSISENSORPRO_MSITUMORONLY { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def fasta = fasta ? "-g ${fasta}" : "" - def intervals = intervals ? "-e ${intervals} " : "" + def intervals = intervals ? 
"-e ${intervals}" : "" """ msisensor-pro \\ pro \\ diff --git a/subworkflows/local/vcf_annotate_bcftools/main.nf b/subworkflows/local/vcf_annotate_bcftools/main.nf index e54c52aa7c..94e4b89510 100644 --- a/subworkflows/local/vcf_annotate_bcftools/main.nf +++ b/subworkflows/local/vcf_annotate_bcftools/main.nf @@ -29,5 +29,5 @@ workflow VCF_ANNOTATE_BCFTOOLS { emit: vcf_tbi = ch_vcf_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - versions = ch_versions // path: versions.yml + versions = ch_versions // path: versions.yml } From 03a0122c2f75fe7439ea0679df64a779d36b7fdb Mon Sep 17 00:00:00 2001 From: Omer An Date: Mon, 9 Sep 2024 13:44:06 +0800 Subject: [PATCH 4/5] Implemented TMB score calculation --- assets/tmb/mutect2.yml | 10 + assets/tmb/snpeff.yml | 119 +++ assets/tmb/strelka.yml | 10 + conf/modules/tmb.config | 52 ++ modules.json | 5 + modules/local/tmb/environment.yml | 7 + modules/local/tmb/main.nf | 41 + modules/nf-core/bcftools/norm/environment.yml | 7 + modules/nf-core/bcftools/norm/main.nf | 70 ++ modules/nf-core/bcftools/norm/meta.yml | 69 ++ .../nf-core/bcftools/norm/tests/main.nf.test | 563 +++++++++++++ .../bcftools/norm/tests/main.nf.test.snap | 758 ++++++++++++++++++ .../bcftools/norm/tests/nextflow.bcf.config | 4 + .../norm/tests/nextflow.bcf_gz.config | 4 + .../bcftools/norm/tests/nextflow.config | 4 + .../bcftools/norm/tests/nextflow.vcf.config | 4 + .../norm/tests/nextflow.vcf_gz.config | 4 + modules/nf-core/bcftools/norm/tests/tags.yml | 2 + .../bcftools/norm/tests/vcf_gz_index.config | 4 + .../norm/tests/vcf_gz_index_csi.config | 4 + .../norm/tests/vcf_gz_index_tbi.config | 4 + nextflow.config | 2 + nextflow_schema.json | 9 +- .../local/vcf_tumor_mutational_burden/main.nf | 41 + .../nf-core/vcf_annotate_ensemblvep/main.nf | 4 +- .../nf-core/vcf_annotate_snpeff/main.nf | 5 +- workflows/sarek/main.nf | 10 + 27 files changed, 1813 insertions(+), 3 deletions(-) create mode 100644 assets/tmb/mutect2.yml create mode 100644 assets/tmb/snpeff.yml 
create mode 100644 assets/tmb/strelka.yml create mode 100644 conf/modules/tmb.config create mode 100644 modules/local/tmb/environment.yml create mode 100644 modules/local/tmb/main.nf create mode 100644 modules/nf-core/bcftools/norm/environment.yml create mode 100644 modules/nf-core/bcftools/norm/main.nf create mode 100644 modules/nf-core/bcftools/norm/meta.yml create mode 100644 modules/nf-core/bcftools/norm/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/norm/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.bcf.config create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.config create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.vcf.config create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config create mode 100644 modules/nf-core/bcftools/norm/tests/tags.yml create mode 100644 modules/nf-core/bcftools/norm/tests/vcf_gz_index.config create mode 100644 modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config create mode 100644 modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config create mode 100644 subworkflows/local/vcf_tumor_mutational_burden/main.nf diff --git a/assets/tmb/mutect2.yml b/assets/tmb/mutect2.yml new file mode 100644 index 0000000000..415238e82a --- /dev/null +++ b/assets/tmb/mutect2.yml @@ -0,0 +1,10 @@ +## Describe the fields +## For definition, provide the expected key:values +## Note that several keys/values can be defined +## +############################################### + +freq: "AF" +depth: "DP" +altDepth: "AD" +maxVaf: "1" diff --git a/assets/tmb/snpeff.yml b/assets/tmb/snpeff.yml new file mode 100644 index 0000000000..8781d82cfd --- /dev/null +++ b/assets/tmb/snpeff.yml @@ -0,0 +1,119 @@ +## Describe the fields +## For definition, provide the expected key:values +## Note that several keys/values can be defined + 
+############################################### +## SnpEff Parsing + +## Tags +tag: "ANN" +sep: "&" + +## Annotation flags + +isCoding: + 1: + - chromosome_number_variation + - coding_sequence_variant + - conservative_inframe_deletion + - conservative_inframe_insertion + - disruptive_inframe_deletion + - disruptive_inframe_insertion + - exon_loss + - exon_loss_variant + - exon_variant + - frameshift_variant + - gene_variant + - initiator_codon_variant + - missense_variant + - rare_amino_acid_variant + - splice_acceptor_variant + - splice_donor_variant + - splice_region_variant + - start_lost + - start_retained + - stop_gained + - stop_lost + - stop_retained_variant + - synonymous_variant + - transcript_ablation + - transcript_amplification + - transcript_variant + +isNonCoding: + 1: + - 3_prime_UTR_truncation + - 3_prime_UTR_variant + - 5_prime_UTR_premature_start_codon_gain_variant + - 5_prime_UTR_truncation + - 5_prime_UTR_variant + - conserved_intergenic_variant + - conserved_intron_variant + - downstream_gene_variant + - feature_elongation + - feature_truncation + - intergenic_region + - intragenic_variant + - intron_variant + - mature_miRNA_variant + - miRNA + - NMD_transcript_variant + - non_coding_transcript_exon_variant + - non_coding_transcript_variant + - regulatory_region_ablation + - regulatory_region_amplification + - regulatory_region_variant + - TF_binding_site_variant + - TFBS_ablation + - TFBS_amplification + - upstream_gene_variant + +isSplicing: + 1: + - splice_donor_variant + - splice_acceptor_variant + - splice_region_variant + +isSynonymous: + 1: + - start_retained_variant + - stop_retained_variant + - synonymous_variant + +isNonSynonymous: + 1: + - frameshift_variant + - missense_variant + - rare_amino_acid_variant + - splice_acceptor_variant + - splice_donor_variant + - splice_region_variant + - start_lost + - stop_gained + - stop_lost + +## Databases +cancerDb: + cosmic: + - cosmic_coding_ID + - cosmic_noncoding_ID + +polymDb: + 1k: + - 
kg_AMR_AF + - kg_AFR_AF + - kg_EAS_AF + - kg_EUR_AF + - kg_SAS_AF + - KG_AF_GLOBAL + + gnomad: + - GnomAD + - gnomAD_genomes_AF + - AF + + esp: + - ESP_AF_GLOBAL + + dbsnp: + - dbSNPBuildID diff --git a/assets/tmb/strelka.yml b/assets/tmb/strelka.yml new file mode 100644 index 0000000000..415238e82a --- /dev/null +++ b/assets/tmb/strelka.yml @@ -0,0 +1,10 @@ +## Describe the fields +## For definition, provide the expected key:values +## Note that several keys/values can be defined +## +############################################### + +freq: "AF" +depth: "DP" +altDepth: "AD" +maxVaf: "1" diff --git a/conf/modules/tmb.config b/conf/modules/tmb.config new file mode 100644 index 0000000000..c3c6d1993a --- /dev/null +++ b/conf/modules/tmb.config @@ -0,0 +1,52 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// TMB + +process { + + withName: 'BCFTOOLS_NORM' { + ext.args = { "-m- -c w" } + ext.prefix = { vcf.baseName - ".vcf" + ".norm" } + ext.when = { meta.dbconfig && meta.varconfig } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tmb/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'TMB' { + ext.args = { [ + "--sample ${meta.patient}_${meta.sample}", + params.tmb_effgenomesize ? 
"--effGenomeSize ${params.tmb_effgenomesize}" : '', + '--vaf 0.05', + '--maf 1', + '--minDepth 100', + '--minAltDepth 2', + '--filterLowQual', + '--filterNonCoding', + '--filterSyn', + '--filterPolym', + '--polymDb 1k,gnomad,dbsnp' + ].join(' ').trim() + } + ext.prefix = { vcf.baseName } + ext.when = { dbconfig && varconfig } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tmb/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/modules.json b/modules.json index 26d801647b..53ab668c0c 100644 --- a/modules.json +++ b/modules.json @@ -26,6 +26,11 @@ "git_sha": "a5ba4d59c2b248c0379b0f8aeb4e7e754566cd1f", "installed_by": ["bam_ngscheckmate"] }, + "bcftools/norm": { + "branch": "master", + "git_sha": "33ef773a7ea36e88323902f63662aa53c9b88988", + "installed_by": ["modules"] + }, "bcftools/sort": { "branch": "master", "git_sha": "a5ba4d59c2b248c0379b0f8aeb4e7e754566cd1f", diff --git a/modules/local/tmb/environment.yml b/modules/local/tmb/environment.yml new file mode 100644 index 0000000000..6ca1e9c0bf --- /dev/null +++ b/modules/local/tmb/environment.yml @@ -0,0 +1,7 @@ +name: tmb +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tmb=1.3.0 diff --git a/modules/local/tmb/main.nf b/modules/local/tmb/main.nf new file mode 100644 index 0000000000..0894f37ebd --- /dev/null +++ b/modules/local/tmb/main.nf @@ -0,0 +1,41 @@ +process TMB { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/tmb:1.3.0--pyh5e36f6f_0': + 'quay.io/biocontainers/tmb:1.3.0--pyh5e36f6f_0' }" + + input: + tuple val(meta), path(vcf), path(dbconfig), path(varconfig) + path (intervals) + + output: + tuple val(meta), path("*.log"), emit: log + path("*_export.vcf") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_bed = intervals ? "--bed ${intervals}" : '' + def target_region = args.contains("--effGenomeSize") ? '' : intervals_bed + """ + pyTMB.py -i ${vcf} \\ + --dbConfig ${dbconfig} \\ + --varConfig ${varconfig} \\ + ${target_region} \\ + ${args} \\ + --export \\ + > ${prefix}.tmb.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tmb: \$(echo \$(pyTMB.py --version 2>&1) | sed 's/^.*pyTMB.py //; s/ .*\$//' | sed 's|[()]||g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 0000000000..0c7dfa8f6c --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_norm +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 0000000000..bd7a250127 --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,70 @@ +process BCFTOOLS_NORM { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? 
"echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 0000000000..a0cdeaf102 --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,69 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - csi: + type: file + description: Default VCF file index + pattern: "*.csi" + - tbi: + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test new file mode 100644 index 0000000000..dbc4150237 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test @@ -0,0 +1,563 @@ +nextflow_process { + + name "Test Process BCFTOOLS_NORM" + script "../main.nf" + process "BCFTOOLS_NORM" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/norm" + + test("sarscov2 - [ vcf, [] ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] 
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output") { + + config "./nextflow.bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") { + + config "./nextflow.bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ 
+ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") { + + config "./nextflow.vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") { + + config "./nextflow.bcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") { + + config "./nextflow.bcf_gz.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap new file mode 100644 index 0000000000..3be52116a9 --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap @@ -0,0 +1,758 @@ +{ + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:42.639095032" + }, + "sarscov2 - [ vcf, [] ], fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:05.448449893" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:12.741719961" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": { + "content": [ + { + "0": [ + 
[ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:39:22.875147941" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T08:15:23.38765384" + }, + "sarscov2 - [ vcf, [] ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:36:21.519977754" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + 
"vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:27.8230994" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:53.942403192" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:56:05.3799488" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:28.356741947" + }, + "sarscov2 - [ vcf, tbi ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:36:58.39445154" + }, + "sarscov2 - [ vcf, tbi ], fasta -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:16.259516142" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:39:10.503208929" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T07:52:58.381931979" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:59.121377258" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:56:16.404380471" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:09.808834237" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:42.141945244" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config new file mode 100644 index 0000000000..b79af86817 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type b --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config new file mode 100644 index 0000000000..f36f397c2c --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type u --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config new file mode 100644 index 0000000000..510803b407 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config new file mode 100644 index 0000000000..10bf93e320 --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type v --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config new file mode 100644 index 0000000000..b31dd2de22 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type z --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/tags.yml b/modules/nf-core/bcftools/norm/tests/tags.yml new file mode 100644 index 0000000000..f6f5e35616 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/norm: + - "modules/nf-core/bcftools/norm/**" diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config new file mode 100644 index 0000000000..7dd696ee26 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config new file mode 100644 index 0000000000..aebffb6fb7 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config new file mode 100644 index 0000000000..b192ae7d19 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff
--git a/nextflow.config b/nextflow.config index 6a3d9552e3..b7c681a260 100644 --- a/nextflow.config +++ b/nextflow.config @@ -77,6 +77,7 @@ params { sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE' sentieon_haplotyper_emit_mode = 'variant' // default value for Sentieon haplotyper + tmb_effgenomesize = null // wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers // Annotation @@ -443,6 +444,7 @@ includeConfig 'conf/modules/sentieon_haplotyper.config' includeConfig 'conf/modules/sentieon_haplotyper_joint_germline.config' includeConfig 'conf/modules/strelka.config' includeConfig 'conf/modules/tiddit.config' +includeConfig 'conf/modules/tmb.config' includeConfig 'conf/modules/post_variant_calling.config' //annotate diff --git a/nextflow_schema.json b/nextflow_schema.json index bd10acb70f..4dfa79e660 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -112,7 +112,7 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** 
Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? [ + meta + [ dbconfig: getTMBconfig(meta.annotation), + varconfig: getTMBconfig(meta.variantcaller) ], + vcf, tbi ] } + + BCFTOOLS_NORM(vcf_in, fasta) + + + tmb_in = BCFTOOLS_NORM.out.vcf.map{ meta, vcf -> [ meta, vcf, meta.dbconfig, meta.varconfig ] } + + TMB(tmb_in, target_bed) + + versions = versions.mix(BCFTOOLS_NORM.out.versions) + versions = versions.mix(TMB.out.versions) + + emit: + versions +} + +// +// Function to retrieve config file for tumor mutational burden +// +def getTMBconfig(tool) { + def configFile = "$projectDir/assets/tmb/${tool}.yml" + def file = new File(configFile) + return file.exists() ? configFile : null +} diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf index 291eddc11b..4bf03da9ec 100644 --- a/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf @@ -7,7 +7,7 @@ include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' workflow VCF_ANNOTATE_ENSEMBLVEP { take: - ch_vcf // channel: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... (optionnal)]] + ch_vcf // channel: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... 
(optional)]] ch_fasta // channel: [ val(meta2), path(fasta) ] (optional) val_genome // value: genome to use val_species // value: species to use @@ -31,6 +31,8 @@ workflow VCF_ANNOTATE_ENSEMBLVEP { TABIX_TABIX(ENSEMBLVEP_VEP.out.vcf) ch_vcf_tbi = ENSEMBLVEP_VEP.out.vcf.join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + // Add annotation to meta to use the right config for TMB + .map{ meta, vcf, tbi -> [ meta + [ annotation:'vep' ], vcf, tbi ] } // Gather versions of all tools used ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions) diff --git a/subworkflows/nf-core/vcf_annotate_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf index 3570a5b7c0..1ebe9528e5 100644 --- a/subworkflows/nf-core/vcf_annotate_snpeff/main.nf +++ b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf @@ -17,12 +17,15 @@ workflow VCF_ANNOTATE_SNPEFF { SNPEFF_SNPEFF(ch_vcf, val_snpeff_db, ch_snpeff_cache) TABIX_BGZIPTABIX(SNPEFF_SNPEFF.out.vcf) + // Add annotation to meta to use the right config for TMB + ch_vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi.map{ meta, vcf, tbi -> [ meta + [ annotation:'snpeff' ], vcf, tbi ] } + // Gather versions of all tools used ch_versions = ch_versions.mix(SNPEFF_SNPEFF.out.versions) ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) emit: - vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), path(vcf), path(tbi) ] + vcf_tbi = ch_vcf_tbi // channel: [ val(meta), path(vcf), path(tbi) ] reports = SNPEFF_SNPEFF.out.report // channel: [ path(html) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index 09c639d4e9..febe591d97 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -83,6 +83,9 @@ include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../../subwor // Annotation include { VCF_ANNOTATE_ALL } from '../../subworkflows/local/vcf_annotate_all/main' +// TMB +include { TUMOR_MUTATIONAL_BURDEN } from 
'../../subworkflows/local/vcf_tumor_mutational_burden/main' + // MULTIQC include { MULTIQC } from '../../modules/nf-core/multiqc/main' @@ -890,6 +893,13 @@ workflow SAREK { // Gather used softwares versions versions = versions.mix(VCF_ANNOTATE_ALL.out.versions) reports = reports.mix(VCF_ANNOTATE_ALL.out.reports) + + // TMB + if (params.tools.split(',').contains('tmb')) { + TUMOR_MUTATIONAL_BURDEN(VCF_ANNOTATE_ALL.out.vcf_ann, fasta, intervals_bed_combined) + + versions = versions.mix(TUMOR_MUTATIONAL_BURDEN.out.versions) + } } } From f40faf46bcdf2654bcac46c317c78ee57eb7abef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20An?= Date: Thu, 16 Jan 2025 10:08:02 +0800 Subject: [PATCH 5/5] removed trailing space for linting --- nextflow.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 2df262cefb..661d20ba8d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -79,7 +79,7 @@ params { sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE' sentieon_haplotyper_emit_mode = 'variant' // default value for Sentieon haplotyper - tmb_effgenomesize = null // + tmb_effgenomesize = null // wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers // Annotation @@ -481,4 +481,3 @@ includeConfig 'conf/modules/lofreq.config' //annotate includeConfig 'conf/modules/annotate.config' -