diff --git a/README.md b/README.md index 0efb372..5988c2c 100644 --- a/README.md +++ b/README.md @@ -20,4 +20,8 @@ If you are really impatient, check out our [quickstart](docs/quickstart.md) guid FooDMe2 is developed and supported by the following people and organizations: -[Marc Höppner](https://github.com/marchoeppner), Landeslabor Schleswig-Holstein, [LSH](https://www.schleswig-holstein.de/DE/landesregierung/ministerien-behoerden/LLABOR) \ No newline at end of file +[Marc Höppner](https://github.com/marchoeppner), Landeslabor Schleswig-Holstein, [LSH](https://www.schleswig-holstein.de/DE/landesregierung/ministerien-behoerden/LLABOR) + +## Acknowledgements + +We thank the developers of the [AQUAMIS](https://gitlab.com/bfr_bioinformatics/AQUAMIS) pipeline for making some of the building blocks on which GABI is based publicly available - specifically the ConFindr database and validation data for Campylobacter spp. as well as the reference intervals for a broad range of bacteria to determine assembly status. \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index bdc014a..d4916ed 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,6 +238,15 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: MEDAKA_VARIANT { + ext.args = params.medaka_model + publishDir = [ + path: { "${params.outdir}/samples/${meta.sample_id}/variants" }, + mode: params.publish_dir_mode, + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/conf/resources.config b/conf/resources.config index d3273bc..dc3fc97 100644 --- a/conf/resources.config +++ b/conf/resources.config @@ -187,7 +187,7 @@ params { campylobacter_jejuni = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST" campylobacter_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST" campylobacter_lari = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST" - escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5_cgMLST" + escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5/Escherichia_coli_INNUENDO_wgMLST" listeria_monocytogenes = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_6/Listeria_monocytogenes_Pasteur_cgMLST" yersinia_enterocolitica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_7/Yersinia_enterocolitica_INNUENDO_wgMLST" salmonella_enterica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_8_cgMLST" diff --git a/modules/cat_fastq/main.nf b/modules/cat_fastq/main.nf index 09c2c35..00df26f 100644 --- a/modules/cat_fastq/main.nf +++ b/modules/cat_fastq/main.nf @@ -23,7 +23,7 @@ process CAT_FASTQ { if (meta.single_end) { """ - zcat ${reads.join(' ')} | gzip > ${prefix}.merged.fastq.gz + cat ${reads.join(' ')} > ${prefix}.merged.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -35,8 +35,8 @@ process CAT_FASTQ { def read2 = [] readList.eachWithIndex { v, ix -> (ix & 1 ? 
read2 : read1) << v } """ - zcat ${read1.join(' ')} | gzip > ${prefix}_1.merged.fastq.gz - zcat ${read2.join(' ')} | gzip > ${prefix}_2.merged.fastq.gz + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/medaka/variant/environment.yml b/modules/medaka/variant/environment.yml new file mode 100644 index 0000000..50c0777 --- /dev/null +++ b/modules/medaka/variant/environment.yml @@ -0,0 +1,7 @@ +name: medaka_variant +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::medaka=2.0.1 diff --git a/modules/medaka/variant/main.nf b/modules/medaka/variant/main.nf new file mode 100644 index 0000000..1fc72d5 --- /dev/null +++ b/modules/medaka/variant/main.nf @@ -0,0 +1,37 @@ +process MEDAKA_VARIANT { + tag "$meta.sample_id" + label 'short_parallel' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/medaka:2.0.1--py38h8774169_0' : + 'quay.io/biocontainers/medaka:2.0.1--py38h8774169_0' }" + + input: + tuple val(meta), path(reads), path(assembly) + + output: + tuple val(meta), path("*annotated.vcf") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.sample_id}" + """ + medaka_variant \\ + -t $task.cpus \\ + $args \\ + -i $reads \\ + -r $assembly \\ + -o ./ + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) + END_VERSIONS + """ +} diff --git a/modules/medaka/variant/meta.yml b/modules/medaka/variant/meta.yml new file mode 100644 index 0000000..9ed3589 --- /dev/null +++ b/modules/medaka/variant/meta.yml @@ -0,0 +1,45 @@ +name: medaka_variant +description: A tool to create consensus sequences and variant calls from nanopore sequencing data +keywords: + - assembly + - polishing + - nanopore +tools: + - medaka: + description: Neural network sequence error correction. + homepage: https://nanoporetech.github.io/medaka/index.html + documentation: https://nanoporetech.github.io/medaka/index.html + tool_dev_url: https://github.com/nanoporetech/medaka + licence: ["Mozilla Public License 2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input nanopore fasta/FastQ files + pattern: "*.{fasta,fa,fastq,fastq.gz,fq,fq.gz}" + - assembly: + type: file + description: Genome assembly + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Annotated VCF file with the variants called by medaka_variant + pattern: "*annotated.vcf" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/modules/medaka/variant/tests/main.nf.test b/modules/medaka/variant/tests/main.nf.test new file mode 100644 index 0000000..948e398 --- /dev/null +++ b/modules/medaka/variant/tests/main.nf.test @@ -0,0 +1,37 @@ +nextflow_process { + + name "Test Process MEDAKA" + tag "modules_nfcore" + tag "modules" + tag "medaka" + script "../main.nf" + process "MEDAKA" + + test("Medaka") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.assembly[0][1]).linesGzip.join()[0..99], + process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/medaka/variant/tests/main.nf.test.snap b/modules/medaka/variant/tests/main.nf.test.snap new file mode 100644 index 0000000..1396e9a --- /dev/null +++ b/modules/medaka/variant/tests/main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "Medaka": { + "content": [ + ">MT192765.1GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTC", + [ + "versions.yml:md5,739bb00a08faba4029f9f5ab9c15275a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-14T12:51:51.820749" + } +} \ No newline at end of file diff --git a/modules/medaka/variant/tests/tags.yml b/modules/medaka/variant/tests/tags.yml new file mode 100644 index 0000000..dd9fb10 --- /dev/null +++ b/modules/medaka/variant/tests/tags.yml @@ -0,0 +1,2 @@ +medaka: + - 
modules/nf-core/medaka/** diff --git a/modules/tabix/bgzip/environment.yml b/modules/tabix/bgzip/environment.yml new file mode 100644 index 0000000..56cc0fb --- /dev/null +++ b/modules/tabix/bgzip/environment.yml @@ -0,0 +1,8 @@ +name: tabix_bgzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.20 diff --git a/modules/tabix/bgzip/main.nf b/modules/tabix/bgzip/main.nf new file mode 100644 index 0000000..9afe0d3 --- /dev/null +++ b/modules/tabix/bgzip/main.nf @@ -0,0 +1,41 @@ +process TABIX_BGZIP { + tag "$meta.sample_id" + label 'short_serial' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'quay.io/biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.sample_id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? 
'-d' : '' + // Name the index according to $prefix, unless a name has been requested (flags are searched for anywhere in args, not only when they are the sole argument) + if (((args =~ "(^| )-i\\b") || (args =~ "(^| )--index(\$| )")) && !(args =~ "(^| )-I\\b") && !(args =~ "(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/tabix/bgzip/meta.yml b/modules/tabix/bgzip/meta.yml new file mode 100644 index 0000000..621d49e --- /dev/null +++ b/modules/tabix/bgzip/meta.yml @@ -0,0 +1,52 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: Output compressed/decompressed file + pattern: "*." 
+ - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/tabix/bgzip/tests/bgzip_compress.config b/modules/tabix/bgzip/tests/bgzip_compress.config new file mode 100644 index 0000000..6b6ff55 --- /dev/null +++ b/modules/tabix/bgzip/tests/bgzip_compress.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = ' -i' + } +} diff --git a/modules/tabix/bgzip/tests/main.nf.test b/modules/tabix/bgzip/tests/main.nf.test new file mode 100644 index 0000000..d784aa0 --- /dev/null +++ b/modules/tabix/bgzip/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TABIX_BGZIP" + script "modules/nf-core/tabix/bgzip/main.nf" + process "TABIX_BGZIP" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgzip" + + test("sarscov2_vcf_bgzip_compress") { + when { + process { + """ + input[0] = [ + [ id:'bgzip_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bgzip_test") + } + ) + } + } + + test("homo_genome_bedgz_compress") { + when { + process { + """ + input[0] = [ + [ id:'bedgz_test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bedgz_test") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_stub") 
{ + options '-stub' + config "./bgzip_compress.config" + + when { + process { + """ + input[0] = [ + [ id:"test_stub" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("test_stub") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_gzi") { + config "./bgzip_compress.config" + when { + process { + """ + input[0] = [ + [ id:"gzi_compress_test" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gzi[0][1]).name + ).match("gzi_compress_test") + } + ) + } + } +} diff --git a/modules/tabix/bgzip/tests/main.nf.test.snap b/modules/tabix/bgzip/tests/main.nf.test.snap new file mode 100644 index 0000000..0748143 --- /dev/null +++ b/modules/tabix/bgzip/tests/main.nf.test.snap @@ -0,0 +1,218 @@ +{ + "gzi_compress_test": { + "content": [ + "gzi_compress_test.vcf.gz.gzi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:29.328146" + }, + "homo_genome_bedgz_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:34.159992362" + }, + "test_stub": { + "content": [ + "test_stub.vcf.gz" + ], + "meta": { 
+ "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:20.811489" + }, + "sarscov2_vcf_bgzip_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:22.087769106" + }, + "sarscov2_vcf_bgzip_compress_gzi": { + "content": [ + { + "0": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "output": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:57.15091665" + }, + "bgzip_test": { + "content": [ + "bgzip_test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:03.768295" + }, + "bedgz_test": { + "content": [ + "bedgz_test.bed" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:12.453855" + }, + "sarscov2_vcf_bgzip_compress_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub" + }, + 
"test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:45.219404786" + } +} \ No newline at end of file diff --git a/modules/tabix/bgzip/tests/tags.yml b/modules/tabix/bgzip/tests/tags.yml new file mode 100644 index 0000000..de0eec8 --- /dev/null +++ b/modules/tabix/bgzip/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgzip: + - "modules/nf-core/tabix/bgzip/**" diff --git a/modules/tabix/bgzip/tests/vcf_none.config b/modules/tabix/bgzip/tests/vcf_none.config new file mode 100644 index 0000000..f3a3c46 --- /dev/null +++ b/modules/tabix/bgzip/tests/vcf_none.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = '' + } +} diff --git a/nextflow.config b/nextflow.config index af58bc0..212fa03 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,6 +45,7 @@ params { ont_min_length = 5000 ont_min_q = 10 ont_min_reads = 1000 + medaka_model = "" chewbbaca_schema_date = "--latest" @@ -57,11 +58,12 @@ params { run_name = null skip_report = false - skip_cgmlst = false + skip_cgmlst = true skip_mlst = false skip_amr = false skip_circos = false skip_serotyping = false + skip_variants = false plaintext_email = false skip_multiqc = false diff --git a/subworkflows/variants/main.nf b/subworkflows/variants/main.nf index ed1bd2c..e54b1d0 100644 --- a/subworkflows/variants/main.nf +++ b/subworkflows/variants/main.nf @@ -1,9 +1,12 @@ include { SNIPPY_RUN } from 
'./../../modules/snippy/run' +include { MEDAKA_VARIANT } from './../../modules/medaka/variant' include { TABIX_TABIX } from './../../modules/tabix/tabix' +include { TABIX_BGZIP } from './../../modules/tabix/bgzip' include { BCFTOOLS_STATS } from './../../modules/bcftools/stats' ch_versions = Channel.from([]) multiqc_files = Channel.from([]) +ch_variants = Channel.from([]) /* Call variants against the assembled genome @@ -17,10 +20,30 @@ workflow VARIANTS { main: + reads_with_assembly.branch { m,r,a -> + nanopore: m.platform == "NANOPORE" + illumina: m.platform == "ILLUMINA" + }.set { reads_by_platform } + + // Medaka is a stand-alone workflow for ONT reads to perform alignment and variant calling + MEDAKA_VARIANT( + reads_by_platform.nanopore + ) + ch_versions = ch_versions.mix(MEDAKA_VARIANT.out.versions) + + // Compress the Medaka VCF file + TABIX_BGZIP( + MEDAKA_VARIANT.out.vcf + ) + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions) + ch_variants = ch_variants.mix(TABIX_BGZIP.out.output) + + // Snippy is a stand-alone workflow for Illumina reads to perform alignment and variant calling SNIPPY_RUN( - reads_with_assembly + reads_by_platform.illumina ) ch_versions = ch_versions.mix(SNIPPY_RUN.out.versions) + ch_variants = ch_variants.mix(SNIPPY_RUN.out.vcf_gz) TABIX_TABIX( SNIPPY_RUN.out.vcf_gz diff --git a/workflows/gabi.nf b/workflows/gabi.nf index 5dd4562..48431e6 100644 --- a/workflows/gabi.nf +++ b/workflows/gabi.nf @@ -221,12 +221,24 @@ workflow GABI { ch_versions = ch_versions.mix(FLYE.out.versions) ch_assemblies = ch_assemblies.mix(FLYE.out.fasta) + + // Find empty assemblies and stop them + + ch_assemblies.branch { m,f -> + fail: f.countFasta() < 1 + pass: f.countFasta() > 0 + }.set { ch_assemblies_size } + + ch_assemblies_size.fail.subscribe { m, f -> + log.warn "${m.sample_id} - assembly is empty, stopping sample" + } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Tag and optionally remove highly fragmented assemblies 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - ch_assemblies.branch { m, f -> + ch_assemblies_size.pass.branch { m, f -> fail: f.countFasta() > params.max_contigs pass: f.countFasta() <= params.max_contigs }.set { ch_assemblies_status } @@ -310,25 +322,27 @@ workflow GABI { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUB: Map Illumina reads to chromosome assembly to check + SUB: Map reads to chromosome assembly to check for polymorphic positions as indication of read or assembly errors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - VARIANTS( - ch_illumina_trimmed.map { m,r -> - tuple(m.sample_id,m,r) - }.join( - ch_assembly_without_plasmids.map { m,a -> - tuple(m.sample_id,a) + if (!params.skip_variants) { + VARIANTS( + ch_illumina_trimmed.mix(ch_ont_trimmed).map { m,r -> + tuple(m.sample_id,m,r) + }.join( + ch_assembly_without_plasmids.map { m,a -> + tuple(m.sample_id,a) + } + ).map { s,m,r,a -> + tuple(m,r,a) } - ).map { s,m,r,a -> - tuple(m,r,a) - } - ) - ch_versions = ch_versions.mix(VARIANTS.out.versions) - multiqc_files = multiqc_files.mix(VARIANTS.out.qc) - ch_report = ch_report.mix(VARIANTS.out.stats) + ) + ch_versions = ch_versions.mix(VARIANTS.out.versions) + multiqc_files = multiqc_files.mix(VARIANTS.out.qc) + ch_report = ch_report.mix(VARIANTS.out.stats) + } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~