Skip to content

Commit

Permalink
Minor fixes to overall workflow to catch failed assemblies
Browse files Browse the repository at this point in the history
  • Loading branch information
marchoeppner committed Jan 28, 2025
1 parent 62e7a34 commit 13fa50b
Show file tree
Hide file tree
Showing 21 changed files with 659 additions and 22 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,8 @@ If you are really impatient, check out our [quickstart](docs/quickstart.md) guid

GABI is developed and supported by the following people and organizations:

[Marc Höppner](https://github.com/marchoeppner), Landeslabor Schleswig-Holstein, [LSH](https://www.schleswig-holstein.de/DE/landesregierung/ministerien-behoerden/LLABOR)
[Marc Höppner](https://github.com/marchoeppner), Landeslabor Schleswig-Holstein, [LSH](https://www.schleswig-holstein.de/DE/landesregierung/ministerien-behoerden/LLABOR)

## Acknowledgements

We thank the developers of the [AQUAMIS](https://gitlab.com/bfr_bioinformatics/AQUAMIS) pipeline for making some of the building blocks on which GABI is based publicly available - specifically the ConFindr database and validation data for <i>Campylobacter</i> spp., as well as the reference intervals for a broad range of bacteria used to determine assembly status.
9 changes: 9 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,15 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
// Per-process settings for MEDAKA_VARIANT: extra command-line arguments and result publishing.
withName: MEDAKA_VARIANT {
// The value of params.medaka_model is handed to the process verbatim as its extra arguments.
// NOTE(review): confirm that params.medaka_model already contains the option flag that
// medaka_variant expects for a model (presumably `-m <model>`) and not just a bare model name.
ext.args = params.medaka_model
publishDir = [
// Results are published into a per-sample "variants" folder below the pipeline output directory.
path: { "${params.outdir}/samples/${meta.sample_id}/variants" },
mode: params.publish_dir_mode,
enabled: true,
// versions.yml is mapped to null (i.e. not published); every other file keeps its name.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


}
2 changes: 1 addition & 1 deletion conf/resources.config
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ params {
campylobacter_jejuni = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST"
campylobacter_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST"
campylobacter_lari = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST"
escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5_cgMLST"
escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5/Escherichia_coli_INNUENDO_wgMLST"
listeria_monocytogenes = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_6/Listeria_monocytogenes_Pasteur_cgMLST"
yersinia_enterocolitica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_7/Yersinia_enterocolitica_INNUENDO_wgMLST"
salmonella_enterica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_8_cgMLST"
Expand Down
6 changes: 3 additions & 3 deletions modules/cat_fastq/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ process CAT_FASTQ {

if (meta.single_end) {
"""
zcat ${reads.join(' ')} | gzip > ${prefix}.merged.fastq.gz
cat ${reads.join(' ')} > ${prefix}.merged.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand All @@ -35,8 +35,8 @@ process CAT_FASTQ {
def read2 = []
readList.eachWithIndex { v, ix -> (ix & 1 ? read2 : read1) << v }
"""
zcat ${read1.join(' ')} | gzip > ${prefix}_1.merged.fastq.gz
zcat ${read2.join(' ')} | gzip > ${prefix}_2.merged.fastq.gz
cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
7 changes: 7 additions & 0 deletions modules/medaka/variant/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment definition for the medaka variant-calling module.
name: medaka_variant
# Channels are listed in priority order.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# medaka is pinned to an exact release.
- bioconda::medaka=2.0.1
37 changes: 37 additions & 0 deletions modules/medaka/variant/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Calls variants with `medaka_variant` from a set of reads against an assembly.
//
// Input : tuple of sample meta map (must provide `sample_id`), read file(s) and the assembly.
// Output: `vcf`      - tuple of meta map and the file matching `*annotated.vcf`
//         `versions` - versions.yml recording the medaka version used
// Extra command-line options are injected through `task.ext.args`.
process MEDAKA_VARIANT {
    tag "$meta.sample_id"
    label 'short_parallel'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/medaka:2.0.1--py38h8774169_0' :
        'quay.io/biocontainers/medaka:2.0.1--py38h8774169_0' }"

    input:
    tuple val(meta), path(reads), path(assembly)

    output:
    tuple val(meta), path("*annotated.vcf"), emit: vcf
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Optional extra arguments supplied via the process configuration (ext.args).
    // Note: no `prefix` is defined here - the command writes into the task directory (-o ./)
    // and the output is collected by glob, so a prefix would never be used.
    def args = task.ext.args ?: ''
    """
    medaka_variant \\
        -t $task.cpus \\
        $args \\
        -i $reads \\
        -r $assembly \\
        -o ./

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' )
    END_VERSIONS
    """
}
45 changes: 45 additions & 0 deletions modules/medaka/variant/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Module metadata for the MEDAKA_VARIANT process (modules/medaka/variant/main.nf).
# The inputs and outputs listed here mirror the channels declared by that process.
name: medaka_variant
description: A tool to create consensus sequences and variant calls from nanopore sequencing data
keywords:
  - assembly
  - polishing
  - nanopore
  - variant calling
tools:
  - medaka:
      description: Neural network sequence error correction.
      homepage: https://nanoporetech.github.io/medaka/index.html
      documentation: https://nanoporetech.github.io/medaka/index.html
      tool_dev_url: https://github.com/nanoporetech/medaka
      licence: ["Mozilla Public License 2.0"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ sample_id:'test', single_end:false ]
  - reads:
      type: file
      description: List of input nanopore fasta/FastQ files
      pattern: "*.{fasta,fa,fastq,fastq.gz,fq,fq.gz}"
  - assembly:
      type: file
      description: Genome assembly
      pattern: "*.{fasta,fa}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ sample_id:'test', single_end:false ]
  # The process emits a VCF (channel `vcf`), not a polished assembly.
  - vcf:
      type: file
      description: Annotated variant calls in VCF format
      pattern: "*annotated.vcf"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@avantonder"
maintainers:
  - "@avantonder"
37 changes: 37 additions & 0 deletions modules/medaka/variant/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// nf-test specification for the MEDAKA_VARIANT process defined in ../main.nf.
nextflow_process {

    name "Test Process MEDAKA_VARIANT"
    tag "modules_nfcore"
    tag "modules"
    tag "medaka"
    tag "medaka/variant"
    script "../main.nf"
    // Must be the process name declared in ../main.nf.
    process "MEDAKA_VARIANT"

    // The test carries its own name so that it records a fresh snapshot entry instead of
    // being compared against a snapshot taken from a different (assembly-producing) module.
    test("sarscov2 - nanopore fastq - medaka variant") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test', sample_id:'test', single_end:true ], // meta map; the process tags tasks by sample_id
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // The process emits `vcf` (tuple of meta, file) and `versions`.
                // Only the VCF file name is snapshotted, not the file content.
                { assert snapshot(
                    file(process.out.vcf[0][1]).name,
                    process.out.versions
                    ).match()
                }
            )
        }

    }

}
15 changes: 15 additions & 0 deletions modules/medaka/variant/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Medaka": {
"content": [
">MT192765.1GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTC",
[
"versions.yml:md5,739bb00a08faba4029f9f5ab9c15275a"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-14T12:51:51.820749"
}
}
2 changes: 2 additions & 0 deletions modules/medaka/variant/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the test tag to the files whose changes should trigger the tagged tests.
# The module lives under modules/medaka/variant/ in this repository.
medaka:
  - modules/medaka/variant/**
8 changes: 8 additions & 0 deletions modules/tabix/bgzip/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Conda environment definition for the tabix/bgzip module.
name: tabix_bgzip
# Channels are listed in priority order.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Both packages are pinned to exact versions.
- bioconda::tabix=1.11
- bioconda::htslib=1.20
41 changes: 41 additions & 0 deletions modules/tabix/bgzip/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Compresses a file with bgzip, or decompresses it when the input already carries a
// bgzip-style extension (gz, bgz, bgzf).
//
// Input : tuple of sample meta map (must provide `sample_id`) and the file to process.
// Output: `output`   - tuple of meta map and the (de)compressed file
//         `gzi`      - optional tuple of meta map and the `.gzi` index
//         `versions` - versions.yml recording the tabix version used
// Extra command-line options are injected through `task.ext.args`.
process TABIX_BGZIP {
    tag "$meta.sample_id"
    label 'short_serial'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' :
        'quay.io/biocontainers/htslib:1.20--h5efdd21_2' }"

    input:
    tuple val(meta), path(input)

    output:
    tuple val(meta), path("${output}")    , emit: output
    tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true
    path "versions.yml"                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // The variables below are deliberately declared without `def`: `output` is referenced
    // by the output: declarations above and therefore must not be a script-local variable.
    prefix    = task.ext.prefix ?: "${meta.sample_id}"
    // An input with a bgzip-style extension is decompressed; anything else is compressed.
    in_bgzip  = ["gz", "bgz", "bgzf"].contains(input.getExtension())
    // When decompressing, the resulting extension is the one hidden beneath the compression suffix.
    extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension()
    output    = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz"
    command   = in_bgzip ? '-d' : ''
    // True when the pattern occurs anywhere in args. A substring search is required here:
    // String.matches() only succeeds if the WHOLE string matches, so the check would fail
    // as soon as args holds any option besides the index flag (e.g. "-i -l 9").
    def hasOption = { pattern -> (args =~ pattern).find() }
    // Name the index according to $prefix, unless a name has been requested
    if ((hasOption("(^| )-i\\b") || hasOption("(^| )--index(\$| )")) && !hasOption("(^| )-I\\b") && !hasOption("(^| )--index-name\\b")) {
        args = args + " -I ${output}.gzi"
    }
    """
    bgzip $command -c $args -@${task.cpus} $input > ${output}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """

}
52 changes: 52 additions & 0 deletions modules/tabix/bgzip/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Module metadata for the TABIX_BGZIP process: tool references plus the documented
# input and output channels.
name: tabix_bgzip
description: Compresses/decompresses files
keywords:
  - compress
  - decompress
  - bgzip
  - tabix
tools:
  - bgzip:
      description: |
        Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip.
      homepage: https://www.htslib.org/doc/tabix.html
      documentation: http://www.htslib.org/doc/bgzip.html
      doi: "10.1093/bioinformatics/btp352"
      licence:
        - "MIT"
# Channels consumed by the process.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: file to compress or to decompress
# Channels emitted by the process.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - output:
      type: file
      description: Output compressed/decompressed file
      pattern: "*."
  - gzi:
      type: file
      description: Optional gzip index file for compressed inputs
      pattern: "*.gzi"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@joseespinosa"
  - "@drpatelh"
  - "@maxulysse"
  - "@nvnieuwk"
maintainers:
  - "@joseespinosa"
  - "@drpatelh"
  - "@maxulysse"
  - "@nvnieuwk"
5 changes: 5 additions & 0 deletions modules/tabix/bgzip/tests/bgzip_compress.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test configuration: supplies extra command-line arguments to the TABIX_BGZIP process.
process {
withName: TABIX_BGZIP {
// Passed to the process as task.ext.args (note the leading space in the literal).
// NOTE(review): presumably exercises the index-writing code path of the module - confirm.
ext.args = ' -i'
}
}
Loading

0 comments on commit 13fa50b

Please sign in to comment.