diff --git a/bin/chewbbaca_filter_schema.py b/bin/chewbbaca_filter_schema.py index 459a7be..58d4060 100755 --- a/bin/chewbbaca_filter_schema.py +++ b/bin/chewbbaca_filter_schema.py @@ -3,7 +3,8 @@ import glob import argparse - +import pathlib +import os parser = argparse.ArgumentParser(description="Script options") parser.add_argument("--list", help="List of valid alleles") @@ -15,13 +16,18 @@ def main(list, schema, output): - alleles = glob.glob("*/*.fasta") - + pathlib.Path(output).mkdir(parents=True, exist_ok=True) + + alleles = glob.glob(schema + "/*/*.fasta") + with open(list) as file: valid = [line.rstrip() for line in file] - - for keep in valid: - + + for allele in alleles: + allele_name = os.path.basename(allele) + # There probably is a pythonier way to do this + if allele_name in valid: + os.system("cp " + allele + " " + output + "/") if __name__ == '__main__': diff --git a/bin/gabi.py b/bin/gabi.py index a8a4d9b..586eff7 100755 --- a/bin/gabi.py +++ b/bin/gabi.py @@ -334,36 +334,36 @@ def main(yaml, template, output, reference, version, call, wd): if "total" in jdata["mosdepth"]: coverage = float(jdata["mosdepth"]["total"]["mean"]) - if coverage >= 40: + if coverage >= 40.0: coverage_status = status["pass"] - elif coverage >= 20: + elif coverage >= 20.0: coverage_status = status["warn"] else: - coverage_status = status["pass"] + coverage_status = status["fail"] if "illumina" in jdata["mosdepth"]: coverage_illumina = float(jdata["mosdepth"]["illumina"]["mean"]) - if coverage_illumina >= 40: + if coverage_illumina >= 40.0: coverage_illumina_status = status["pass"] - elif coverage_illumina >= 20: + elif coverage_illumina >= 20.0: coverage_illumina_status = status["warn"] else: coverage_illumina_status = status["fail"] if "nanopore" in jdata["mosdepth"]: coverage_nanopore = float(jdata["mosdepth"]["nanopore"]["mean"]) - if coverage_nanopore >= 40: + if coverage_nanopore >= 40.0: coverage_nanopore_status = status["pass"] - elif coverage_nanopore >= 20: + elif coverage_nanopore >= 20.0: coverage_nanopore_status = status["warn"] else: coverage_nanopore_status = status["fail"] if "pacbio" in jdata["mosdepth"]: coverage_pacbio = float(jdata["mosdepth"]["pacbio"]["mean"]) - if coverage_pacbio >= 40: + if coverage_pacbio >= 40.0: coverage_pacbio_status = status["pass"] - elif coverage_pacbio >= 20: + elif coverage_pacbio >= 20.0: coverage_pacbio_status = status["warn"] else: coverage_pacbio_status = status["fail"] diff --git a/conf/modules/installation.config b/conf/modules/installation.config index 672fa9d..2dc0c14 100644 --- a/conf/modules/installation.config +++ b/conf/modules/installation.config @@ -24,8 +24,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: CHEWBBACA_DOWNLOADSCHEMA { - ext.args = "--date ${params.chewbbaca_schema_date}" + withName: 'CHEWBBACA_DOWNLOADSCHEMA|CHEWBBACA_FILTER_SCHEMA' { + ext.args = "${params.chewbbaca_schema_date}" publishDir = [ path: { "${params.reference_base}/gabi/${params.reference_version}/chewbbaca" }, mode: params.publish_dir_mode, diff --git a/conf/resources.config b/conf/resources.config index 86fb262..d3273bc 100644 --- a/conf/resources.config +++ b/conf/resources.config @@ -187,10 +187,10 @@ params { campylobacter_jejuni = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST" campylobacter_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST" campylobacter_lari = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST" - escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5/Escherichia_coli_INNUENDO_wgMLST" + escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5_cgMLST" listeria_monocytogenes = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_6/Listeria_monocytogenes_Pasteur_cgMLST" yersinia_enterocolitica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_7/Yersinia_enterocolitica_INNUENDO_wgMLST" - salmonella_enterica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_8/Salmonella_enterica_INNUENDO_cgMLST" + salmonella_enterica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_8_cgMLST" streptococcus_agalactiae = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_9/Streptococcus_agalactiae_wgMLST" brucella_melitensis = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_10/Brucella_melitensis_Bm_cgMLST_95" brucella = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_11/Brucella_Brucella_cgMLST" diff --git a/modules/helper/chewbbaca_filter_schema/environment.yml b/modules/helper/chewbbaca_filter_schema/environment.yml new file mode 100644 index 0000000..9d0df85 --- /dev/null +++ b/modules/helper/chewbbaca_filter_schema/environment.yml @@ -0,0 +1,7 @@ +name: chewbbaca_filter_schema +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.23 diff --git a/modules/helper/chewbbaca_filter_schema/main.nf b/modules/helper/chewbbaca_filter_schema/main.nf new file mode 100644 index 0000000..73ed3f8 --- /dev/null +++ b/modules/helper/chewbbaca_filter_schema/main.nf @@ -0,0 +1,25 @@ +process CHEWBBACA_FILTER_SCHEMA { + label 'short_serial' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.23--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.23--pyhdfd78af_0' }" + + input: + tuple val(meta), path(schema_dir), path(list) + + output: + tuple val(meta), path(results) , emit: schema + + when: + + script: + def args = task.ext.args ?: '' + results = schema_dir.name + "_cgMLST" + + """ + chewbbaca_filter_schema.py --schema $schema_dir --list $list --output $results + + """ +} diff --git a/nextflow.config b/nextflow.config index 6cb5b0f..af58bc0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,7 +46,7 @@ params { ont_min_q = 10 ont_min_reads = 1000 - chewbbaca_schema_date = "2025-01-01T00:00:00" + chewbbaca_schema_date = "--latest" reference_base = null help = false @@ -86,7 +86,7 @@ params { manifest { name = "bio-raum/gabi" - version = "0.9.3" + version = "0.9.4" description = "GABI Pipeline for assembly and profiling of bacterial isolates" author = "Marc Hoeppner" homePage = "https://github.com/bio-raum/gabi" diff --git a/workflows/build_references.nf b/workflows/build_references.nf index 676babc..3a941c4 100644 --- a/workflows/build_references.nf +++ b/workflows/build_references.nf @@ -8,6 +8,7 @@ include { STAGE_FILE as DOWNLOAD_SOURMASH_DB } from './../modules/h include { STAGE_FILE as DOWNLOAD_SOURMASH_NR_DB } from './../modules/helper/stage_file' include { GUNZIP as GUNZIP_GENOME } from './../modules/gunzip' include { BIOBLOOM_MAKER } from './../modules/biobloom/maker' +include { CHEWBBACA_FILTER_SCHEMA } from './../modules/helper/chewbbaca_filter_schema' kraken_db_url = Channel.fromPath(params.references['kraken2'].url) confindr_db_url = Channel.fromPath(params.references['confindr'].url) @@ -30,9 +31,9 @@ chewie_ids = Channel.fromList([ [ [ taxon: "Brucella melitensis" ], 10], [ [ taxon: "Brucella" ], 11], [ [ taxon: "Clostridium perfringens" ], 12], - [ [ taxon: "Clostridium chauvoei" ], 13] + [ [ taxon: "Clostridium chauvoei" ], 13], [ [ taxon: "Bacillus anthracis" ], 14], - [ [ taxon: "Klebsiella oxytoca" ], 15] + [ [ taxon: "Klebsiella oxytoca" ], 15], [ [ taxon: "Clostridium neonatale" ], 16], [ [ taxon: "Shewanella" ], 17], [ [ taxon: "Neisseria meningitidis" ], 18] @@ -97,6 +98,24 @@ workflow BUILD_REFERENCES { CHEWBBACA_DOWNLOADSCHEMA( chewie_ids ) + + // See if any schema has a filter list configured + CHEWBBACA_DOWNLOADSCHEMA.out.schema.map { m, s -> + def taxon = m.taxon.toLowerCase().replaceAll(/ /, "_") + def ffile = null + if (params.chewbbaca_filters[taxon]) { + ffile = params.chewbbaca_filters[taxon] + } + tuple(m, s, ffile) + }.branch { m, s, ffile -> + fail: ffile == null + pass: ffile + }.set { chewie_schema_with_filter } + + // Filter that schema using the filter list + CHEWBBACA_FILTER_SCHEMA( + chewie_schema_with_filter.pass + ) } if (params.build_references) { diff --git a/workflows/gabi.nf b/workflows/gabi.nf index 5911e8a..5dd4562 100644 --- a/workflows/gabi.nf +++ b/workflows/gabi.nf @@ -377,7 +377,7 @@ workflow GABI { */ if (!params.skip_serotyping) { SEROTYPING( - ch_assemblies_without_plasmids_with_taxa + ch_assemblies_clean_with_taxa ) ch_versions = ch_versions.mix(SEROTYPING.out.versions) ch_report = ch_report.mix(SEROTYPING.out.reports) @@ -390,7 +390,7 @@ workflow GABI { */ if (!params.skip_mlst) { MLST_TYPING( - ch_assemblies_without_plasmids_with_taxa + ch_assemblies_clean_with_taxa ) ch_mlst = MLST_TYPING.out.report ch_versions = ch_versions.mix(MLST_TYPING.out.versions) @@ -405,7 +405,7 @@ workflow GABI { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ANNOTATE( - ch_assemblies_without_plasmids_with_taxa, + ch_assemblies_clean_with_taxa, ch_prokka_proteins, ch_prokka_prodigal )