Skip to content

Commit

Permalink
Updating chewbbaca schema installation to filter for EFSA cgMLST reco…
Browse files Browse the repository at this point in the history
…mmendations rather than full wgMLST
  • Loading branch information
marchoeppner committed Jan 24, 2025
1 parent e208fa2 commit 62e7a34
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 26 deletions.
18 changes: 12 additions & 6 deletions bin/chewbbaca_filter_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

import glob
import argparse

import pathlib
import os

parser = argparse.ArgumentParser(description="Script options")
parser.add_argument("--list", help="List of valid alleles")
Expand All @@ -15,13 +16,18 @@

def main(list, schema, output):
    """Copy the whitelisted allele FASTA files of a schema into ``output``.

    Args:
        list: Path to a text file holding one allele file name per line.
            Names are compared against the base name of each FASTA file.
            (The parameter shadows the builtin only to keep the existing
            call signature intact.)
        schema: Directory that is searched with ``<schema>/*/*.fasta``.
        output: Destination directory; created with parents if missing.

    Raises:
        OSError: If the list file cannot be read or a copy fails.
    """
    # Local import so this function does not depend on a file-level import.
    import shutil

    pathlib.Path(output).mkdir(parents=True, exist_ok=True)

    # A set gives O(1) membership tests; blank lines are ignored.
    with open(list) as file:
        valid = {line.rstrip() for line in file if line.rstrip()}

    for allele in glob.glob(os.path.join(schema, "*", "*.fasta")):
        if os.path.basename(allele) in valid:
            # shutil.copy avoids building a shell command, so paths with
            # spaces or shell metacharacters are handled and errors surface.
            shutil.copy(allele, output)


if __name__ == '__main__':
Expand Down
18 changes: 9 additions & 9 deletions bin/gabi.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,36 +334,36 @@ def main(yaml, template, output, reference, version, call, wd):

if "total" in jdata["mosdepth"]:
coverage = float(jdata["mosdepth"]["total"]["mean"])
if coverage >= 40:
if coverage >= 40.0:
coverage_status = status["pass"]
elif coverage >= 20:
elif coverage >= 20.0:
coverage_status = status["warn"]
else:
coverage_status = status["pass"]
coverage_status = status["fail"]

if "illumina" in jdata["mosdepth"]:
coverage_illumina = float(jdata["mosdepth"]["illumina"]["mean"])
if coverage_illumina >= 40:
if coverage_illumina >= 40.0:
coverage_illumina_status = status["pass"]
elif coverage_illumina >= 20:
elif coverage_illumina >= 20.0:
coverage_illumina_status = status["warn"]
else:
coverage_illumina_status = status["fail"]

if "nanopore" in jdata["mosdepth"]:
coverage_nanopore = float(jdata["mosdepth"]["nanopore"]["mean"])
if coverage_nanopore >= 40:
if coverage_nanopore >= 40.0:
coverage_nanopore_status = status["pass"]
elif coverage_nanopore >= 20:
elif coverage_nanopore >= 20.0:
coverage_nanopore_status = status["warn"]
else:
coverage_nanopore_status = status["fail"]

if "pacbio" in jdata["mosdepth"]:
coverage_pacbio = float(jdata["mosdepth"]["pacbio"]["mean"])
if coverage_pacbio >= 40:
if coverage_pacbio >= 40.0:
coverage_pacbio_status = status["pass"]
elif coverage_pacbio >= 20:
elif coverage_pacbio >= 20.0:
coverage_pacbio_status = status["warn"]
else:
coverage_pacbio_status = status["fail"]
Expand Down
4 changes: 2 additions & 2 deletions conf/modules/installation.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: CHEWBBACA_DOWNLOADSCHEMA {
ext.args = "--date ${params.chewbbaca_schema_date}"
withName: 'CHEWBBACA_DOWNLOADSCHEMA|CHEWBBACA_FILTER_SCHEMA' {
ext.args = "${params.chewbbaca_schema_date}"
publishDir = [
path: { "${params.reference_base}/gabi/${params.reference_version}/chewbbaca" },
mode: params.publish_dir_mode,
Expand Down
4 changes: 2 additions & 2 deletions conf/resources.config
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,10 @@ params {
campylobacter_jejuni = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST"
campylobacter_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST"
campylobacter_lari = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_4/Campylobacter_jejuni_INNUENDO_wgMLST"
escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5/Escherichia_coli_INNUENDO_wgMLST"
escherichia_coli = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_5_cgMLST"
listeria_monocytogenes = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_6/Listeria_monocytogenes_Pasteur_cgMLST"
yersinia_enterocolitica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_7/Yersinia_enterocolitica_INNUENDO_wgMLST"
salmonella_enterica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_8/Salmonella_enterica_INNUENDO_cgMLST"
salmonella_enterica = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_8_cgMLST"
streptococcus_agalactiae = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_9/Streptococcus_agalactiae_wgMLST"
brucella_melitensis = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_10/Brucella_melitensis_Bm_cgMLST_95"
brucella = "${params.reference_base}/gabi/${params.reference_version}/chewbbaca/schema_11/Brucella_Brucella_cgMLST"
Expand Down
7 changes: 7 additions & 0 deletions modules/helper/chewbbaca_filter_schema/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: chewbbaca_filter_schema
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::multiqc=1.23
25 changes: 25 additions & 0 deletions modules/helper/chewbbaca_filter_schema/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Reduce a downloaded chewBBACA schema to the loci named in a filter list.
// Emits a directory named "<schema_dir>_cgMLST" alongside the sample meta map.
process CHEWBBACA_FILTER_SCHEMA {
    label 'short_serial'

    // NOTE(review): the MultiQC environment/image is reused here, presumably
    // only to provide a Python interpreter for the helper script — confirm.
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/multiqc:1.23--pyhdfd78af_0' :
        'quay.io/biocontainers/multiqc:1.23--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(schema_dir), path(list)

    output:
    tuple val(meta), path(results), emit: schema

    when:
    // Run unless a config explicitly disables the task via ext.when.
    task.ext.when == null || task.ext.when

    script:
    // Assigned without `def` on purpose so the output block can resolve it.
    // task.ext.args is not read: the command below takes no extra arguments.
    results = schema_dir.name + "_cgMLST"

    """
    chewbbaca_filter_schema.py --schema $schema_dir --list $list --output $results
    """
}
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ params {
ont_min_q = 10
ont_min_reads = 1000

chewbbaca_schema_date = "2025-01-01T00:00:00"
chewbbaca_schema_date = "--latest"

reference_base = null
help = false
Expand Down Expand Up @@ -86,7 +86,7 @@ params {

manifest {
name = "bio-raum/gabi"
version = "0.9.3"
version = "0.9.4"
description = "GABI Pipeline for assembly and profiling of bacterial isolates"
author = "Marc Hoeppner"
homePage = "https://github.com/bio-raum/gabi"
Expand Down
23 changes: 21 additions & 2 deletions workflows/build_references.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include { STAGE_FILE as DOWNLOAD_SOURMASH_DB } from './../modules/h
include { STAGE_FILE as DOWNLOAD_SOURMASH_NR_DB } from './../modules/helper/stage_file'
include { GUNZIP as GUNZIP_GENOME } from './../modules/gunzip'
include { BIOBLOOM_MAKER } from './../modules/biobloom/maker'
include { CHEWBBACA_FILTER_SCHEMA } from './../modules/helper/chewbbaca_filter_schema'

kraken_db_url = Channel.fromPath(params.references['kraken2'].url)
confindr_db_url = Channel.fromPath(params.references['confindr'].url)
Expand All @@ -30,9 +31,9 @@ chewie_ids = Channel.fromList([
[ [ taxon: "Brucella melitensis" ], 10],
[ [ taxon: "Brucella" ], 11],
[ [ taxon: "Clostridium perfringens" ], 12],
[ [ taxon: "Clostridium chauvoei" ], 13]
[ [ taxon: "Clostridium chauvoei" ], 13],
[ [ taxon: "Bacillus anthracis" ], 14],
[ [ taxon: "Klebsiella oxytoca" ], 15]
[ [ taxon: "Klebsiella oxytoca" ], 15],
[ [ taxon: "Clostridium neonatale" ], 16],
[ [ taxon: "Shewanella" ], 17],
[ [ taxon: "Neisseria meningitidis" ], 18]
Expand Down Expand Up @@ -97,6 +98,24 @@ workflow BUILD_REFERENCES {
CHEWBBACA_DOWNLOADSCHEMA(
chewie_ids
)

// See if any schema has a filter list configured
CHEWBBACA_DOWNLOADSCHEMA.out.schema.map { m, s ->
def taxon = m.taxon.toLowerCase().replaceAll(/ /, "_")
def ffile = null
if (params.chewbbaca_filters[taxon]) {
ffile = params.chewbbaca_filters[taxon]
}
tuple(m, s, ffile)
}.branch { m, s, ffile ->
fail: ffile == null
pass: ffile
}.set { chewie_schema_with_filter }

// Filter that schema using the filter list
CHEWBBACA_FILTER_SCHEMA(
chewie_schema_with_filter.pass
)
}

if (params.build_references) {
Expand Down
6 changes: 3 additions & 3 deletions workflows/gabi.nf
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ workflow GABI {
*/
if (!params.skip_serotyping) {
SEROTYPING(
ch_assemblies_without_plasmids_with_taxa
ch_assemblies_clean_with_taxa
)
ch_versions = ch_versions.mix(SEROTYPING.out.versions)
ch_report = ch_report.mix(SEROTYPING.out.reports)
Expand All @@ -390,7 +390,7 @@ workflow GABI {
*/
if (!params.skip_mlst) {
MLST_TYPING(
ch_assemblies_without_plasmids_with_taxa
ch_assemblies_clean_with_taxa
)
ch_mlst = MLST_TYPING.out.report
ch_versions = ch_versions.mix(MLST_TYPING.out.versions)
Expand All @@ -405,7 +405,7 @@ workflow GABI {
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
ANNOTATE(
ch_assemblies_without_plasmids_with_taxa,
ch_assemblies_clean_with_taxa,
ch_prokka_proteins,
ch_prokka_prodigal
)
Expand Down

0 comments on commit 62e7a34

Please sign in to comment.