Skip to content

Commit

Permalink
Merge pull request #708 from dialvarezs/dev-exclude-unbinned
Browse files Browse the repository at this point in the history
Add parameter to exclude unbinned data from post-binning steps
  • Loading branch information
jfy133 authored Oct 30, 2024
2 parents f0ca539 + 1a00c14 commit cd5ebae
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs)

### `Changed`

### `Fixed`

- [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs)

### `Dependencies`

### `Deprecated`
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ params {
refine_bins_dastool = false
refine_bins_dastool_threshold = 0.5
postbinning_input = 'raw_bins_only'
exclude_unbins_from_postbinning = false

// Bin QC
skip_binqc = false
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,12 @@
"type": "integer",
"default": 3000,
"description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp."
},
"exclude_unbins_from_postbinning": {
"type": "boolean",
"description": "Exclude unbinned contigs from the post-binning steps (bin QC, taxonomic classification, and annotation steps).",
"help": "If you're not interested in assembly results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, which can be quite slow and spin up many tasks.",
"default": false
}
}
},
Expand Down
22 changes: 13 additions & 9 deletions workflows/mag.nf
Original file line number Diff line number Diff line change
Expand Up @@ -769,15 +769,19 @@ workflow MAG {
ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins)
}

DEPTHS(ch_input_for_postbinning_bins_unbins, BINNING.out.metabat2depths, ch_short_reads)
ch_input_for_postbinning = params.exclude_unbins_from_postbinning
? ch_input_for_postbinning_bins
: ch_input_for_postbinning_bins_unbins

DEPTHS(ch_input_for_postbinning, BINNING.out.metabat2depths, ch_short_reads)
ch_input_for_binsummary = DEPTHS.out.depths_summary
ch_versions = ch_versions.mix(DEPTHS.out.versions)

/*
* Bin QC subworkflows: for checking bin completeness with BUSCO, CHECKM, and/or GUNC
*/

ch_input_bins_for_qc = ch_input_for_postbinning_bins_unbins.transpose()
ch_input_bins_for_qc = ch_input_for_postbinning.transpose()

if (!params.skip_binqc && params.binqc_tool == 'busco') {
/*
Expand Down Expand Up @@ -821,16 +825,16 @@ workflow MAG {
ch_versions = ch_versions.mix(GUNC_QC.out.versions)
}
else if (params.run_gunc) {
ch_input_bins_for_gunc = ch_input_for_postbinning_bins_unbins.filter { meta, bins ->
ch_input_bins_for_gunc = ch_input_for_postbinning.filter { meta, bins ->
meta.domain != "eukarya"
}
GUNC_QC(ch_input_bins_for_qc, ch_gunc_db, [])
GUNC_QC(ch_input_bins_for_gunc, ch_gunc_db, [])
ch_versions = ch_versions.mix(GUNC_QC.out.versions)
}

ch_quast_bins_summary = Channel.empty()
if (!params.skip_quast) {
ch_input_for_quast_bins = ch_input_for_postbinning_bins_unbins
ch_input_for_quast_bins = ch_input_for_postbinning
.groupTuple()
.map { meta, bins ->
def new_bins = bins.flatten()
Expand Down Expand Up @@ -859,7 +863,7 @@ workflow MAG {
ch_cat_db = CAT_DB_GENERATE.out.db
}
CAT(
ch_input_for_postbinning_bins_unbins,
ch_input_for_postbinning,
ch_cat_db
)
// Group all classification results for each sample in a single file
Expand Down Expand Up @@ -890,7 +894,7 @@ workflow MAG {
ch_gtdbtk_summary = Channel.empty()
if (gtdb) {

ch_gtdb_bins = ch_input_for_postbinning_bins_unbins.filter { meta, bins ->
ch_gtdb_bins = ch_input_for_postbinning.filter { meta, bins ->
meta.domain != "eukarya"
}

Expand Down Expand Up @@ -925,7 +929,7 @@ workflow MAG {
*/

if (!params.skip_prokka) {
ch_bins_for_prokka = ch_input_for_postbinning_bins_unbins
ch_bins_for_prokka = ch_input_for_postbinning
.transpose()
.map { meta, bin ->
def meta_new = meta + [id: bin.getBaseName()]
Expand All @@ -944,7 +948,7 @@ workflow MAG {
}

if (!params.skip_metaeuk && (params.metaeuk_db || params.metaeuk_mmseqs_db)) {
ch_bins_for_metaeuk = ch_input_for_postbinning_bins_unbins
ch_bins_for_metaeuk = ch_input_for_postbinning
.transpose()
.filter { meta, bin ->
meta.domain in ["eukarya", "unclassified"]
Expand Down

0 comments on commit cd5ebae

Please sign in to comment.