Skip to content

Commit

Permalink
Bin QC workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
dialvarezs committed Oct 31, 2024
1 parent 8b4fcc7 commit 76dac5c
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 103 deletions.
122 changes: 122 additions & 0 deletions subworkflows/local/bin_qc.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* BUSCO/CheckM/CheckM2: Quantitative measures for the assessment of genome assembly
*/

include { BUSCO_DB_PREPARATION } from '../../modules/local/busco_db_preparation'
include { BUSCO } from '../../modules/local/busco'
include { BUSCO_SAVE_DOWNLOAD } from '../../modules/local/busco_save_download'
include { BUSCO_SUMMARY } from '../../modules/local/busco_summary'
include { CHECKM_QA } from '../../modules/nf-core/checkm/qa/main'
include { CHECKM_LINEAGEWF } from '../../modules/nf-core/checkm/lineagewf/main'
include { CHECKM2_PREDICT } from '../../modules/nf-core/checkm2/predict/main'
include { COMBINE_TSV } from '../../modules/local/combine_tsv'

/*
 * BIN_QC: run exactly one bin quality-control tool, selected by
 * `params.binqc_tool` ("busco", "checkm" or "checkm2"), and expose a single
 * combined summary for downstream steps.
 *
 * take:
 *   bins        channel of [ val(meta), path(bin) ]
 *   checkm_db   passed unchanged to CHECKM_LINEAGEWF
 *   checkm2_db  passed unchanged to CHECKM2_PREDICT
 *   busco_db    file-like value queried with isEmpty()/extension/isDirectory();
 *               an "empty" value enables BUSCO automatic lineage selection
 *
 * emit:
 *   summary     combined summary produced by the selected tool
 *   checkm_tsv  CHECKM_QA output when the tool is "checkm", otherwise []
 *   multiqc     BUSCO summary files when the tool is "busco", otherwise []
 *   versions    versions files of the tools that were run
 */
workflow BIN_QC {
    take:
    bins       // channel: [ val(meta), path(bin) ]
    checkm_db
    checkm2_db
    busco_db

    main:
    ch_versions = Channel.empty()

    // Defaults so that the emit block never references an undefined variable
    // when `params.binqc_tool` does not match any of the branches below.
    summary = Channel.empty()
    multiqc_reports = Channel.empty()

    if (params.binqc_tool == "busco") {
        // BUSCO workflow
        if (!busco_db.isEmpty()) {
            if (busco_db.extension in ['gz', 'tgz']) {
                // Expects to be tar.gz!
                BUSCO_DB_PREPARATION(busco_db)
                ch_db_for_busco = BUSCO_DB_PREPARATION.out.db.map { meta, db ->
                    [[id: meta, lineage: 'Y'], db]
                }
            }
            else if (busco_db.isDirectory()) {
                // Set meta to match expected channel cardinality for BUSCO
                ch_db_for_busco = Channel
                    .of(busco_db)
                    .map { db ->
                        def basename = db.getBaseName()
                        def lineage = basename.contains('odb10') ? 'Y' : 'N'
                        [[id: basename, lineage: lineage], db]
                    }
                    .collect()
            }
            else {
                // Fail early with a clear message instead of hitting an
                // undefined `ch_db_for_busco` further down.
                error("Unsupported BUSCO database '${busco_db}': expected a .gz/.tgz archive or a directory")
            }
        }
        else {
            // Set BUSCO database to empty to allow for --auto-lineage
            ch_db_for_busco = Channel
                .of([])
                .map { empty_db -> [[lineage: ''], []] }
                .collect()
        }

        BUSCO(bins, ch_db_for_busco)

        // Must come after the BUSCO invocation: a process' `.out` cannot be
        // accessed before the process has been called.
        if (params.save_busco_db) {
            // publish files downloaded by Busco
            ch_downloads = BUSCO.out.busco_downloads
                .groupTuple()
                .map { lin, downloads -> downloads[0] }
                .toSortedList()
                .flatten()
            BUSCO_SAVE_DOWNLOAD(ch_downloads)
        }

        // Each summary input collapses to a single list; `ifEmpty([])` keeps
        // BUSCO_SUMMARY runnable when a given output channel emitted nothing.
        BUSCO_SUMMARY(
            BUSCO.out.summary_domain.map { it[1] }.collect().ifEmpty([]),
            BUSCO.out.summary_specific.map { it[1] }.collect().ifEmpty([]),
            BUSCO.out.failed_bin.map { it[1] }.collect().ifEmpty([])
        )

        multiqc_reports = BUSCO.out.summary_domain.mix(BUSCO.out.summary_specific).map { it[1] }
        summary = BUSCO_SUMMARY.out.summary
        ch_versions = ch_versions.mix(BUSCO.out.versions.first())
    }
    else if (params.binqc_tool == "checkm") {
        // CheckM workflow (eukaryotic bins are excluded)
        ch_bins_for_checkmlineagewf = bins
            .filter { meta, bin ->
                meta.domain != "eukarya"
            }
            .multiMap { meta, fa ->
                reads: [meta, fa]
                // `fa` may be a single path or a list of paths depending on how
                // the caller shaped the channel; normalise before deriving the
                // (unique) file extension handed to CheckM.
                ext: (fa instanceof Collection ? fa : [fa]).collect { it.extension }.unique().join("")
            }

        CHECKM_LINEAGEWF(ch_bins_for_checkmlineagewf.reads, ch_bins_for_checkmlineagewf.ext, checkm_db)
        ch_versions = ch_versions.mix(CHECKM_LINEAGEWF.out.versions.first())

        // Pair each CheckM output directory with its marker file; the trailing
        // [] fills the fourth tuple slot expected by CHECKM_QA.
        ch_checkmqa_input = CHECKM_LINEAGEWF.out.checkm_output
            .join(CHECKM_LINEAGEWF.out.marker_file)
            .map { meta, dir, marker ->
                [meta, dir, marker, []]
            }

        CHECKM_QA(ch_checkmqa_input, [])

        COMBINE_TSV(CHECKM_QA.out.output.map { it[1] }.collect())

        summary = COMBINE_TSV.out.combined
        ch_versions = ch_versions.mix(CHECKM_QA.out.versions.first())
    }
    else if (params.binqc_tool == "checkm2") {
        // CheckM2 workflow
        CHECKM2_PREDICT(bins, checkm2_db)

        COMBINE_TSV(CHECKM2_PREDICT.out.checkm2_tsv.map { it[1] }.collect())

        summary = COMBINE_TSV.out.combined
        ch_versions = ch_versions.mix(CHECKM2_PREDICT.out.versions.first())
    }

    emit:
    summary    = summary
    checkm_tsv = params.binqc_tool == "checkm" ? CHECKM_QA.out.output : []
    multiqc    = params.binqc_tool == "busco" ? multiqc_reports : []
    versions   = ch_versions
}
54 changes: 0 additions & 54 deletions subworkflows/local/checkm_qc.nf

This file was deleted.

64 changes: 15 additions & 49 deletions workflows/mag.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@ include { methodsDescriptionText } from '../subwo
//
include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation'
include { BINNING } from '../subworkflows/local/binning'
include { BIN_QC } from '../subworkflows/local/bin_qc'
include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement'
include { BUSCO_QC } from '../subworkflows/local/busco_qc'
include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification'
include { CHECKM_QC } from '../subworkflows/local/checkm_qc'
include { GUNC_QC } from '../subworkflows/local/gunc_qc'
include { GTDBTK } from '../subworkflows/local/gtdbtk'
include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna'
Expand Down Expand Up @@ -185,11 +184,7 @@ workflow MAG {
ch_metaeuk_db = Channel.empty()
}

// Additional info for completion email and summary
def busco_failed_bins = [:]

// Get checkM database if not supplied

if (!params.skip_binqc && params.binqc_tool == 'checkm' && !params.checkm_db) {
ARIA2_UNTAR(params.checkm_download_url)
ch_checkm_db = ARIA2_UNTAR.out.downloaded_file
Expand Down Expand Up @@ -797,55 +792,26 @@ workflow MAG {

ch_input_bins_for_qc = ch_input_for_postbinning_bins_unbins.transpose()

if (!params.skip_binqc && params.binqc_tool == 'busco') {
/*
* BUSCO subworkflow: Quantitative measures for the assessment of genome assembly
*/

BUSCO_QC(
ch_busco_db,
ch_input_bins_for_qc
)
ch_busco_summary = BUSCO_QC.out.summary
ch_versions = ch_versions.mix(BUSCO_QC.out.versions.first())
// process information if BUSCO analysis failed for individual bins due to no matching genes
BUSCO_QC.out.failed_bin
.splitCsv(sep: '\t')
.map { bin, error ->
if (!bin.contains(".unbinned.")) {
busco_failed_bins[bin] = error
}
}
}
BIN_QC(
ch_input_bins_for_qc,
ch_checkm_db,
ch_checkm2_db,
ch_busco_db
)

if (!params.skip_binqc && params.binqc_tool in ['checkm', 'checkm2']) {
/*
* CheckM/CheckM2 subworkflow: Quantitative measures for the assessment of genome assembly
*/
ch_versions = ch_versions.mix(BIN_QC.out.versions)

ch_input_bins_for_checkm = ch_input_bins_for_qc.filter { meta, bins ->
if (params.run_gunc) {
ch_input_bins_for_gunc = ch_input_for_postbinning_bins_unbins.filter { meta, bins ->
meta.domain != "eukarya"
}

CHECKM_QC(
ch_input_bins_for_checkm.groupTuple(),
ch_checkm_db,
ch_checkm2_db
GUNC_QC(
ch_input_bins_for_gunc,
ch_gunc_db,
params.binqc_tool == 'checkm' ? BIN_QC.out.checkm_tsv : []
)
ch_checkm_summary = CHECKM_QC.out.summary

ch_versions = ch_versions.mix(CHECKM_QC.out.versions)
}

if (params.run_gunc && params.binqc_tool == 'checkm') {
GUNC_QC(ch_input_bins_for_checkm, ch_gunc_db, CHECKM_QC.out.checkm_tsv)
ch_versions = ch_versions.mix(GUNC_QC.out.versions)
}
else if (params.run_gunc) {
ch_input_bins_for_gunc = ch_input_for_postbinning_bins_unbins.filter { meta, bins ->
meta.domain != "eukarya"
}
GUNC_QC(ch_input_bins_for_qc, ch_gunc_db, [])
ch_versions = ch_versions.mix(GUNC_QC.out.versions)
}

Expand Down Expand Up @@ -1075,7 +1041,7 @@ workflow MAG {
}

if (!params.skip_binning && !params.skip_binqc && params.binqc_tool == 'busco') {
ch_multiqc_files = ch_multiqc_files.mix(BUSCO_QC.out.multiqc.collect().ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(BIN_QC.out.multiqc.collect().ifEmpty([]))
}


Expand Down

0 comments on commit 76dac5c

Please sign in to comment.