diff --git a/main.nf b/main.nf index d314047..bcab24f 100644 --- a/main.nf +++ b/main.nf @@ -19,6 +19,19 @@ params.version = workflow.manifest.version summary = [:] +summary["MaxContigs"] = params.skip_failed ? params.max_contigs : "Not applied" +summary["Busco"] = params.busco_lineage +summary["ConfindR DB"] = params.confindr_db ? params.confindr_db : "built-in" +summary["Max Coverage"] = params.subsample_reads ? params.max_coverage : "Not applied" +summary["Genome size"] = params.subsample_reads ? params.genome_size : "Not applied" +summary["Shovill assembler"] = params.shovill_assembler +summary["AMRfinder"] = [:] +summary["Abricate"] = [:] +summary["AMRfinder"]["min_cov"] = params.arg_amrfinderplus_coveragemin +summary["AMRfinder"]["min_id"] = params.arg_amrfinderplus_identmin +summary["Abricate"]["min_id"] = params.arg_abricate_minid +summary["Abricate"]["min_cov"] = params.arg_abricate_mincov + run_name = (params.run_name == false) ? "${workflow.sessionId}" : "${params.run_name}" WorkflowMain.initialise(workflow, params, log) @@ -50,10 +63,6 @@ workflow.onComplete { emailFields['session'] = workflow.sessionId emailFields['runName'] = run_name emailFields['Subsampling'] = params.subsample_reads - if (params.subsample_reads) { - emailFields['Maximum coverage'] = params.max_coverage - emailFields['Genome size'] = params.genome_size - } emailFields['success'] = workflow.success emailFields['dateStarted'] = workflow.start emailFields['dateComplete'] = workflow.complete @@ -103,13 +112,11 @@ workflow.onComplete { if (workflow.success && !params.skip_multiqc) { mqcReport = multiqc_report.getVal() if (mqcReport.getClass() == ArrayList) { - // TODO: Update name of pipeline log.warn "[bio-raum/gabi] Found multiple reports from process 'multiqc', will use only one" mqcReport = mqcReport[0] } } } catch (all) { - // TODO: Update name of pipeline log.warn '[bio-raum/gabi] Could not attach MultiQC report to summary email' } diff --git a/subworkflows/coverage/main.nf 
b/subworkflows/coverage/main.nf index 7fa506c..e88efae 100644 --- a/subworkflows/coverage/main.nf +++ b/subworkflows/coverage/main.nf @@ -56,6 +56,18 @@ workflow COVERAGE { ) ch_bam = ch_bam.mix(ALIGN_LONG_READS.out.bam) + bam_mapped = ch_bam.map { meta, bam -> + new_meta = [:] + new_meta.sample_id = meta.sample_id + def groupKey = meta.sample_id + tuple( groupKey, new_meta, bam) + }.groupTuple(by: [0,1]).map { g ,new_meta ,bam -> [ new_meta, bam ] } + + bam_mapped.branch { + single: it[1].size() == 1 + multiple: it[1].size() > 1 + }.set { bam_to_merge } + + // Index the BAM files SAMTOOLS_INDEX( ch_bam diff --git a/subworkflows/find_references/main.nf b/subworkflows/find_references/main.nf index baa2cc8..5798c60 100644 --- a/subworkflows/find_references/main.nf +++ b/subworkflows/find_references/main.nf @@ -67,6 +67,10 @@ workflow FIND_REFERENCES { } // Crude method to get the best hit from the mash list +// Basically we take the top hit as the best and only match +// TODO: Improve this to perhaps look at multiple equally good matches +// and find the least fragmented one - will require touching the actual assemblies and +// counting contigs or similar def mash_get_best(report) { gbk = '' lines = file(report).readLines() diff --git a/subworkflows/taxonomy_profiling/main.nf b/subworkflows/taxonomy_profiling/main.nf index 3384555..397e5e1 100644 --- a/subworkflows/taxonomy_profiling/main.nf +++ b/subworkflows/taxonomy_profiling/main.nf @@ -48,7 +48,7 @@ def extract_taxon(aFile) { // Kraken2 has a laughable data format, let's try to find the first species-level assignment... if (elements[3] == 'S' && taxon == 'unknown') { def fraction = Float.parseFloat(elements[0]) - if (fraction >= 40.0) { + if (fraction >= 30.0) { taxon = elements[5..-1].join(' ').trim() } }