Skip to content

Commit

Permalink
Adding bam merging
Browse files Browse the repository at this point in the history
  • Loading branch information
marchoeppner committed Sep 30, 2024
1 parent 98309ab commit 7e637ec
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 7 deletions.
19 changes: 13 additions & 6 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,19 @@ params.version = workflow.manifest.version

// Map of selected pipeline parameters, keyed by a human-readable label.
// NOTE(review): where this map is consumed is not visible in this hunk.
summary = [:]

// Settings tied to an optional feature show "Not applied" when that feature's
// switch (params.skip_failed / params.subsample_reads) is not set.
summary["MaxContigs"] = params.skip_failed ? params.max_contigs : "Not applied"
summary["Busco"] = params.busco_lineage
// Falls back to the label "built-in" when no ConfindR database was supplied.
summary["ConfindR DB"] = params.confindr_db ? params.confindr_db : "built-in"
summary["Max Coverage"] = params.subsample_reads ? params.max_coverage : "Not applied"
summary["Genome size"] = params.subsample_reads ? params.genome_size : "Not applied"
summary["Shovill assembler"] = params.shovill_assembler
// Per-tool thresholds are grouped in nested maps keyed by tool name.
summary["AMRfinder"] = [:]
summary["Abricate"] = [:]
summary["AMRfinder"]["min_cov"] = params.arg_amrfinderplus_coveragemin
summary["AMRfinder"]["min_id"] = params.arg_amrfinderplus_identmin
summary["Abricate"]["min_id"] = params.arg_abricate_minid
summary["Abricate"]["min_cov"] = params.arg_abricate_mincov

// Use the workflow session ID as the run name when params.run_name is exactly false;
// otherwise use the supplied value, converted to a string.
run_name = (params.run_name == false) ? "${workflow.sessionId}" : "${params.run_name}"

WorkflowMain.initialise(workflow, params, log)
Expand Down Expand Up @@ -50,10 +63,6 @@ workflow.onComplete {
emailFields['session'] = workflow.sessionId
emailFields['runName'] = run_name
emailFields['Subsampling'] = params.subsample_reads
if (params.subsample_reads) {
emailFields['Maximum coverage'] = params.max_coverage
emailFields['Genome size'] = params.genome_size
}
emailFields['success'] = workflow.success
emailFields['dateStarted'] = workflow.start
emailFields['dateComplete'] = workflow.complete
Expand Down Expand Up @@ -103,13 +112,11 @@ workflow.onComplete {
if (workflow.success && !params.skip_multiqc) {
mqcReport = multiqc_report.getVal()
if (mqcReport.getClass() == ArrayList) {
// TODO: Update name of pipeline
log.warn "[bio-raum/gabi] Found multiple reports from process 'multiqc', will use only one"
mqcReport = mqcReport[0]
}
}
} catch (all) {
// TODO: Update name of pipeline
log.warn '[bio-raum/gabi] Could not attach MultiQC report to summary email'
}

Expand Down
12 changes: 12 additions & 0 deletions subworkflows/coverage/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ workflow COVERAGE {
)
ch_bam = ch_bam.mix(ALIGN_LONG_READS.out.bam)

bam_mapped = ch_bam.map { meta, bam ->
new_meta = [:]
new_meta.sample_id = meta.sample_id
def groupKey = meta.sample_id
tuple( groupKey, new_meta, bam)
}.groupTuple(by: [0,1]).map { g ,new_meta ,bam -> [ new_meta, bam ] }

bam_mapped.branch {
single: it[1].size() == 1
multiple: it[1].size() > 1
}.set { bam_to_merge }

// Index the BAM files
SAMTOOLS_INDEX(
ch_bam
Expand Down
4 changes: 4 additions & 0 deletions subworkflows/find_references/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ workflow FIND_REFERENCES {
}

// Crude method to get the best hit from the mash list
// Basically we take the top hit as the best and only match
// TODO: Improve this to perhaps look at multiple equally good matches
// and find the least fragmented one - will require touching the actual assemblies and
// counting contigs or similar
def mash_get_best(report) {
gbk = ''
lines = file(report).readLines()
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/taxonomy_profiling/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def extract_taxon(aFile) {
// Kraken2 has a laughable data format, let's try to find the first species-level assignment...
if (elements[3] == 'S' && taxon == 'unknown') {
def fraction = Float.parseFloat(elements[0])
if (fraction >= 40.0) {
if (fraction >= 30.0) {
taxon = elements[5..-1].join(' ').trim()
}
}
Expand Down

0 comments on commit 7e637ec

Please sign in to comment.