From 646c6bffd6d222606a640871becb2a7ec96b3353 Mon Sep 17 00:00:00 2001 From: Muneeb Nasir Date: Fri, 22 Nov 2024 12:33:51 +0000 Subject: [PATCH 1/3] Workflow dev | bwa and samtools --- workflows/metabolt.nf | 66 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/workflows/metabolt.nf b/workflows/metabolt.nf index eda0689..f75ce39 100644 --- a/workflows/metabolt.nf +++ b/workflows/metabolt.nf @@ -16,6 +16,11 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_meta include { FASTQC } from '../modules/nf-core/fastqc/main' include { FASTP } from '../modules/nf-core/fastp/main' include { MEGAHIT } from '../modules/nf-core/megahit/main' +include { BWA_INDEX } from '../modules/nf-core/bwa/index/main' +include { BWA_MEM } from '../modules/nf-core/bwa/mem/main' +include { SAMTOOLS_SORT } from '../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -61,13 +66,10 @@ workflow METABOLT { /* ================================================================================ - Assembly and Mapping + Assembly ================================================================================ */ - // - // MODULE: Run MEGAHIT for assembly - // // Prepare FASTP output for MEGAHIT input ch_megahit_input = FASTP.out.reads.map { meta, reads -> def reads1 = meta.single_end ? 
reads : reads[0] @@ -75,6 +77,9 @@ workflow METABOLT { [meta, reads1, reads2] } + // + // MODULE: Run MEGAHIT for assembly + // MEGAHIT ( ch_megahit_input ) @@ -85,6 +90,59 @@ workflow METABOLT { [meta + [assembler: 'megahit'], contigs] } + /* + ================================================================================ + Mapping and Alignment + ================================================================================ + */ + + // + // MODULE: Run BWA_INDEX on assembled contigs for indexing + // + BWA_INDEX(ch_assemblies) + + // Prepare input for BWA_MEM + ch_bwa_mem_input = FASTP.out.reads.combine(BWA_INDEX.out.index, by: 0) + .combine(ch_assemblies) + .map { meta, reads, index, assembly_meta, assembly -> + [meta, reads, index, assembly] + } + // Collect version information + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + + // + // MODULE: Run BWA_MEM for alignment of trimmed reads to indexed contigs + // + BWA_MEM( + ch_bwa_mem_input, + BWA_INDEX.out.index, + ch_assemblies, + true + ) + // Collect version information + ch_versions = ch_versions.mix(BWA_MEM.out.versions) + + // Prepare input for SAMTOOLS_SORT + ch_bam_for_sort = BWA_MEM.out.bam + ch_fasta_for_sort = ch_assemblies.map { meta, assembly -> [[id: "assembly"], assembly] } + + // SAMTOOLS_SORT + SAMTOOLS_SORT( + ch_bam_for_sort, + ch_fasta_for_sort + ) + + // Collect version information + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + // SAMTOOLS_INDEX + SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) + // Collect version information + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + + // Output channels for downstream use + ch_sorted_bam = SAMTOOLS_SORT.out.bam + ch_bam_index = SAMTOOLS_INDEX.out.bai.mix(SAMTOOLS_INDEX.out.csi) // // Collate and save software versions From 6bceffdd62d5b3f5d2b348c30465e36028510c79 Mon Sep 17 00:00:00 2001 From: Muneeb Nasir Date: Sun, 24 Nov 2024 19:12:42 +0000 Subject: [PATCH 2/3] modules.config: Configured the publish dirs of 
modules & their ext.args --- conf/modules.config | 109 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d266a38..cb0bbf6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -19,15 +19,114 @@ process { ] withName: FASTQC { - ext.args = '--quiet' + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/fastqc" }, + mode: params.publish_dir_mode, + pattern: "*.html" + ] + ext.prefix = { "${meta.id}_raw" } + tag = { "${meta.id}_raw" } } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + + withName: FASTP { + ext.args = [ + "-q ${params.fastp_qualified_quality}", + "--cut_front", + "--cut_tail", + "--cut_mean_quality ${params.fastp_cut_mean_quality}", + "--length_required ${params.reads_minlength}" + ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/multiqc" }, + [ + path: { "${params.outdir}/fastp/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{html,json}" + ], + [ + path: { "${params.outdir}/fastp/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + ] + ] + ext.prefix = { "${meta.id}_trimmed" } + tag = { "${meta.id}_trim" } + } + + withName: MEGAHIT { + ext.args = [ + "--k-list ${params.megahit_kmer_list}", + "--min-contig-len ${params.megahit_min_contig_len}" + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/Assembly" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + filename.equals('versions.yml') + ? null + : filename.indexOf('.contigs.fa.gz') > 0 + ? filename + : filename.indexOf('.log') > 0 ? 
filename : null + } + ] + ext.prefix = { "${meta.id}_assembled" } + tag = { "${meta.id}_assembly" } + } + + withName: BWA_INDEX { + publishDir = [ + path: { "${params.outdir}/bwa/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) return null + return filename.startsWith('bwa/') ? filename.substring(4) : filename + } + ] + ext.prefix = { "${meta.id}_index" } + tag = { "${meta.id}_index" } + } + + withName: BWA_MEM { + publishDir = [ + path: { "${params.outdir}/bwa/aligned" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename + } ] + ext.prefix = { "${meta.id}_aligned" } + tag = { "${meta.id}_align" } } + withName: SAMTOOLS_SORT { + publishDir = [ + path: { "${params.outdir}/samtools/sorted" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename + } + ] + ext.prefix = { "${meta.id}_sorted" } + tag = { "${meta.id}_sort" } + } + + withName: SAMTOOLS_INDEX { + publishDir = [ + path: { "${params.outdir}/samtools/indexed" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename + } + ] + ext.prefix = { "${meta.id}_indexed" } + tag = { "${meta.id}_index" } + } + + withName: MULTIQC { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename + } + ] + ext.prefix = "multiqc_report" + tag = 'MULTIQC' + } } From de73659915989bb806e2b5e7c0867845f63b3f04 Mon Sep 17 00:00:00 2001 From: Muneeb Nasir Date: Sun, 24 Nov 2024 19:21:56 +0000 Subject: [PATCH 3/3] Updated workflow: Bug fixes, channelization & refinement of code --- workflows/metabolt.nf | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/workflows/metabolt.nf b/workflows/metabolt.nf index f75ce39..83b6965 100644 --- a/workflows/metabolt.nf +++ b/workflows/metabolt.nf @@ -100,13 +100,6 @@ workflow METABOLT { // MODULE: Run BWA_INDEX on assembled contigs for indexing // BWA_INDEX(ch_assemblies) - - // Prepare input for BWA_MEM - ch_bwa_mem_input = FASTP.out.reads.combine(BWA_INDEX.out.index, by: 0) - .combine(ch_assemblies) - .map { meta, reads, index, assembly_meta, assembly -> - [meta, reads, index, assembly] - } // Collect version information ch_versions = ch_versions.mix(BWA_INDEX.out.versions) @@ -114,7 +107,7 @@ workflow METABOLT { // MODULE: Run BWA_MEM for alignment of trimmed reads to indexed contigs // BWA_MEM( - ch_bwa_mem_input, + FASTP.out.reads, BWA_INDEX.out.index, ch_assemblies, true @@ -123,7 +116,10 @@ workflow METABOLT { ch_versions = ch_versions.mix(BWA_MEM.out.versions) // Prepare input for SAMTOOLS_SORT - ch_bam_for_sort = BWA_MEM.out.bam + ch_bam_for_sort = BWA_MEM.out.bam.map { meta, bam -> + // Ensure meta.id is unique: Add a suffix + [ meta + [id: "${meta.id}_aligned"], bam ] + } ch_fasta_for_sort = ch_assemblies.map { meta, assembly -> [[id: "assembly"], assembly] } // SAMTOOLS_SORT @@ -210,6 +206,8 @@ workflow METABOLT { emit: assemblies = ch_assemblies // channel: + aligned_sort = ch_sorted_bam + aligned_index = ch_bam_index multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ]