Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the coverage report, we were using the samtools flags for decont. instead of mapping #8

Merged
merged 11 commits into from
Jun 3, 2024
3 changes: 3 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ top_modules:
sp:
quast_config:
fn: "*.tsv"

section_comments:
Samtools: Assembly coverage
31 changes: 18 additions & 13 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -72,28 +72,33 @@ process {
]
}

withName: 'BWAMEM2_MEM' {
// This BWAMEM2_MEM belongs to the coverage module
withName: 'BWAMEM2_MEM_COVERAGE' {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }

ext.args = "-M"
ext.args2 = [
'-f',
'12',
'-F',
'256',
'-uS',
].join(' ').trim()
ext.args2 = "-F 268 -uS"
}

withName: 'SAMTOOLS_BAM2FQ' {
/* Decontamination */
withName: 'BWAMEM2DECONTNOBAMS' {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
ext.prefix = "decontaminated"
ext.prefix = { "${meta2.id}_decontaminated" }
mberacochea marked this conversation as resolved.
Show resolved Hide resolved
}

withName: 'HUMAN_PHIX_DECONTAMINATION' {
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
}

withName: 'HOST_DECONTAMINATION' {
memory = { check_max( 24.GB * task.attempt, 'memory' ) }
}
/* --------- */

/* Assembly */
withName: 'SPADES' {
memory = { check_max(params.assembly_memory.GB * task.attempt, 'memory') }
cpus = { check_max( 32 * task.attempt, 'cpus') }
Expand Down
18 changes: 4 additions & 14 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,12 @@
"bwamem2/mem": {
"branch": "main",
"git_sha": "75707538d91ddd27fb6007b4ac3710cb05154780",
"installed_by": ["reads_bwamem2_decontamination"]
"installed_by": ["modules"]
},
"samtools/bam2fq": {
"branch": "main",
"git_sha": "88f2bfbe6f0ba858d0833db590e647c4678656a7",
"installed_by": ["reads_bwamem2_decontamination"],
"patch": "modules/ebi-metagenomics/samtools/bam2fq/samtools-bam2fq.diff"
}
}
},
"subworkflows": {
"ebi-metagenomics": {
"reads_bwamem2_decontamination": {
"bwamem2decontnobams": {
"branch": "main",
"git_sha": "0b40060df67681e0172aab145460618c08d99516",
"installed_by": ["subworkflows"]
"git_sha": "32049180387cf2406254acf57882fc55915cb52e",
"installed_by": ["modules"]
}
}
}
Expand Down
5 changes: 2 additions & 3 deletions modules/ebi-metagenomics/bwamem2/mem/environment.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
name: bwamem2_mem

channels:
- conda-forge
- bioconda
- defaults

dependencies:
- bwa-mem2=2.2.1
- htslib=1.19.1
# renovate: datasource=conda depName=bioconda/samtools
- samtools=1.19.2
- htslib=1.19.1
11 changes: 11 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: bwamem2decontnobams

channels:
- conda-forge
- bioconda
- defaults

dependencies:
- bwa-mem2=2.2.1
- htslib=1.19.1
- samtools=1.19.2
56 changes: 56 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
process BWAMEM2DECONTNOBAMS {
tag "$meta.id"
label 'process_high'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' :
'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }"


input:
tuple val(meta), path(reads)
tuple val(meta2), path(index)

output:
tuple val(meta), path("*{_1,_2,_interleaved}.fq.gz"), emit: decont_reads
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def ref_prefix = task.ext.ref_prefix ?: "${meta2.id}"
"""
INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
if [[ "${meta.single_end}" == "true" ]]; then
bwa-mem2 \\
mem \\
-M \\
-t $task.cpus \\
\$INDEX \\
$reads \\
| samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\
| samtools sort -@ ${task.cpus} -n -O bam - \\
| samtools bam2fq -@ $task.cpus - | gzip --no-name > ${ref_prefix}_${prefix}_interleaved.fq.gz
else
bwa-mem2 \\
mem \\
-M \\
-t $task.cpus \\
\$INDEX \\
$reads \\
| samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\
| samtools sort -@ ${task.cpus} -n -O bam - \\
| samtools bam2fq -@ ${task.cpus} -1 ${ref_prefix}_${prefix}_1.fq.gz -2 ${ref_prefix}_${prefix}_2.fq.gz -0 /dev/null -s /dev/null
fi

cat <<-END_VERSIONS > versions.yml
"${task.process}":
bwa-mem2: \$(bwa-mem2 version 2> /dev/null)
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}
56 changes: 56 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "bwamem2decontnobams"
description: Decontamination module using bwamem2 and samtools that generates fastq files on the fly
keywords:
- alignment
- decontamination
- fastq
tools:
- bwamem2:
description: "Mapping DNA sequences against a large reference genome"
tool_dev_url: "https://github.com/bwa-mem2/bwa-mem2"
- samtools:
description: "Tools for dealing with SAM, BAM and CRAM files"
documentation: "http://www.htslib.org/doc/1.1/samtools.html"

input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2
for single-end and paired-end data, respectively
- meta2:
type: map
description: |
Groovy Map containing reference genome information
e.g. [ id:'ref_name' ]
- index:
type: file
description: |
A list of BWA index files

output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- decont_reads:
type: file
description: |
List of fastq files. Two files for paired-end reads and one file for single-end reads

authors:
- "@EBI-metagenomics"
maintainers:
- "@EBI-metagenomics"
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
nextflow_workflow {

name "Test Subworkflow READS_BWAMEM2_DECONTAMINATION"
name "Test module bwamem2decontnobams"
script "../main.nf"
workflow "READS_BWAMEM2_DECONTAMINATION"
workflow "BWAMEM2DECONTNOBAMS"

tag "subworkflows"
tag "subworkflows_nfcore"
tag "subworkflows/reads_bwamem2_decontamination"
tag "bwamem2/mem"
tag "samtools/bam2fq"
tag "modules"
tag "modules_nfcore"
tag "bwamem2decontnobams"

test("Illumina paired_end decontamination with MGYG000317500") {
when {
Expand Down Expand Up @@ -42,8 +40,8 @@ nextflow_workflow {
{ assert workflow.success },
// gzip stores extra information in the header, which makes comparing checksums impossible between operating systems.
// that is why we use the sizes of files, and that sort of thing
{ assert path(workflow.out.decontaminated_reads.get(0).get(1).get(0)).linesGzip.size() == 374028 },
{ assert path(workflow.out.decontaminated_reads.get(0).get(1).get(1)).linesGzip.size() == 374028 }
{ assert path(workflow.out.decont_reads.get(0).get(1).get(0)).linesGzip.size() == 374028 },
{ assert path(workflow.out.decont_reads.get(0).get(1).get(1)).linesGzip.size() == 374028 }
)
}
}
Expand Down Expand Up @@ -79,7 +77,7 @@ nextflow_workflow {
then {
assertAll(
{ assert workflow.success },
{ assert path(workflow.out.decontaminated_reads.get(0).get(1)).linesGzip.size() == 378312 }
{ assert path(workflow.out.decont_reads.get(0).get(1)).linesGzip.size() == 378312 }
)
}
}
Expand Down
2 changes: 2 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bwamem2decontnobams:
- modules/ebi-metagenomics/bwamem2decontnobams/**
7 changes: 0 additions & 7 deletions modules/ebi-metagenomics/samtools/bam2fq/environment.yml

This file was deleted.

57 changes: 0 additions & 57 deletions modules/ebi-metagenomics/samtools/bam2fq/main.nf

This file was deleted.

56 changes: 0 additions & 56 deletions modules/ebi-metagenomics/samtools/bam2fq/meta.yml

This file was deleted.

Loading