Skip to content

Commit

Permalink
Add FCSGX_CLEANGENOME (#6975)
Browse files Browse the repository at this point in the history
Co-authored-by: Lauren huet <[email protected]>
Co-authored-by: Sateesh_Peri <[email protected]>
Co-authored-by: Edmund Miller <[email protected]>
  • Loading branch information
4 people authored Nov 14, 2024
1 parent 4265ef4 commit b48e64d
Show file tree
Hide file tree
Showing 5 changed files with 307 additions and 0 deletions.
7 changes: 7 additions & 0 deletions modules/nf-core/fcsgx/cleangenome/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the FCSGX_CLEANGENOME module.
# Channels are listed in lookup order.
channels:
  - conda-forge
  - bioconda
# The tool is pinned to an exact version; the channel prefix makes the
# package origin explicit.
dependencies:
  - "bioconda::ncbi-fcs-gx=0.5.4"
51 changes: 51 additions & 0 deletions modules/nf-core/fcsgx/cleangenome/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Runs `gx clean-genome` on an assembly using an FCS-GX action report.
//
// Input:  tuple of meta map, assembly FASTA, and the FCS-GX report that is
//         passed to `--action-report`.
// Output: cleaned     - tuple(meta, <prefix>.cleaned.fasta)
//         contaminants - tuple(meta, <prefix>.contaminants.fasta)
//         versions    - versions.yml with the fcsgx version parsed from `gx --help`
process FCSGX_CLEANGENOME {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ncbi-fcs-gx:0.5.4--h4ac6f70_1':
        'biocontainers/ncbi-fcs-gx:0.5.4--h4ac6f70_1' }"

    input:
    tuple val(meta), path(fasta), path(fcsgx_report)

    output:
    tuple val(meta), path("*.cleaned.fasta")     , emit: cleaned
    tuple val(meta), path("*.contaminants.fasta"), emit: contaminants
    path "versions.yml"                          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The input is staged into the work directory under its own file name.
    // If that name equals one of the output names, gx would write over its
    // own input, so fail early with an actionable message instead.
    if ("${fasta}" == "${prefix}.cleaned.fasta" || "${fasta}" == "${prefix}.contaminants.fasta") {
        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    }
    """
    gx \\
        clean-genome \\
        --input ${fasta} \\
        --action-report ${fcsgx_report} \\
        --output ${prefix}.cleaned.fasta \\
        --contam-fasta-out ${prefix}.contaminants.fasta \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fcsgx: \$( gx --help | sed '/build/!d; s/.*:v//; s/-.*//' )
    END_VERSIONS
    """

    stub:
    // The stub only creates empty placeholder outputs, so task.ext.args is not read.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.cleaned.fasta
    touch ${prefix}.contaminants.fasta

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fcsgx: \$( gx --help | sed '/build/!d; s/.*:v//; s/-.*//' )
    END_VERSIONS
    """
}
64 changes: 64 additions & 0 deletions modules/nf-core/fcsgx/cleangenome/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for FCSGX_CLEANGENOME: describes the tool and the process
# input/output channels declared in main.nf.
name: "fcsgx_cleangenome"
description: Runs FCS-GX (Foreign Contamination Screen - Genomic cross-species aligner) to
  remove foreign contamination from genome assemblies
keywords:
  - genome
  - assembly
  - contamination
  - screening
  - cleaning
  - fcs-gx
tools:
  - "fcsgx":
      description: "The NCBI Foreign Contamination Screen. Genomic cross-species aligner,
        for contamination detection."
      homepage: "https://github.com/ncbi/fcs-gx"
      documentation: "https://github.com/ncbi/fcs/wiki/"
      tool_dev_url: "https://github.com/ncbi/fcs-gx"
      doi: "10.1186/s13059-024-03198-7"
      licence: ["NCBI-PD"]
      identifier: "biotools:ncbi_fcs"

# One input channel: tuple val(meta), path(fasta), path(fcsgx_report)
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`
    - fasta:
        type: file
        description: Input genome assembly file in FASTA format
        pattern: "*.{fa,fasta,fna}"
    - fcsgx_report:
        type: file
        description: Final contamination report with contaminant cleaning actions. Generated using FCSGX_RUNGX
# Output patterns mirror the path() globs of the process output block.
output:
  - cleaned:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.cleaned.fasta":
          type: file
          description: The fasta file after cleaning, where sequences annotated as ACTION_EXCLUDE or ACTION_TRIM are excluded
          pattern: "*.cleaned.fasta"
  - contaminants:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.contaminants.fasta":
          type: file
          description: Sequences annotated as ACTION_EXCLUDE which are marked as contaminants.
          pattern: "*.contaminants.fasta"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@mahesh-panchal"
  - "@LaurenHuet"
maintainers:
  - "@mahesh-panchal"
85 changes: 85 additions & 0 deletions modules/nf-core/fcsgx/cleangenome/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// nf-test suite for FCSGX_CLEANGENOME.
// The setup stage runs FCSGX_FETCHDB and FCSGX_RUNGX so that both tests can
// feed FCSGX_RUNGX.out.fcsgx_report into the process under test.
nextflow_process {

    name "Test Process FCSGX_CLEANGENOME"
    script "../main.nf"
    process "FCSGX_CLEANGENOME"

    tag "modules"
    tag "modules_nfcore"
    tag "fcsgx"
    tag "fcsgx/fetchdb"
    tag "fcsgx/rungx"
    tag "fcsgx/cleangenome"

    setup {
        // Step 1: fetch the database described by the test-only manifest.
        run("FCSGX_FETCHDB"){
            script "../../fetchdb/main.nf"
            process {
                """
                input[0] = file('https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.manifest', checkIfExists: true)
                """
            }
        }
        // Step 2: run FCSGX_RUNGX on the test assembly against that database.
        run("FCSGX_RUNGX") {
            script "../../rungx/main.nf"
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    '2697049', // taxid for SARS-CoV-2
                    file('https://zenodo.org/records/10932013/files/FCS_combo_test.fa', checkIfExists: true),
                ]
                input[1] = FCSGX_FETCHDB.out.database
                input[2] = []
                """
            }
        }
    }

    // Real run: the snapshot covers every output channel of the process.
    test("sarscov2 - fasta") {

        when {
            process {
                """
                input[0] = Channel.value([
                    [ id:'test' ], // meta map
                    file('https://zenodo.org/records/10932013/files/FCS_combo_test.fa', checkIfExists: true),
                ]).join(FCSGX_RUNGX.out.fcsgx_report)
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Stub run: same inputs, executed with the -stub option.
    test("sarscov2 - fasta - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = Channel.value([
                    [ id:'test' ], // meta map
                    file('https://zenodo.org/records/10932013/files/FCS_combo_test.fa', checkIfExists: true),
                ]).join(FCSGX_RUNGX.out.fcsgx_report)
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
100 changes: 100 additions & 0 deletions modules/nf-core/fcsgx/cleangenome/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{
"sarscov2 - fasta - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.cleaned.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
[
{
"id": "test"
},
"test.contaminants.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
"versions.yml:md5,4235d280269123cbd3e43d920bcf71b9"
],
"cleaned": [
[
{
"id": "test"
},
"test.cleaned.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"contaminants": [
[
{
"id": "test"
},
"test.contaminants.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,4235d280269123cbd3e43d920bcf71b9"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.0"
},
"timestamp": "2024-11-12T10:02:26.605398704"
},
"sarscov2 - fasta": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.cleaned.fasta:md5,59ba4b4c773f02a0dc90034762aa1ddf"
]
],
"1": [
[
{
"id": "test"
},
"test.contaminants.fasta:md5,17c11ea7d2d0076be5183b87618ee91e"
]
],
"2": [
"versions.yml:md5,4235d280269123cbd3e43d920bcf71b9"
],
"cleaned": [
[
{
"id": "test"
},
"test.cleaned.fasta:md5,59ba4b4c773f02a0dc90034762aa1ddf"
]
],
"contaminants": [
[
{
"id": "test"
},
"test.contaminants.fasta:md5,17c11ea7d2d0076be5183b87618ee91e"
]
],
"versions": [
"versions.yml:md5,4235d280269123cbd3e43d920bcf71b9"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.0"
},
"timestamp": "2024-11-12T12:05:43.065207163"
}
}

0 comments on commit b48e64d

Please sign in to comment.