Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pypgx/runngspipeline #6823

Merged
merged 11 commits into from
Jan 13, 2025
7 changes: 7 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the pypgx/runngspipeline module.
channels:
- conda-forge
- bioconda
dependencies:
# Pinned to the same pypgx release (0.25.0) as the container images referenced in main.nf.
- bioconda::pypgx=0.25.0
64 changes: 64 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Runs `pypgx run-ngs-pipeline` for a single pharmacogene on one sample.
//
// Inputs:
//   - tuple(meta, vcf, tbi, coverage, control_stats, pgx_gene): variants to genotype, plus the
//     depth-of-coverage and control-statistics archives. `coverage` and `control_stats` may be
//     empty (`[]`); the corresponding command line flags are then omitted.
//   - tuple(meta2, resource_bundle): directory exported to pypgx as PYPGX_BUNDLE.
//   - assembly_version: genome assembly passed to `--assembly`; falls back to GRCh38 when empty.
//
// Outputs (all inside `<prefix>_pypgx_output/`):
//   - results: results.zip
//   - cnv_calls: cnv-calls.zip (optional)
//   - consolidated_variants: consolidated-variants.zip
//   - versions: versions.yml with the pypgx version
//
// Additional pypgx options can be supplied through `task.ext.args`.
process PYPGX_RUNNGSPIPELINE {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pypgx:0.25.0--pyh7e72e81_0':
        'biocontainers/pypgx:0.25.0--pyh7e72e81_0' }"

    input:
    tuple val(meta), path(vcf), path(tbi), path(coverage), path(control_stats), val(pgx_gene)
    tuple val(meta2), path(resource_bundle)
    val(assembly_version)

    output:
    tuple val(meta), path("*pypgx_output/results.zip"), emit: results
    tuple val(meta), path("*pypgx_output/cnv-calls.zip"), emit: cnv_calls, optional: true
    tuple val(meta), path("*pypgx_output/consolidated-variants.zip"), emit: consolidated_variants
    path("versions.yml"), emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command line options supplied by the pipeline configuration (empty by default).
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}_${pgx_gene}"
    def assembly = assembly_version ?: "GRCh38"
    // Optional inputs: only emit the flag when a file was actually provided.
    def depth_coverage = coverage ? "--depth-of-coverage ${coverage}" : ""
    def control_statistics = control_stats ? "--control-statistics ${control_stats}" : ""

    """
    export MPLCONFIGDIR="/tmp/"
    export PYPGX_BUNDLE=${resource_bundle}/

    pypgx run-ngs-pipeline \\
        ${args} \\
        --assembly ${assembly} \\
        ${pgx_gene} \\
        ${prefix}_pypgx_output/ \\
        --variants ${vcf} \\
        ${depth_coverage} \\
        ${control_statistics}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pypgx: \$(echo \$(pypgx -v 2>&1) | sed 's/.* //')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}_${pgx_gene}"

    """
    mkdir ${prefix}_pypgx_output
    # zip program unavailable in container
    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/results.zip", "w").close()'
    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/cnv-calls.zip", "w").close()'
    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/consolidated-variants.zip", "w").close()'

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pypgx: \$(echo \$(pypgx -v 2>&1) | sed 's/.* //')
    END_VERSIONS
    """
}
97 changes: 97 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "pypgx_runngspipeline"

description: PyPGx pharmacogenomics genotyping pipeline for NGS data.
keywords:
  - pypgx
  - pharmacogenetics
  - genotyping
tools:
  - "pypgx":
      description: "A Python package for pharmacogenomics research"
      homepage: "https://pypgx.readthedocs.io/en/latest/"
      documentation: "https://pypgx.readthedocs.io/en/latest/"
      tool_dev_url: "https://github.com/sbslee/pypgx"
      doi: "10.1371/journal.pone.0272129"
      licence: ["MIT"]
      identifier: ""

input:
  # First input tuple: per-sample files plus the gene to genotype.
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`.
    - vcf:
        type: file
        description: BGZIP compressed VCF file with SNVs/indels. Output of pypgx/createinputvcf.
        pattern: "*.{vcf.gz}"
    - tbi:
        type: file
        description: VCF tabix index.
        pattern: "*.{vcf.gz.tbi}"
    - coverage:
        type: file
        description: ZIP compressed file with depth of coverage information. Output
          of pypgx/preparedepthofcoverage. Coverage information is only required when
          running the module on a pharmacogene with known structural variants.
        pattern: "*.{zip}"
    - control_stats:
        type: file
        description: ZIP compressed file with control statistics. Output of pypgx/computecontrolstatistics.
          Control statistics are only required when running the module on a pharmacogene
          with known structural variants.
        pattern: "*.{zip}"
    - pgx_gene:
        type: string
        description: Pharmacogene to genotype/phenotype. A list of supported genes is
          available in the pypgx documentation "https://pypgx.readthedocs.io/en/latest/genes.html"
  # Second input tuple: the pypgx resource bundle.
  - - meta2:
        type: map
        description: |
          Groovy Map containing resource bundle information
          e.g. `[ id:'resource_bundle' ]`.
    - resource_bundle:
        type: directory
        description: Path to the pypgx resource bundle (https://github.com/sbslee/pypgx-bundle).
  - - assembly_version:
        type: string
        description: Genome assembly version to use, e.g. `GRCh37` or `GRCh38`. Defaults
          to `GRCh38` when empty.
output:
  - results:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*pypgx_output/results.zip":
          type: file
          description: Main output file of the pipeline in ZIP format, containing a table
            with star-alleles per sample and CNV calls where applicable.
  - cnv_calls:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*pypgx_output/cnv-calls.zip":
          type: file
          description: Optional output file in ZIP format, containing CNV calls per sample.
  - consolidated_variants:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*pypgx_output/consolidated-variants.zip":
          type: file
          description: Output file in ZIP format, containing a consolidated (and phased)
            VCF file.
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@Jorisvansteenbrugge"
maintainers:
  - "@Jorisvansteenbrugge"
127 changes: 127 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// nf-test specification for the PYPGX_RUNNGSPIPELINE module.
//
// The first test chains the upstream pypgx modules (VCF creation, depth of coverage,
// control statistics) and unpacks the resource bundle before running the pipeline on the
// CYP2D6 test BAM. The second test only exercises the stub block.
nextflow_process {

    name "Test Process PYPGX_RUNNGSPIPELINE"
    script "../main.nf"
    process "PYPGX_RUNNGSPIPELINE"

    tag "modules"
    tag "modules_nfcore"
    tag "pypgx"
    tag "pypgx/runngspipeline"
    tag "pypgx/createinputvcf"
    tag "pypgx/computecontrolstatistics"
    tag "pypgx/preparedepthofcoverage"
    tag "untar"

    test("human paired end bam - CYP2D6 locus - GRCh37 - CYP2D6 pharmacogene") {
        setup {
            run("PYPGX_CREATEINPUTVCF") {
                script "../../createinputvcf/main.nf"
                process {
                    """
                    input[0] = [
                        [id: 'test'], // meta map
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
                    ]
                    input[1] = [
                        [id: 'GRCh37'], // meta2 map
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/GRCh37_chr22.fasta.gz', checkIfExists: true)
                    ]
                    input[2] = ["CYP2D6"] // Pharmacogene
                    input[3] = "GRCh37" // assembly version
                    """
                }
            }
            run("PYPGX_PREPAREDEPTHOFCOVERAGE") {
                // Point at main.nf explicitly, like the other setup runs.
                script "../../preparedepthofcoverage/main.nf"
                process {
                    """
                    input[0] = [
                        [id: 'test'], // meta map
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
                    ]
                    input[1] = ["CYP2D6"] // Pharmacogene
                    input[2] = "GRCh37" // assembly version
                    """
                }
            }
            run("PYPGX_COMPUTECONTROLSTATISTICS") {
                script "../../computecontrolstatistics/main.nf"
                process {
                    """
                    input[0] = [
                        [id: 'test'], // meta map
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
                    ]
                    input[1] = "22:42512500-42551883"
                    input[2] = "GRCh37" // assembly version
                    """
                }
            }
            run("UNTAR") {
                script "../../../untar/main.nf"
                process {
                    """
                    input[0] = [
                        [id: 'resource_bundle'],
                        file('https://github.com/sbslee/pypgx-bundle/archive/refs/tags/0.25.0.tar.gz')
                    ]
                    """
                }
            }
        }
        when {
            process {
                """
                input[0] = PYPGX_CREATEINPUTVCF.out.vcf
                    .join(PYPGX_CREATEINPUTVCF.out.tbi)
                    .join(PYPGX_PREPAREDEPTHOFCOVERAGE.out.coverage)
                    .join(PYPGX_COMPUTECONTROLSTATISTICS.out.control_stats)
                    .combine(Channel.fromList(["CYP2D6"]))
                input[1] = UNTAR.out.untar // resource bundle
                input[2] = 'GRCh37' //assembly version
                """
            }
        }

        then {
            def results = process.out.results[0][1]

            // Unpack results.zip and snapshot only metadata.txt together with the versions file.
            def zipFile = path(results).zip
            def paths = zipFile.extractAll()
            def metaFile = paths.find { it.getFileName().toString() == 'metadata.txt' }

            assertAll(
                { assert process.success },
                { assert zipFile.isValid() },
                { assert paths.size() == 2 },
                { assert snapshot(
                    metaFile,
                    process.out.versions).match()}
            )
        }
    }

    test("stub") {
        options "-stub"
        when {
            process {
                """
                input[0] = [[id: 'test'], [], [], [], [], 'gene']
                input[1] = [[id: 'bundle'], []]
                input[2] = ''
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
80 changes: 80 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"results.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"1": [
[
{
"id": "test"
},
"cnv-calls.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"2": [
[
{
"id": "test"
},
"consolidated-variants.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"3": [
"versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
],
"cnv_calls": [
[
{
"id": "test"
},
"cnv-calls.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"consolidated_variants": [
[
{
"id": "test"
},
"consolidated-variants.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"results": [
[
{
"id": "test"
},
"results.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"versions": [
"versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2025-01-10T10:50:49.324358"
},
"human paired end bam - CYP2D6 locus - GRCh37 - CYP2D6 pharmacogene": {
"content": [
"metadata.txt:md5,ea596a886920435c2a3c719b0ae85a8a",
[
"versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2025-01-10T13:48:55.071007"
}
}
Loading