Skip to content

Commit

Permalink
pypgx/runngspipeline (nf-core#6823)
Browse files Browse the repository at this point in the history
* pypgx/runngspipeline

* update PR comments

---------

Co-authored-by: Jorisvansteenbrugge <[email protected]>
Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
3 people authored and mazzalab committed Jan 18, 2025
1 parent fcf17e6 commit 1da34ed
Show file tree
Hide file tree
Showing 5 changed files with 375 additions and 0 deletions.
7 changes: 7 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the pypgx/runngspipeline module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Same pypgx release (0.25.0) as the container images referenced in main.nf.
  - bioconda::pypgx=0.25.0
64 changes: 64 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Runs `pypgx run-ngs-pipeline` for one sample / pharmacogene combination.
//
// Inputs:
//   1. meta map, VCF + tabix index, depth-of-coverage archive, control-statistics
//      archive and the name of the pharmacogene to call
//   2. meta2 map and the pypgx resource bundle directory
//   3. genome assembly version (GRCh38 is used when the value is empty)
//
// Outputs (all inside "<prefix>_pypgx_output/"):
//   results               - results.zip
//   cnv_calls             - cnv-calls.zip (optional)
//   consolidated_variants - consolidated-variants.zip
//   versions              - versions.yml with the pypgx version
process PYPGX_RUNNGSPIPELINE {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pypgx:0.25.0--pyh7e72e81_0':
        'biocontainers/pypgx:0.25.0--pyh7e72e81_0' }"

    input:
    tuple val(meta), path(vcf), path(tbi), path(coverage), path(control_stats), val(pgx_gene)
    tuple val(meta2), path(resource_bundle)
    val(assembly_version)

    output:
    tuple val(meta), path("*pypgx_output/results.zip"), emit: results
    tuple val(meta), path("*pypgx_output/cnv-calls.zip"), emit: cnv_calls, optional: true
    tuple val(meta), path("*pypgx_output/consolidated-variants.zip"), emit: consolidated_variants
    path("versions.yml"), emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options supplied through the pipeline configuration.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}_${pgx_gene}"
    // Fall back to GRCh38 when no assembly version is supplied.
    def assembly = assembly_version ?: "GRCh38"
    // Coverage and control statistics are optional: each flag is only emitted
    // when the corresponding file was actually provided.
    def depth_coverage = coverage ? "--depth-of-coverage ${coverage}" : ""
    def control_statistics = control_stats ? "--control-statistics ${control_stats}" : ""

    """
    # NOTE(review): MPLCONFIGDIR is matplotlib's config directory; presumably set
    # to /tmp because the default location is not writable in the container.
    export MPLCONFIGDIR="/tmp/"
    # NOTE(review): presumably the variable pypgx reads to locate its resource bundle.
    export PYPGX_BUNDLE=${resource_bundle}/
    pypgx run-ngs-pipeline \\
        --assembly ${assembly} \\
        ${pgx_gene} \\
        ${prefix}_pypgx_output/ \\
        --variants ${vcf} \\
        ${depth_coverage} \\
        ${control_statistics} \\
        ${args}

    # The version is the last space-separated token printed by `pypgx -v`.
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pypgx: \$(echo \$(pypgx -v 2>&1) | sed 's/.* //')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}_${pgx_gene}"

    """
    mkdir ${prefix}_pypgx_output
    # zip program unavailable in container
    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/results.zip", "w").close()'
    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/cnv-calls.zip", "w").close()'
    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/consolidated-variants.zip", "w").close()'

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pypgx: \$(echo \$(pypgx -v 2>&1) | sed 's/.* //')
    END_VERSIONS
    """
}
97 changes: 97 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "pypgx_runngspipeline"

description: PyPGx pharmacogenomics genotyping pipeline for NGS data.
keywords:
  - pypgx
  - pharmacogenetics
  - genotyping
tools:
  - "pypgx":
      description: "A Python package for pharmacogenomics research"
      homepage: "https://pypgx.readthedocs.io/en/latest/"
      documentation: "https://pypgx.readthedocs.io/en/latest/"
      tool_dev_url: "https://github.com/sbslee/pypgx"
      doi: "10.1371/journal.pone.0272129"
      licence: ["MIT"]
      identifier: ""

input:
  # First input channel: per-sample files and the gene to genotype.
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`.
    - vcf:
        type: file
        description: BGZIP compressed VCF file with SNVs/indels. Output of pypgx/createinputvcf.
        pattern: "*.{vcf.gz}"
    - tbi:
        type: file
        description: VCF tabix index.
        pattern: "*.{vcf.gz.tbi}"
    - coverage:
        type: file
        description: ZIP compressed file with depth of coverage information. Output
          of pypgx/preparedepthofcoverage. Coverage information is only required when
          running the module on a pharmacogene with known structural variants.
        pattern: "*.{zip}"
    - control_stats:
        type: file
        description: ZIP compressed file with control statistics. Output of pypgx/computecontrolstatistics.
          Control statistics are only required when running the module on a pharmacogene
          with known structural variants.
        pattern: "*.{zip}"
    - pgx_gene:
        type: string
        description: Pharmacogene to genotype/phenotype. A list of supported genes is
          available in the pypgx documentation "https://pypgx.readthedocs.io/en/latest/genes.html"
  # Second input channel: the pypgx resource bundle.
  - - meta2:
        type: map
        description: |
          Groovy Map containing resource bundle information
          e.g. `[ id:'resource_bundle' ]`.
    - resource_bundle:
        type: directory
        description: Path to the pypgx resource bundle (https://github.com/sbslee/pypgx-bundle).
  # Third input: plain value, no meta map.
  - - assembly_version:
        type: string
        description: Genome assembly version to use (e.g. GRCh37 or GRCh38). GRCh38
          is used when an empty value is supplied.
output:
  - results:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*pypgx_output/results.zip":
          type: file
          description: Main output file of the pipeline in ZIP format, containing a table
            with star-alleles per sample and CNV calls where applicable.
          pattern: "*pypgx_output/results.zip"
  - cnv_calls:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*pypgx_output/cnv-calls.zip":
          type: file
          description: Optional output file in ZIP format, containing CNV calls per sample.
          pattern: "*pypgx_output/cnv-calls.zip"
  - consolidated_variants:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*pypgx_output/consolidated-variants.zip":
          type: file
          description: Output file in ZIP format, containing a consolidated (and phased)
            VCF file.
          pattern: "*pypgx_output/consolidated-variants.zip"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@jorivansteenbrugge"
maintainers:
  - "@jorivansteenbrugge"
127 changes: 127 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
nextflow_process {

// Display name of this test suite.
name "Test Process PYPGX_RUNNGSPIPELINE"
// Module script under test (one directory above this tests/ folder) and the
// process it defines.
script "../main.nf"
process "PYPGX_RUNNGSPIPELINE"

// Tags attached to this suite. Besides the module's own tags, the last four
// name the modules that the first test runs in its setup block.
tag "modules"
tag "modules_nfcore"
tag "pypgx"
tag "pypgx/runngspipeline"
tag "pypgx/createinputvcf"
tag "pypgx/computecontrolstatistics"
tag "pypgx/preparedepthofcoverage"
tag "untar"

// End-to-end run on the CYP2D6 test BAM (GRCh37): upstream modules generate
// every input of the process under test, then the metadata file inside
// results.zip and the versions file are snapshotted.
test("human paired end bam - CYP2D6 locus - GRCh37 - CYP2D6 pharmacogene") {
    setup {
        // Produces the VCF and its tabix index.
        run("PYPGX_CREATEINPUTVCF") {
            script "../../createinputvcf/main.nf"
            process {
                """
                input[0] = [
                    [id: 'test'], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
                ]
                input[1] = [
                    [id: 'GRCh37'], // meta2 map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/GRCh37_chr22.fasta.gz', checkIfExists: true)
                ]
                input[2] = ["CYP2D6"] // Pharmacogene
                input[3] = "GRCh37" // assembly version
                """
            }
        }
        // Produces the depth-of-coverage archive.
        run("PYPGX_PREPAREDEPTHOFCOVERAGE") {
            // Explicit main.nf path, matching the other setup modules.
            script "../../preparedepthofcoverage/main.nf"
            process {
                """
                input[0] = [
                    [id: 'test'], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
                ]
                input[1] = ["CYP2D6"] // Pharmacogene
                input[2] = "GRCh37" // assembly version
                """
            }
        }
        // Produces the control-statistics archive.
        run("PYPGX_COMPUTECONTROLSTATISTICS") {
            script "../../computecontrolstatistics/main.nf"
            process {
                """
                input[0] = [
                    [id: 'test'], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
                ]
                input[1] = "22:42512500-42551883"
                input[2] = "GRCh37" // assembly version
                """
            }
        }
        // Unpacks the pypgx resource bundle release archive.
        run("UNTAR") {
            script "../../../untar/main.nf"
            process {
                """
                input[0] = [
                    [id: 'resource_bundle'],
                    file('https://github.com/sbslee/pypgx-bundle/archive/refs/tags/0.25.0.tar.gz')
                ]
                """
            }
        }
    }

    when {
        process {
            """
            input[0] = PYPGX_CREATEINPUTVCF.out.vcf
                .join(PYPGX_CREATEINPUTVCF.out.tbi)
                .join(PYPGX_PREPAREDEPTHOFCOVERAGE.out.coverage)
                .join(PYPGX_COMPUTECONTROLSTATISTICS.out.control_stats)
                .combine(Channel.fromList(["CYP2D6"]))
            input[1] = UNTAR.out.untar // resource bundle
            input[2] = 'GRCh37' //assembly version
            """
        }
    }

    then {
        // Check success first: a failed run emits no results, and indexing into
        // the empty output below would otherwise hide the real failure behind a
        // null-dereference error.
        assert process.success

        def results = process.out.results[0][1]
        def zipFile = path(results).zip
        def paths = zipFile.extractAll()
        def metaFile = paths.find { it.getFileName().toString() == 'metadata.txt' }

        assertAll(
            { assert zipFile.isValid() },
            { assert paths.size() == 2 },
            { assert snapshot(
                metaFile,
                process.out.versions).match()}
        )
    }
}

// Stub run: executes the process with Nextflow's -stub option and snapshots
// every output channel.
test("stub") {
options "-stub"
when {
// input[0]: meta map, four empty lists standing in for the vcf, tbi, coverage
//           and control_stats files, and a placeholder gene name
// input[1]: meta2 map with an empty list in place of the resource bundle
// input[2]: empty assembly version
process {
"""
input[0] = [[id: 'test'], [], [], [], [], 'gene']
input[1] = [[id: 'bundle'], []]
input[2] = ''
"""
}
}
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
}
80 changes: 80 additions & 0 deletions modules/nf-core/pypgx/runngspipeline/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"results.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"1": [
[
{
"id": "test"
},
"cnv-calls.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"2": [
[
{
"id": "test"
},
"consolidated-variants.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"3": [
"versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
],
"cnv_calls": [
[
{
"id": "test"
},
"cnv-calls.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"consolidated_variants": [
[
{
"id": "test"
},
"consolidated-variants.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"results": [
[
{
"id": "test"
},
"results.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
]
],
"versions": [
"versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2025-01-10T10:50:49.324358"
},
"human paired end bam - CYP2D6 locus - GRCh37 - CYP2D6 pharmacogene": {
"content": [
"metadata.txt:md5,ea596a886920435c2a3c719b0ae85a8a",
[
"versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2025-01-10T13:48:55.071007"
}
}

0 comments on commit 1da34ed

Please sign in to comment.