diff --git a/modules/nf-core/pypgx/runngspipeline/environment.yml b/modules/nf-core/pypgx/runngspipeline/environment.yml
new file mode 100644
index 00000000000..36c4150bd25
--- /dev/null
+++ b/modules/nf-core/pypgx/runngspipeline/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::pypgx=0.25.0
diff --git a/modules/nf-core/pypgx/runngspipeline/main.nf b/modules/nf-core/pypgx/runngspipeline/main.nf
new file mode 100644
index 00000000000..f5f75df410b
--- /dev/null
+++ b/modules/nf-core/pypgx/runngspipeline/main.nf
@@ -0,0 +1,69 @@
+// Runs `pypgx run-ngs-pipeline` for one pharmacogene (`pgx_gene`) on a sample VCF,
+// with optional depth-of-coverage and control-statistics archives.
+process PYPGX_RUNNGSPIPELINE {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pypgx:0.25.0--pyh7e72e81_0':
+        'biocontainers/pypgx:0.25.0--pyh7e72e81_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(tbi), path(coverage), path(control_stats), val(pgx_gene)
+    tuple val(meta2), path(resource_bundle)
+    val(assembly_version)
+
+    output:
+    tuple val(meta), path("*pypgx_output/results.zip"), emit: results
+    tuple val(meta), path("*pypgx_output/cnv-calls.zip"), emit: cnv_calls, optional: true
+    tuple val(meta), path("*pypgx_output/consolidated-variants.zip"), emit: consolidated_variants
+    path("versions.yml"), emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_${pgx_gene}"
+    // Fall back to GRCh38 when no assembly version is supplied
+    def assembly = assembly_version ?: "GRCh38"
+    // coverage and control_stats are optional inputs; their flags are omitted when empty
+    def depth_coverage = coverage ? "--depth-of-coverage ${coverage}" : ""
+    def control_statistics = control_stats ? "--control-statistics ${control_stats}" : ""
+
+    """
+    export MPLCONFIGDIR="/tmp/"
+    export PYPGX_BUNDLE=${resource_bundle}/
+
+    pypgx run-ngs-pipeline \\
+        --assembly ${assembly} \\
+        ${pgx_gene} \\
+        ${prefix}_pypgx_output/ \\
+        --variants ${vcf} \\
+        ${depth_coverage} \\
+        ${control_statistics} \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pypgx: \$(echo \$(pypgx -v 2>&1) | sed 's/.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}_${pgx_gene}"
+
+    """
+    mkdir ${prefix}_pypgx_output
+    # zip program unavailable in container
+    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/results.zip", "w").close()'
+    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/cnv-calls.zip", "w").close()'
+    python -c 'import zipfile; zipfile.ZipFile("${prefix}_pypgx_output/consolidated-variants.zip", "w").close()'
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pypgx: \$(echo \$(pypgx -v 2>&1) | sed 's/.* //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/pypgx/runngspipeline/meta.yml b/modules/nf-core/pypgx/runngspipeline/meta.yml
new file mode 100644
index 00000000000..6a40ff8eac2
--- /dev/null
+++ b/modules/nf-core/pypgx/runngspipeline/meta.yml
@@ -0,0 +1,98 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "pypgx_runngspipeline"
+
+description: PyPGx pharmacogenomics genotyping pipeline for NGS data.
+keywords:
+  - pypgx
+  - pharmacogenetics
+  - genotyping
+tools:
+  - "pypgx":
+      description: "A Python package for pharmacogenomics research"
+      homepage: "https://pypgx.readthedocs.io/en/latest/"
+      documentation: "https://pypgx.readthedocs.io/en/latest/"
+      tool_dev_url: "https://github.com/sbslee/pypgx"
+      doi: "10.1371/journal.pone.0272129"
+      licence: ["MIT"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`.
+    - vcf:
+        type: file
+        description: BGZIP compressed VCF file with SNVs/indels. Output of pypgx/createinputvcf.
+        pattern: "*.{vcf.gz}"
+    - tbi:
+        type: file
+        description: VCF tabix index.
+        pattern: "*.{vcf.gz.tbi}"
+    - coverage:
+        type: file
+        description: ZIP compressed file with depth of coverage information. Output
+          of pypgx/preparedepthofcoverage. Coverage information is only required when
+          running the module on a pharmacogene with known structural variants.
+        pattern: "*.{zip}"
+    - control_stats:
+        type: file
+        description: ZIP compressed file with control statistics. Output of pypgx/computecontrolstatistics.
+          Control statistics are only required when running the module on a pharmacogene
+          with known structural variants.
+        pattern: "*.{zip}"
+    - pgx_gene:
+        type: string
+        description: Pharmacogene to genotype/phenotype. A list of supported genes is
+          available in the pypgx documentation "https://pypgx.readthedocs.io/en/latest/genes.html"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing resource bundle information
+          e.g. `[ id:'resource_bundle' ]`.
+    - resource_bundle:
+        type: directory
+        description: Path to the pypgx resource bundle (https://github.com/sbslee/pypgx-bundle).
+  - - assembly_version:
+        type: string
+        description: Genome assembly version to use (e.g. GRCh37). Defaults to GRCh38 when empty.
+output:
+  - results:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*pypgx_output/results.zip":
+          type: file
+          description: Main output file of the pipeline in ZIP format, containing a table
+            with star-alleles per sample and CNV calls where applicable.
+  - cnv_calls:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*pypgx_output/cnv-calls.zip":
+          type: file
+          description: Optional output file in ZIP format, containing CNV calls per sample.
+  - consolidated_variants:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*pypgx_output/consolidated-variants.zip":
+          type: file
+          description: Output file in ZIP format, containing a consolidated (and phased)
+            VCF file.
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@jorivansteenbrugge"
+maintainers:
+  - "@jorivansteenbrugge"
diff --git a/modules/nf-core/pypgx/runngspipeline/tests/main.nf.test b/modules/nf-core/pypgx/runngspipeline/tests/main.nf.test
new file mode 100644
index 00000000000..3669ff5ccb1
--- /dev/null
+++ b/modules/nf-core/pypgx/runngspipeline/tests/main.nf.test
@@ -0,0 +1,126 @@
+nextflow_process {
+
+    name "Test Process PYPGX_RUNNGSPIPELINE"
+    script "../main.nf"
+    process "PYPGX_RUNNGSPIPELINE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "pypgx"
+    tag "pypgx/runngspipeline"
+    tag "pypgx/createinputvcf"
+    tag "pypgx/computecontrolstatistics"
+    tag "pypgx/preparedepthofcoverage"
+    tag "untar"
+
+    test("human paired end bam - CYP2D6 locus - GRCh37 - CYP2D6 pharmacogene") {
+        setup {
+            run("PYPGX_CREATEINPUTVCF") {
+                script "../../createinputvcf/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [id: 'test'], // meta map
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
+                    ]
+                    input[1] = [
+                        [id: 'GRCh37'], // meta2 map
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/GRCh37_chr22.fasta.gz', checkIfExists: true)
+                    ]
+                    input[2] = ["CYP2D6"] // Pharmacogene
+                    input[3] = "GRCh37" // assembly version
+                    """
+                }
+            }
+            run("PYPGX_PREPAREDEPTHOFCOVERAGE") {
+                script "../../preparedepthofcoverage/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [id: 'test'], // meta map
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
+                    ]
+                    input[1] = ["CYP2D6"] // Pharmacogene
+                    input[2] = "GRCh37" // assembly version
+                    """
+                }
+            }
+            run("PYPGX_COMPUTECONTROLSTATISTICS") {
+                script "../../computecontrolstatistics/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [id: 'test'], // meta map
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.PGx.CYP2D6.bam.bai', checkIfExists: true)
+                    ]
+                    input[1] = "22:42512500-42551883"
+                    input[2] = "GRCh37" // assembly version
+                    """
+                }
+            }
+            run("UNTAR") {
+                script "../../../untar/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [id: 'resource_bundle'],
+                        file('https://github.com/sbslee/pypgx-bundle/archive/refs/tags/0.25.0.tar.gz')
+                    ]
+                    """
+                }
+            }
+        }
+        when {
+            process {
+                """
+                input[0] = PYPGX_CREATEINPUTVCF.out.vcf
+                    .join(PYPGX_CREATEINPUTVCF.out.tbi)
+                    .join(PYPGX_PREPAREDEPTHOFCOVERAGE.out.coverage)
+                    .join(PYPGX_COMPUTECONTROLSTATISTICS.out.control_stats)
+                    .combine(Channel.fromList(["CYP2D6"]))
+                input[1] = UNTAR.out.untar // resource bundle
+                input[2] = 'GRCh37' //assembly version
+                """
+            }
+        }
+
+        then {
+            def results = process.out.results[0][1]
+
+            def zipFile = path(results).zip
+            def paths = zipFile.extractAll()
+            def metaFile = paths.find { it.getFileName().toString() == 'metadata.txt' }
+
+            assertAll(
+                { assert process.success },
+                { assert zipFile.isValid() },
+                { assert paths.size() == 2 },
+                { assert snapshot(
+                    metaFile,
+                    process.out.versions).match()}
+            )
+        }
+    }
+
+    test("stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [[id: 'test'], [], [], [], [], 'gene']
+                input[1] = [[id: 'bundle'], []]
+                input[2] = ''
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/pypgx/runngspipeline/tests/main.nf.test.snap b/modules/nf-core/pypgx/runngspipeline/tests/main.nf.test.snap
new file mode 100644
index 00000000000..54b1483992e
--- /dev/null
+++ b/modules/nf-core/pypgx/runngspipeline/tests/main.nf.test.snap
@@ -0,0 +1,80 @@
+{
+    "stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "results.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "cnv-calls.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "consolidated-variants.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
+                ],
+                "cnv_calls": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "cnv-calls.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
+                    ]
+                ],
+                "consolidated_variants": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "consolidated-variants.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
+                    ]
+                ],
+                "results": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "results.zip:md5,76cdb2bad9582d23c1f6f4d868218d6c"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2025-01-10T10:50:49.324358"
+    },
+    "human paired end bam - CYP2D6 locus - GRCh37 - CYP2D6 pharmacogene": {
+        "content": [
+            "metadata.txt:md5,ea596a886920435c2a3c719b0ae85a8a",
+            [
+                "versions.yml:md5,9eab385d76a79cf80f7ca26a25592525"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2025-01-10T13:48:55.071007"
+    }
+}
\ No newline at end of file