# Image for the samtools-stats package: Ubuntu 20.04 + samtools 1.12 built from
# source, with the package's Python entry script(s) on PATH under /tools.
FROM ubuntu:20.04

LABEL org.opencontainers.image.source=https://github.com/icgc-argo-qc-wg/argo-qc-tools

ENV DEBIAN_FRONTEND=noninteractive

# Build toolchain and the compression / curses libraries samtools links against.
# The apt package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update -y && \
    apt-get install -y software-properties-common python3-pip python3-dev curl && \
    apt-get install -y libz-dev pkg-config libtool m4 autotools-dev automake libncurses5-dev libbz2-dev liblzma-dev && \
    rm -rf /var/lib/apt/lists/*

# install samtools 1.12
# -f makes curl fail on an HTTP error instead of saving the error page as the tarball;
# the source tree and tarball are deleted once `make install` has finished.
RUN cd /tmp \
    && curl -fsSL -o samtools-1.12.tar.bz2 --retry 10 https://github.com/samtools/samtools/releases/download/1.12/samtools-1.12.tar.bz2 \
    && bunzip2 -c samtools-1.12.tar.bz2 |tar xf - \
    && cd samtools-1.12 \
    && ./configure --prefix=/usr/local \
    && make \
    && make install \
    && cd /tmp \
    && rm -rf samtools-1.12 samtools-1.12.tar.bz2

ENV PATH="/tools:${PATH}"

COPY *.py /tools/

# Unprivileged default user (uid/gid 1000) with a home directory.
RUN groupadd -g 1000 ubuntu && \
    useradd -l -u 1000 -g ubuntu ubuntu && \
    install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu

USER ubuntu

ENTRYPOINT ["/usr/bin/env"]

CMD ["/bin/bash"]
/********************************************************************/
/* this block is auto-generated based on info from pkg.json where   */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.1.0'  // package version

container = [
    'ghcr.io': 'ghcr.io/icgc-argo-qc-wg/argo-qc-tools.samtools-stats'
]
default_container_registry = 'ghcr.io'
/********************************************************************/


// universal params go here
params.container_registry = ""
params.container_version = ""
params.container = ""

params.cpus = 1
params.mem = 1  // GB
params.publish_dir = ""  // set to empty string will disable publishDir

// tool specific params go here, add / change as needed
params.aligned_seq = ""
params.ref_genome_gz = ""  // reference genome: *.fa.gz, index file: *.fa.gz.fai


include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/main.nf'

// Runs main.py (a wrapper around `samtools stats`) on one aligned sequence file
// and emits the resulting QC tarball as `qc_tar`.
process samtoolsStats {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
  // `enabled` takes a boolean, so the publish_dir string is converted explicitly:
  // an empty string disables publishing, any non-empty path enables it
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false

  cpus params.cpus
  memory "${params.mem} GB"

  input:
    path aligned_seq
    path ref_genome_gz
    path ref_genome_gz_idx  // not referenced in the script; staged into the work dir alongside the reference

  output:
    path "${aligned_seq}.samtools_stats.qc.tgz", emit: qc_tar

  script:
    // task.cpus is the number of CPUs actually allocated to this task, which
    // stays correct even when the `cpus` directive is overridden in configuration
    """
    main.py -s ${aligned_seq} \
      -r ${ref_genome_gz} \
      -t ${task.cpus}
    """
}


// this provides an entry point for this main script, so it can be run directly without cloning the repo
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
workflow {
  samtoolsStats(
    file(params.aligned_seq),
    file(params.ref_genome_gz),
    // the '.fai' index is expected next to the reference; fail early if it is missing
    Channel.fromPath(
      getSecondaryFiles(params.ref_genome_gz, ['fai']), checkIfExists: true
    ).collect()
  )
}
import os
import sys
import argparse
import subprocess
from multiprocessing import cpu_count
from glob import glob
import json
import tarfile


def run_cmd(cmd):
    """Run a command and capture its output.

    Args:
        cmd: Either a shell command line (``str``), which is executed through
            the shell, or an argument vector (list/tuple of ``str``), which is
            executed directly without a shell, so arguments containing spaces
            or shell metacharacters need no quoting.

    Returns:
        A ``(stdout, stderr, returncode)`` tuple. Both streams are decoded as
        UTF-8 and stripped of leading/trailing whitespace.
    """
    try:
        proc = subprocess.run(
            cmd,
            shell=isinstance(cmd, str),
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except OSError as err:
        # Without a shell, a missing or non-executable program raises instead
        # of producing an exit status. Report it through the return code (127,
        # the shell's "command not found") so callers handle one failure path.
        return ("", str(err), 127)

    return (
        proc.stdout.decode("utf-8").strip(),
        proc.stderr.decode("utf-8").strip(),
        proc.returncode
    )


def get_tool_version():
    """Return the samtools version string, e.g. ``'1.12'``.

    The version is the last space-separated token of the first line of
    ``samtools --version`` output that starts with ``samtools``.

    Raises:
        SystemExit: if samtools cannot be run or prints no such line.
    """
    stdout, stderr, returncode = run_cmd(['samtools', '--version'])
    if not returncode:
        for line in stdout.splitlines():
            if line.startswith('samtools'):
                return line.strip().split(' ')[-1]

    sys.exit(f"Error: unable to get version info for samtools.\nStdout: {stdout}\nStderr: {stderr}\n")


def prep_qc_metrics(agg_bamstat, tool_ver):
    """Extract selected summary numbers from a bamstat file into ``qc_metrics.json``.

    Only ``SN`` (summary numbers) rows whose field name is listed below are
    collected; each is stored under its mapped metric name, as ``int`` when the
    value parses as an integer and as ``float`` otherwise.

    Args:
        agg_bamstat: Path to the ``samtools stats`` output to parse.
        tool_ver: samtools version string recorded in the JSON.

    Returns:
        The name of the JSON file written to the current working directory.
    """
    qc_metrics = {
        'tool': {
            'name': 'samtools:stats',
            'version': tool_ver
        },
        'metrics': {}
    }

    # bamstat summary field name -> metric name used in qc_metrics.json
    collected_sum_fields = {
        'raw total sequences': 'total_reads',
        'reads mapped': 'mapped_reads',
        'reads paired': 'paired_reads',
        'reads properly paired': 'properly_paired_reads',
        'pairs on different chromosomes': 'pairs_on_different_chromosomes',
        'total length': 'total_bases',
        'bases mapped (cigar)': 'mapped_bases_cigar',
        'mismatches': 'mismatch_bases',
        'error rate': 'error_rate',
        'bases duplicated': 'duplicated_bases',
        'insert size average': 'average_insert_size',
        'average length': 'average_length'
    }

    with open(agg_bamstat, 'r') as f:
        for row in f:
            if not row.startswith('SN\t'):
                continue
            # rows look like: SN <TAB> field name: <TAB> value [<TAB> # comment]
            cols = row.replace(':', '').strip().split('\t')
            if len(cols) < 3 or cols[1] not in collected_sum_fields:
                continue

            try:
                value = int(cols[2])
            except ValueError:
                # not an integer literal: decimal or exponent notation
                value = float(cols[2])
            qc_metrics['metrics'][collected_sum_fields[cols[1]]] = value

    qc_metrics_file = 'qc_metrics.json'
    with open(qc_metrics_file, "w") as j:
        j.write(json.dumps(qc_metrics, indent=2))

    return qc_metrics_file


def prepare_tarball(aligned_seq, qc_metrics, agg_bamstat, lane_bamstat):
    """Bundle the QC outputs into ``<aligned_seq basename>.samtools_stats.qc.tgz``.

    A ``tar_content.json`` manifest naming the bundled files is written to the
    current working directory and added to the tarball as well. Every member is
    stored under its base name, i.e. at the top level of the archive.

    Args:
        aligned_seq: Input sequence file; only its base name is used.
        qc_metrics: Path of the QC metrics JSON file.
        agg_bamstat: Path of the aggregated bamstat file.
        lane_bamstat: List of paths of the per-read-group bamstat files.
    """
    tar_content = {
        'qc_metrics': qc_metrics,
        'agg_bamstat': agg_bamstat,
        'lane_bamstat': lane_bamstat
    }

    with open('tar_content.json', 'w') as t:
        t.write(json.dumps(tar_content, indent=2))

    files_to_tar = ['tar_content.json', qc_metrics, agg_bamstat] + lane_bamstat

    tarfile_name = f"{os.path.basename(aligned_seq)}.samtools_stats.qc.tgz"
    with tarfile.open(tarfile_name, "w:gz") as tar:
        for f in files_to_tar:
            tar.add(f, arcname=os.path.basename(f))


def main(aligned_seq, reference, threads=1):
    """Run ``samtools stats`` on *aligned_seq* and package the results.

    Writes, in the current working directory: the aggregated stats
    (``<basename>.bamstat``), whatever additional ``*.bamstat`` files the
    ``--split RG`` run produces, ``qc_metrics.json`` and the final tarball.

    Args:
        aligned_seq: Path to the aligned sequence file.
        reference: Path to the reference genome file.
        threads: Value passed to samtools' ``-@`` option.

    Raises:
        SystemExit: if samtools is unavailable or ``samtools stats`` fails.
    """
    # get samtools version info
    tool_ver = get_tool_version()

    # run samtools stats; the command is an argument vector executed without a
    # shell, so file paths containing spaces or shell metacharacters are safe
    cmd = [
        'samtools', 'stats',
        '--reference', reference,
        '-@', str(threads),
        '-r', reference,
        '--split', 'RG',
        '-P', os.path.join(os.getcwd(), os.path.basename(aligned_seq)),
        aligned_seq,
    ]
    stdout, stderr, returncode = run_cmd(cmd)
    if returncode:
        sys.exit(f"Error: 'samtools stats' failed.\nStdout: {stdout}\nStderr: {stderr}\n")

    # the aggregated stats arrive on stdout (already stripped by run_cmd)
    agg_bamstat = f"{os.path.basename(aligned_seq)}.bamstat"
    with open(agg_bamstat, 'w') as f:
        f.write(stdout)

    # parse samtools stats output and put it in qc_metrics.json
    qc_metrics_file = prep_qc_metrics(agg_bamstat, tool_ver)

    # every other *.bamstat file in the working directory is a split output
    lane_bamstat = [f for f in sorted(glob('*.bamstat')) if f != agg_bamstat]

    # prepare tarball to include output files and qc_metrics.json
    prepare_tarball(aligned_seq, qc_metrics_file, agg_bamstat, lane_bamstat)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Tool: samtools-stats')
    parser.add_argument('-s', '--aligned_seq', type=str,
                        help='Input aligned seq', required=True)
    parser.add_argument('-r', '--reference', type=str,
                        help='Reference genome', required=True)
    parser.add_argument('-t', '--threads', type=int, default=cpu_count(),
                        help='Number of threads')
    args = parser.parse_args()

    if not os.path.isfile(args.aligned_seq):
        sys.exit('Error: specified aligned seq file %s does not exist or is not accessible!' % args.aligned_seq)

    if not os.path.isfile(args.reference):
        sys.exit('Error: specified reference file %s does not exist or is not accessible!' % args.reference)

    main(args.aligned_seq, args.reference, args.threads)
b/samtools-stats/tests/checker.nf @@ -0,0 +1,127 @@ +#!/usr/bin/env nextflow + +/* + Copyright (c) 2021, ICGC ARGO + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + Authors: + Junjun Zhang +*/ + +/* + This is an auto-generated checker workflow to test the generated main template workflow, it's + meant to illustrate how testing works. Please update to suit your own needs. 
/********************************************************************/
/* this block is auto-generated based on info from pkg.json where   */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.1.0'  // package version

container = [
    'ghcr.io': 'ghcr.io/icgc-argo-qc-wg/argo-qc-tools.samtools-stats'
]
default_container_registry = 'ghcr.io'
/********************************************************************/

// universal params
params.container_registry = ""
params.container_version = ""
params.container = ""

// tool specific params go here, add / change as needed
params.aligned_seq = ""
params.ref_genome_gz = ""  // reference genome: *.fa.gz, index file: *.fa.gz.fai
params.expected_output = ""

include { samtoolsStats } from '../main'
include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/main.nf'

// Unpacks the produced and the expected tarballs and compares them file by file.
// The task exits non-zero (test FAILED) when a file exists on only one side or
// when a file's content differs outside of the 'The command line was:' line.
process file_smart_diff {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"

  input:
    path output_file
    path expected_file

  output:
    stdout()

  script:
    """
    mkdir output expected

    tar xzf ${output_file} -C output
    tar xzf ${expected_file} -C expected

    # every expected file must be present in the output tarball
    cd expected
    for f in *; do
      if [ ! -f "../output/\$f" ]
      then
        echo "Test FAILED, expected file: \$f is missing from the output tarball" && exit 1
      fi
    done

    cd ../output
    for f in *; do
      if [ ! -f "../expected/\$f" ]
      then
        echo "Test FAILED, found unexpected file: \$f in the output tarball" && exit 1
      fi

      echo diff \$f ../expected/\$f
      # we ignore diff from the lines with 'The command line' since they contain dynamic file path
      EFFECTIVE_DIFF=\$(diff "\$f" "../expected/\$f" | grep -E '<|>' | grep -v ' # The command line was:' || true)

      if [ -n "\$EFFECTIVE_DIFF" ]
      then
        echo -e "Test FAILED, output file \$f mismatch:\n\$EFFECTIVE_DIFF" && exit 1
      fi
    done

    echo "All files match, test PASSED" && exit 0
    """
}


// Runs samtoolsStats on the test inputs and diffs its tarball against the expected one.
workflow checker {
  take:
    aligned_seq
    ref_genome_gz
    ref_genome_gz_idx
    expected_output

  main:
    samtoolsStats(
      aligned_seq,
      ref_genome_gz,
      ref_genome_gz_idx
    )

    file_smart_diff(
      samtoolsStats.out.qc_tar,
      expected_output
    )
}


workflow {
  checker(
    file(params.aligned_seq),
    file(params.ref_genome_gz),
    // the '.fai' index is expected next to the reference; fail early if it is missing
    Channel.fromPath(
      getSecondaryFiles(params.ref_genome_gz, ['fai']), checkIfExists: true
    ).collect(),
    file(params.expected_output)
  )
}
diff --git a/samtools-stats/tests/input/SA610149.0.20200122.wgs.grch38.cram b/samtools-stats/tests/input/SA610149.0.20200122.wgs.grch38.cram new file mode 100644 index 0000000..e3697f6 Binary files /dev/null and b/samtools-stats/tests/input/SA610149.0.20200122.wgs.grch38.cram differ diff --git a/samtools-stats/tests/input/tiny-grch38-chr11-530001-537000.fa.gz b/samtools-stats/tests/input/tiny-grch38-chr11-530001-537000.fa.gz new file mode 100644 index 0000000..4669945 Binary files /dev/null and b/samtools-stats/tests/input/tiny-grch38-chr11-530001-537000.fa.gz differ diff --git a/samtools-stats/tests/input/tiny-grch38-chr11-530001-537000.fa.gz.fai b/samtools-stats/tests/input/tiny-grch38-chr11-530001-537000.fa.gz.fai new file mode 100644 index 0000000..88de907 --- /dev/null +++ b/samtools-stats/tests/input/tiny-grch38-chr11-530001-537000.fa.gz.fai @@ -0,0 +1 @@ +chr11 537000 40 50 51 diff --git a/samtools-stats/tests/nextflow.config b/samtools-stats/tests/nextflow.config new file mode 100644 index 0000000..4e214a1 --- /dev/null +++ b/samtools-stats/tests/nextflow.config @@ -0,0 +1 @@ +includeConfig '../nextflow.config' diff --git a/samtools-stats/tests/test-job-1.json b/samtools-stats/tests/test-job-1.json new file mode 100644 index 0000000..1040a28 --- /dev/null +++ b/samtools-stats/tests/test-job-1.json @@ -0,0 +1,8 @@ +{ + "aligned_seq": "input/SA610149.0.20200122.wgs.grch38.cram", + "ref_genome_gz": "input/tiny-grch38-chr11-530001-537000.fa.gz", + "expected_output": "expected/expected.SA610149.0.20200122.wgs.grch38.cram.samtools_stats.qc.tgz", + "publish_dir": "outdir", + "cpus": 2, + "mem": 1 +} diff --git a/samtools-stats/tests/wfpr_modules b/samtools-stats/tests/wfpr_modules new file mode 120000 index 0000000..de8975c --- /dev/null +++ b/samtools-stats/tests/wfpr_modules @@ -0,0 +1 @@ +../wfpr_modules \ No newline at end of file diff --git a/samtools-stats/wfpr_modules b/samtools-stats/wfpr_modules new file mode 120000 index 0000000..de8975c --- /dev/null 
+++ b/samtools-stats/wfpr_modules @@ -0,0 +1 @@ +../wfpr_modules \ No newline at end of file diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/.dockerignore b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/.dockerignore new file mode 100644 index 0000000..71266ec --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/.dockerignore @@ -0,0 +1,5 @@ +.gitignore +.nextflow* +tests +work +outdir diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/.gitignore b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/.gitignore new file mode 100644 index 0000000..a50828c --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/.gitignore @@ -0,0 +1,69 @@ +*.py[cod] + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +.eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 +venv*/ +pyvenv*/ + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox +.coverage.* +nosetests.xml +coverage.xml +htmlcov + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject +.idea +*.iml +*.komodoproject + +# Complexity +output/*.html +output/*/index.html + +# Sphinx +docs/_build + +.DS_Store +*~ +.*.sw[po] +.build +.ve +.env +.cache +.pytest +.bootstrap +.appveyor.token +*.bak +*.log +.vscode +.python-version +.nextflow* +work +outdir diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/Dockerfile b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/Dockerfile new file mode 100644 index 0000000..ae0e0c8 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/Dockerfile @@ -0,0 +1,11 @@ +FROM 
ubuntu:20.04 + +LABEL org.opencontainers.image.source https://github.com/icgc-argo/data-processing-utility-tools + +RUN groupadd -g 1000 ubuntu && \ + useradd -l -u 1000 -g ubuntu ubuntu && \ + install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu + +USER ubuntu + +CMD ["/bin/bash"] diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/main.nf b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/main.nf new file mode 100755 index 0000000..e3eb963 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/main.nf @@ -0,0 +1,55 @@ +#!/usr/bin/env nextflow + +/* + Copyright (C) 2021, Ontario Institute for Cancer Research + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . 
/********************************************************************/
/* this block is auto-generated based on info from pkg.json where   */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '1.0.1'
/********************************************************************/


// this is kind of like CWL's secondary files
//
// Builds the secondary file paths of `main_file`, one per entry of `exts`:
//   - a plain extension is appended:           ('ref.fa.gz', ['fai'])   -> ['ref.fa.gz.fai']
//   - an extension starting with '^' replaces
//     the last extension of main_file:         ('sample.bam', ['^bai']) -> ['sample.bai']
// Terminates the run via `exit` when `exts` is not a list.
def getSecondaryFiles(main_file, exts){
  if (!(exts instanceof List)) {
    exit 1, "[getSecondaryFiles] param: exts must be a list of strings"
  }

  def secondaryFiles = []
  for (ext in exts) {
    if (ext.startsWith("^")) {
      ext = ext.replace("^", "")
      // declared with `def` so the variable stays local to this call instead
      // of being stored as a global in the script binding
      def parts = main_file.split("\\.").toList()
      parts.removeLast()
      secondaryFiles.add((parts + [ext]).join("."))
    } else {
      secondaryFiles.add(main_file + '.' + ext)
    }
  }
  return secondaryFiles
}


// get specific secondary files for BWA alignment, ensure none is missing
def getBwaSecondaryFiles(main_file){
  return getSecondaryFiles(main_file, ['fai', 'sa', 'bwt', 'ann', 'amb', 'pac', 'alt'])
}
"description": "A collection of helper functions", + "main": "main.nf", + "deprecated": false, + "keywords": [ + "bioinformatics", + "utils", + "function", + "secondary file" + ], + "repository": { + "type": "git", + "url": "https://github.com/icgc-argo/data-processing-utility-tools.git" + }, + "container": { + "registries": [ + { + "registry": "ghcr.io", + "type": "docker", + "org": "icgc-argo", + "default": true + } + ] + }, + "dependencies": [], + "devDependencies": [], + "contributors": [ + { + "name": "Junjun Zhang", + "email": "junjun.ca@gmail.com" + } + ], + "license": "GNU Affero General Public License v3", + "bugReport": "https://github.com/icgc-argo/data-processing-utility-tools/issues", + "homepage": "https://github.com/icgc-argo/data-processing-utility-tools#readme" +} \ No newline at end of file diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/wfpr_modules b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/wfpr_modules new file mode 120000 index 0000000..1cc74ba --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.1/wfpr_modules @@ -0,0 +1 @@ +../../../../../wfpr_modules \ No newline at end of file