-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/main' into [email protected]
- Loading branch information
Showing
24 changed files
with
19,998 additions
and
18,459 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.gitignore | ||
.nextflow* | ||
tests | ||
work | ||
outdir |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
FROM python:3.9

LABEL org.opencontainers.image.source=https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures-mgen

# Install the matrix generator; --no-cache-dir keeps pip's download cache
# out of the image layer.
RUN pip install --no-cache-dir SigProfilerMatrixGenerator

# Install the reference genome at build time (refinstall.py calls the
# SigProfilerMatrixGenerator installer) so containers do not have to
# download it on every run.
COPY refinstall.py .
RUN python refinstall.py

# Copy kept in the working directory for backward compatibility with
# callers that run the script by relative path.
COPY ICGC_convert_matGen_parser.py .

# Canonical location of the tool. Only the converter script is copied:
# listing "." as an additional source would pull the entire build context
# into /tools.
RUN mkdir -p /tools
ENV PATH="/tools:${PATH}"
COPY ICGC_convert_matGen_parser.py /tools/

ENTRYPOINT ["python"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Mon Aug 22 10:51:36 2022 | ||
@author: Lancelot Seillier | ||
""" | ||
|
||
'''Import dependencies/packages''' | ||
|
||
import pandas as pd | ||
import os | ||
import shutil | ||
import argparse | ||
import SigProfilerMatrixGenerator as refCheck | ||
from SigProfilerMatrixGenerator.scripts import SigProfilerMatrixGeneratorFunc as matGen | ||
from SigProfilerMatrixGenerator import install as genInstall | ||
|
||
def build_parser():
    """Build the command-line parser.

    The three arguments are positional and stored as ``i``, ``o`` and
    ``ref``. Their metavars carry no leading dashes so the usage line does
    not advertise ``--option`` flags that the parser would reject.
    """
    parser = argparse.ArgumentParser(
        prog='MatGenerator.py',
        description='Generate trinucleotide matrix using SigProfilerMatrixGenerator.',
    )
    parser.add_argument(
        'i', metavar='input_path', type=str,
        help='Define input data file path (will be copied by SigProfilerMatrixGenerator)',
    )
    parser.add_argument(
        'o', metavar='output_file', type=str,
        help='Define the name of the output MAF file.',
    )
    parser.add_argument(
        'ref', metavar='reference_genome', type=str,
        help='Genome reference on which trinucleotide context should be interpolated (opt: GRCh37/38)',
    )
    return parser


def convert_maf(maf_raw, genome):
    """Convert an ICGC MAF table into the 16-column layout written to disk.

    Parameters
    ----------
    maf_raw : pandas.DataFrame
        Input table; must provide the ICGC columns read below
        (``Hugo_Symbol``, ``Chromosome``, ``Start_position``, ...,
        ``Project_Code``, ``Donor_ID``).
    genome : str
        Reference genome label written to the ``Genome`` column.

    Returns
    -------
    pandas.DataFrame
        One row per input row, columns in the order defined below.

    Raises
    ------
    KeyError
        If a required input column is missing.
    """
    # Vectorised concatenation; astype(str) keeps this working when an
    # identifier column was parsed as a number, where a row-wise
    # '_'.join would raise TypeError.
    tumor_sample = (
        maf_raw["Project_Code"].astype(str) + "_" + maf_raw["Donor_ID"].astype(str)
    )

    return pd.DataFrame(
        {
            "Hugo": maf_raw["Hugo_Symbol"],
            "Entrez": ".",
            "Center": "ICGC_consensus",
            # Label rows with the reference actually requested instead of
            # a hard-coded "GRCh37".
            "Genome": genome,
            "Chrom": maf_raw["Chromosome"],
            "Start": maf_raw["Start_position"],
            "End": maf_raw["End_position"],
            "Strand": maf_raw["Strand"],
            "Classification": maf_raw["Variant_Classification"],
            "Type": maf_raw["Variant_Type"],
            "Ref": maf_raw["Reference_Allele"],
            "Alt1": maf_raw["Tumor_Seq_Allele1"],
            "Alt2": maf_raw["Tumor_Seq_Allele2"],
            "dbSNP": maf_raw["dbSNP_RS"],
            "SNP_Val_status": maf_raw["dbSNP_Val_Status"],
            "Tumor_sample": tumor_sample,
        },
        index=maf_raw.index,
    )


def main():
    """Convert the input MAF, run SigProfilerMatrixGenerator and collect the SBS96 matrix.

    Writes ``./maf/<o>.maf``, runs the matrix generator on ``./maf`` and
    moves the resulting ``<o>.SBS96.all`` file to
    ``./Trinucleotide_matrix_<o>.txt`` in the working directory.
    """
    args = build_parser().parse_args()

    # Read input data and convert it to the layout used for the output MAF.
    maf_out = convert_maf(pd.read_table(args.i), args.ref)

    # Write the converted MAF into the folder handed to the matrix generator.
    maf_dir = './maf'
    if os.path.isdir(maf_dir):
        print('Output folder already exists... continue...')
    # exist_ok avoids failing if the folder appears between check and creation.
    os.makedirs(maf_dir, exist_ok=True)
    maf_out.to_csv(os.path.join(maf_dir, args.o + '.maf'), index=False, sep="\t")

    # Install the reference genome only if it is not already present in the
    # package's reference folder (needed once per genome reference).
    reference_dir = os.path.join(refCheck.__path__[0], 'references/chromosomes/tsb/', args.ref)
    if os.path.exists(reference_dir):
        print('Reference genome already installed, procede with matrix generation...')
    else:
        genInstall.install(args.ref, rsync=False)

    # Generate the trinucleotide matrix using SigProfilerMatrixGenerator.
    matGen.SigProfilerMatrixGeneratorFunc(
        args.o, args.ref, maf_dir,
        exome=False, bed_file=None, chrom_based=False,
        plot=False, tsb_stat=False, seqInfo=False,
    )

    # Move the SBS96 matrix next to the working directory under its final name.
    shutil.move(
        './maf/output/SBS/' + args.o + '.SBS96.all',
        './Trinucleotide_matrix_' + args.o + '.txt',
    )

    print('Task completed!')


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Package matrixgenerator | ||
|
||
|
||
Please update this with a brief introduction of the package. | ||
|
||
|
||
## Package development | ||
|
||
The initial version of this package was created by the WorkFlow Package Manager CLI tool, please refer to | ||
the [documentation](https://wfpm.readthedocs.io) for details on the development procedure including | ||
versioning, updating, CI testing and releasing. | ||
|
||
|
||
## Inputs | ||
|
||
Please list all input parameters | ||
|
||
|
||
## Outputs | ||
|
||
Please list all outputs | ||
|
||
|
||
## Usage | ||
|
||
### Run the package directly | ||
|
||
With inputs prepared, you should be able to run the package directly using the following command. | ||
Please replace the params file with a real one (with all required parameters and input files). Example | ||
params file(s) can be found in the `tests` folder. | ||
|
||
``` | ||
nextflow run icgc-argo-workflows/icgc-argo-mutational-signatures/matrixgenerator/main.nf -r matrixgenerator.v0.1.0 -params-file <your-params-json-file> | ||
``` | ||
|
||
### Import the package as a dependency | ||
|
||
To import this package into another package as a dependency, please follow these steps at the | ||
importing package side: | ||
|
||
1. add this package's URI `github.com/icgc-argo-workflows/icgc-argo-mutational-signatures/matrixgenerator@0.1.0` in the `dependencies` list of the `pkg.json` file | ||
2. run `wfpm install` to install the dependency | ||
3. add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo-workflows/icgc-argo-mutational-signatures/matrixgenerator@0.1.0/main.nf` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
/* | ||
Copyright (c) 2021, ICGC ARGO | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. | ||
Authors: | ||
[Lancelot Seillier] | ||
*/ | ||
|
||
/********************************************************************/ | ||
/* this block is auto-generated based on info from pkg.json where */ | ||
/* changes can be made if needed, do NOT modify this block manually */ | ||
nextflow.enable.dsl = 2 | ||
version = '0.1.0' // package version | ||
|
||
container = [ | ||
'ghcr.io': 'ghcr.io/icgc-argo-workflows/icgc-argo-mutational-signatures.matrixgenerator' | ||
] | ||
default_container_registry = 'ghcr.io' | ||
/********************************************************************/ | ||
|
||
|
||
// universal params go here | ||
params.container_registry = "" | ||
params.container_version = "" | ||
params.container = "" | ||
|
||
params.cpus = 1 | ||
params.mem = 1 // GB | ||
params.publish_dir = "" // set to empty string will disable publishDir | ||
|
||
|
||
// tool specific params go here, add / change as needed | ||
params.input_file = "" | ||
params.output_pattern = "tmp" // output file name pattern | ||
|
||
|
||
process matrixgenerator { | ||
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}" | ||
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir | ||
|
||
cpus params.cpus | ||
memory "${params.mem} GB" | ||
|
||
input: // input, make update as needed | ||
path input_file | ||
|
||
output: // output, make update as needed | ||
path "Trinucleotide_matrix_${params.output_pattern}.txt", emit: output_file | ||
|
||
script: | ||
// add and initialize variables here as needed | ||
|
||
""" | ||
mkdir -p output_dir | ||
python /tools/ICGC_convert_matGen_parser.py \ | ||
${input_file} \ | ||
${params.output_pattern} \ | ||
GRCh38 | ||
""" | ||
} | ||
|
||
|
||
// this provides an entry point for this main script, so it can be run directly without cloning the repo | ||
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx | ||
workflow { | ||
matrixgenerator( | ||
file(params.input_file) | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
""" | ||
Copyright (c) 2021, ICGC ARGO | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. | ||
Authors: | ||
[Lancelot Seillier] | ||
""" | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import subprocess | ||
|
||
|
||
def main():
    """Command-line entry point of the matrixgenerator tool wrapper.

    Parses ``-i/--input-file`` and ``-o/--output-dir`` (both required),
    checks that the input file and the output directory exist, then runs
    the external command on them.

    Raises
    ------
    SystemExit
        If the input file or the output directory does not exist.
    subprocess.CalledProcessError
        If the external command exits with a non-zero status.
    """
    parser = argparse.ArgumentParser(description='Tool: matrixgenerator')
    parser.add_argument('-i', '--input-file', dest='input_file', type=str,
                        help='Input file', required=True)
    parser.add_argument('-o', '--output-dir', dest='output_dir', type=str,
                        help='Output directory', required=True)
    args = parser.parse_args()

    if not os.path.isfile(args.input_file):
        sys.exit('Error: specified input file %s does not exist or is not accessible!' % args.input_file)

    if not os.path.isdir(args.output_dir):
        sys.exit('Error: specified output dir %s does not exist or is not accessible!' % args.output_dir)

    # Pass the command as an argument list without a shell: user-supplied
    # paths are then never interpreted by the shell, so spaces and
    # metacharacters in them are safe.
    # NOTE(review): the command is `fastqc`, which looks like a leftover
    # from the auto-generated template rather than the matrix generator --
    # confirm the intended tool.
    subprocess.run(["fastqc", "-o", args.output_dir, args.input_file], check=True)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
docker { | ||
enabled = true | ||
runOptions = '-u \$(id -u):\$(id -g)' | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{ | ||
"name": "matrixgenerator", | ||
"version": "0.1.0", | ||
"description": "Generates trinucleotide matrices from ICGC-MAF files", | ||
"main": "main.nf", | ||
"deprecated": false, | ||
"keywords": [ | ||
"bioinformatics", | ||
"seq", | ||
"qc metrics" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures.git" | ||
}, | ||
"container": { | ||
"registries": [ | ||
{ | ||
"registry": "ghcr.io", | ||
"type": "docker", | ||
"org": "icgc-argo-workflows", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"dependencies": [], | ||
"devDependencies": [], | ||
"contributors": [ | ||
{ | ||
"name": "[Lancelot Seillier]", | ||
"email": "[email protected]" | ||
} | ||
], | ||
"license": "MIT", | ||
"bugReport": "https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures/issues", | ||
"homepage": "https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures#readme" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from SigProfilerMatrixGenerator import install as genInstall | ||
genInstall.install('GRCh38', rsync=False,bash=True) |
Oops, something went wrong.