Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into [email protected]
Browse files Browse the repository at this point in the history
  • Loading branch information
alvinwt committed Nov 8, 2022
2 parents 9537732 + 50c85f2 commit f5fbe53
Show file tree
Hide file tree
Showing 24 changed files with 19,998 additions and 18,459 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: Set up Python 3.6
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.6
python-version: 3.9

- name: Extract package name and version from branch name
id: get_pkg_info
Expand Down Expand Up @@ -80,10 +80,10 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: Set up Python 3.6
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.6
python-version: 3.9

- name: Install dependencies
run: |
Expand Down
5 changes: 5 additions & 0 deletions matrixgenerator/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.gitignore
.nextflow*
tests
work
outdir
19 changes: 19 additions & 0 deletions matrixgenerator/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
FROM python:3.9

# OCI label pointing at the source repository (key="value" form; the legacy
# space-separated LABEL syntax is discouraged by the Dockerfile reference).
LABEL org.opencontainers.image.source="https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures-mgen"

### install python packages

RUN pip install SigProfilerMatrixGenerator

# Run the reference-genome installer at build time so the resulting image
# already contains the reference that refinstall.py installs.
COPY refinstall.py .
RUN python refinstall.py
COPY ICGC_convert_matGen_parser.py .
#CMD ["python" ,"./refinstall.py"]

# /tools is put on PATH and is where main.nf expects the parser script.
RUN mkdir -p /tools
ENV PATH="/tools:${PATH}"
# Copy only the parser script. The previous form listed "." as a second
# source, which copied the entire build context into /tools.
COPY ICGC_convert_matGen_parser.py /tools/

ENTRYPOINT ["python"]

#CMD ["/bin/bash"]
77 changes: 77 additions & 0 deletions matrixgenerator/ICGC_convert_matGen_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 22 10:51:36 2022
@author: Lancelot Seillier
"""

'''Import dependencies/packages'''

import pandas as pd
import os
import shutil
import argparse
import SigProfilerMatrixGenerator as refCheck
from SigProfilerMatrixGenerator.scripts import SigProfilerMatrixGeneratorFunc as matGen
from SigProfilerMatrixGenerator import install as genInstall

# ---------------------------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------------------------
# All three arguments are positional (i, o, ref); the "--..." metavar strings
# only change how they are displayed in the usage/help text.
parser = argparse.ArgumentParser(prog='MatGenerator.py', description='Generate trinucleotide matrix using SigProfilerMatrixGenerator.')
parser.add_argument('i', metavar='--input_path', type=str, help='Define input data file path (will be copied by SigProfilerMatrixGenerator)')
parser.add_argument('o', metavar='--output_file', type=str, help='Define the name of the output MAF file.')
parser.add_argument('ref', metavar='--reference_genome', type=str, help='Genome reference on which trinucleotide context should be interpolated (opt: GRCh37/38)')

args = parser.parse_args()


# ---------------------------------------------------------------------------
# Read the input table and prepare the frame holding the converted MAF
# ---------------------------------------------------------------------------
maf_raw = pd.read_table(args.i)
maf_out_raw = pd.DataFrame(columns=["Hugo", "Entrez", "Center", "Genome", "Chrom", "Start", "End", "Strand", "Classification", "Type", "Ref", "Alt1", "Alt2", "dbSNP", "SNP_Val_status", "Tumor_sample"])

# ---------------------------------------------------------------------------
# Fill the converted MAF columns from the input columns
# ---------------------------------------------------------------------------
# The first assignment must be a Series: it gives the (initially empty) output
# frame its row index, so the constant columns below broadcast to every row.
maf_out_raw["Hugo"] = maf_raw["Hugo_Symbol"]
maf_out_raw["Entrez"] = "."
maf_out_raw["Center"] = "ICGC_consensus"
# NOTE(review): the genome label written to the MAF is hard-coded and does not
# follow the `ref` argument - confirm this is intended.
maf_out_raw["Genome"] = "GRCh37"
maf_out_raw["Chrom"] = maf_raw["Chromosome"]
maf_out_raw["Start"] = maf_raw["Start_position"]
maf_out_raw["End"] = maf_raw["End_position"]
maf_out_raw["Strand"] = maf_raw["Strand"]
maf_out_raw["Classification"] = maf_raw["Variant_Classification"]
maf_out_raw["Type"] = maf_raw["Variant_Type"]
maf_out_raw["Ref"] = maf_raw["Reference_Allele"]
maf_out_raw["Alt1"] = maf_raw["Tumor_Seq_Allele1"]
maf_out_raw["Alt2"] = maf_raw["Tumor_Seq_Allele2"]
maf_out_raw["dbSNP"] = maf_raw["dbSNP_RS"]
maf_out_raw["SNP_Val_status"] = maf_raw["dbSNP_Val_Status"]
# Sample label is "<Project_Code>_<Donor_ID>". Both columns are cast to str
# first so that IDs parsed as numbers do not break the concatenation (the
# previous row-wise '_'.join raised TypeError on non-string values); missing
# values therefore appear as the text 'nan'. The vectorised form also works on
# an input with zero rows.
maf_out_raw["Tumor_sample"] = maf_raw['Project_Code'].astype(str) + '_' + maf_raw['Donor_ID'].astype(str)

# ---------------------------------------------------------------------------
# Write the converted MAF into ./maf/, the folder handed to the generator
# ---------------------------------------------------------------------------
if not os.path.exists('./maf/'):
    os.mkdir('maf')
else:
    print('Output folder already exists... continue...')

maf_out_raw.to_csv('./maf/' + args.o + '.maf', index=False, sep="\t")

# ---------------------------------------------------------------------------
# Generate the trinucleotide matrix using SigProfilerMatrixGenerator
# ---------------------------------------------------------------------------
# The reference genome has to be installed only once per genome build, so the
# install step is skipped when its folder already exists inside the package.
if os.path.exists(os.path.join(refCheck.__path__[0], 'references/chromosomes/tsb/', args.ref)):
    print('Reference genome already installed, procede with matrix generation...')
else:
    genInstall.install(args.ref, rsync=False)

matrices = matGen.SigProfilerMatrixGeneratorFunc(args.o, args.ref, './maf', exome=False, bed_file=None, chrom_based=False, plot=False, tsb_stat=False, seqInfo=False)

# ---------------------------------------------------------------------------
# Move the SBS96 matrix to the working directory under its final name
# ---------------------------------------------------------------------------
shutil.move('./maf/output/SBS/' + args.o + '.SBS96.all', './Trinucleotide_matrix_' + args.o + '.txt')

print('Task completed!')
43 changes: 43 additions & 0 deletions matrixgenerator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Package matrixgenerator


This package generates trinucleotide (SBS96) mutation matrices from ICGC MAF files using SigProfilerMatrixGenerator.


## Package development

The initial version of this package was created by the WorkFlow Package Manager CLI tool, please refer to
the [documentation](https://wfpm.readthedocs.io) for details on the development procedure including
versioning, updating, CI testing and releasing.


## Inputs

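- `input_file`: path to the input ICGC MAF file (tab-delimited).
- `output_pattern`: name pattern used for the output file (default: `tmp`).
- `cpus`, `mem`, `publish_dir`: resource and publishing settings (see `main.nf`).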


## Outputs

- `Trinucleotide_matrix_<output_pattern>.txt`: the SBS96 trinucleotide matrix produced by SigProfilerMatrixGenerator.


## Usage

### Run the package directly

With inputs prepared, you should be able to run the package directly using the following command.
Please replace the params file with a real one (with all required parameters and input files). Example
params file(s) can be found in the `tests` folder.

```
nextflow run icgc-argo-workflows/icgc-argo-mutational-signatures/matrixgenerator/main.nf -r matrixgenerator.v0.1.0 -params-file <your-params-json-file>
```

### Import the package as a dependency

To import this package into another package as a dependency, please follow these steps at the
importing package side:

1. add this package's URI `github.com/icgc-argo-workflows/icgc-argo-mutational-signatures/matrixgenerator@0.1.0` in the `dependencies` list of the `pkg.json` file
2. run `wfpm install` to install the dependency
3. add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo-workflows/icgc-argo-mutational-signatures/matrixgenerator@0.1.0/main.nf`
89 changes: 89 additions & 0 deletions matrixgenerator/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env nextflow

/*
Copyright (c) 2021, ICGC ARGO
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Authors:
[Lancelot Seillier]
*/

/********************************************************************/
/* this block is auto-generated based on info from pkg.json where */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.1.0' // package version

container = [
'ghcr.io': 'ghcr.io/icgc-argo-workflows/icgc-argo-mutational-signatures.matrixgenerator'
]
default_container_registry = 'ghcr.io'
/********************************************************************/


// universal params go here
params.container_registry = ""
params.container_version = ""
params.container = ""

params.cpus = 1
params.mem = 1 // GB
params.publish_dir = "" // set to empty string will disable publishDir


// tool specific params go here, add / change as needed
params.input_file = ""
params.output_pattern = "tmp" // output file name pattern


// Runs /tools/ICGC_convert_matGen_parser.py on a single input file and emits
// the resulting Trinucleotide_matrix_<output_pattern>.txt.
process matrixgenerator {
// Image: params.container if set, otherwise the entry of the `container` map
// for the chosen (or default) registry; tag: params.container_version if set,
// otherwise the package version.
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
// Publishing is enabled only when params.publish_dir is a non-empty string.
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir

cpus params.cpus
memory "${params.mem} GB"

input: // input, make update as needed
path input_file

output: // output, make update as needed
path "Trinucleotide_matrix_${params.output_pattern}.txt", emit: output_file

script:
// add and initialize variables here as needed
// Positional arguments passed to the parser script: input file, output name
// pattern, reference genome.
// NOTE(review): the reference genome is hard-coded to GRCh38, and `output_dir`
// is created but not referenced by the command that follows - confirm both.

"""
mkdir -p output_dir
python /tools/ICGC_convert_matGen_parser.py \
${input_file} \
${params.output_pattern} \
GRCh38
"""
}


// This provides an entry point for this main script, so it can be run directly without cloning the repo,
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
// It calls the matrixgenerator process with the file given by params.input_file.
workflow {
matrixgenerator(
file(params.input_file)
)
}
59 changes: 59 additions & 0 deletions matrixgenerator/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Copyright (c) 2021, ICGC ARGO
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Authors:
[Lancelot Seillier]
"""

import os
import sys
import argparse
import subprocess


def main():
    """
    Python implementation of tool: matrixgenerator

    This is auto-generated Python code, please update as needed!

    Parses ``-i/--input-file`` and ``-o/--output-dir`` from the command line,
    checks that both exist, then runs the external command on them.

    Raises:
        SystemExit: if the input file or the output directory does not exist.
        subprocess.CalledProcessError: if the external command exits non-zero.
        FileNotFoundError: if the external executable cannot be found.
    """

    parser = argparse.ArgumentParser(description='Tool: matrixgenerator')
    parser.add_argument('-i', '--input-file', dest='input_file', type=str,
                        help='Input file', required=True)
    parser.add_argument('-o', '--output-dir', dest='output_dir', type=str,
                        help='Output directory', required=True)
    args = parser.parse_args()

    if not os.path.isfile(args.input_file):
        sys.exit('Error: specified input file %s does not exist or is not accessible!' % args.input_file)

    if not os.path.isdir(args.output_dir):
        sys.exit('Error: specified output dir %s does not exist or is not accessible!' % args.output_dir)

    # Pass an argument list and do not go through a shell: user-supplied paths
    # containing spaces or shell metacharacters are then handed over verbatim
    # instead of being interpreted by the shell.
    # NOTE(review): the command is still `fastqc`, which looks like a leftover
    # of the auto-generated template rather than the matrix generator - confirm.
    subprocess.run(["fastqc", "-o", args.output_dir, args.input_file], check=True)


if __name__ == "__main__":
    main()
4 changes: 4 additions & 0 deletions matrixgenerator/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Docker execution settings for this package.
docker {
// Run processes inside Docker containers.
enabled = true
// Extra `docker run` option: -u <uid>:<gid> taken from `id -u` / `id -g`.
// NOTE(review): presumably so that files written by the container belong to
// the invoking user - confirm.
runOptions = '-u \$(id -u):\$(id -g)'
}
37 changes: 37 additions & 0 deletions matrixgenerator/pkg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"name": "matrixgenerator",
"version": "0.1.0",
"description": "Generates trinucleotide matrices from ICGC-MAF files",
"main": "main.nf",
"deprecated": false,
"keywords": [
"bioinformatics",
"seq",
"qc metrics"
],
"repository": {
"type": "git",
"url": "https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures.git"
},
"container": {
"registries": [
{
"registry": "ghcr.io",
"type": "docker",
"org": "icgc-argo-workflows",
"default": true
}
]
},
"dependencies": [],
"devDependencies": [],
"contributors": [
{
"name": "[Lancelot Seillier]",
"email": "[email protected]"
}
],
"license": "MIT",
"bugReport": "https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures/issues",
"homepage": "https://github.com/icgc-argo-workflows/icgc-argo-mutational-signatures#readme"
}
2 changes: 2 additions & 0 deletions matrixgenerator/refinstall.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Installs the reference genome for SigProfilerMatrixGenerator; the Dockerfile
# runs this script while building the image.
from SigProfilerMatrixGenerator import install as genInstall

REFERENCE_GENOME = 'GRCh38'

genInstall.install(REFERENCE_GENOME, rsync=False, bash=True)
Loading

0 comments on commit f5fbe53

Please sign in to comment.