diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 0000000..a7ac861 --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,8 @@ +[bumpversion] +current_version = 2.1.8 +commit = True +tag = True +tag_name = {new_version} +message = Bump version: {current_version} -> {new_version} [skip ci] + +[bumpversion:file:fusion_report/__init__.py] diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c545b67..066d0da 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -9,3 +9,9 @@ Thank you for contribution to `fusion-report` project. - [ ] Documentation in `docs` is updated - [ ] `CHANGELOG.md` is updated - [ ] `README` is updated + +### This [version](https://semver.org/) is a + +- [ ] **MAJOR** - when you make incompatible API changes +- [ ] **MINOR** - when you add functionality in a backwards compatible manner +- [ ] **PATCH** - when you make backwards compatible bug fixes or documentation/instructions diff --git a/.github/workflows/bump_version.yml b/.github/workflows/bump_version.yml new file mode 100644 index 0000000..1db481d --- /dev/null +++ b/.github/workflows/bump_version.yml @@ -0,0 +1,17 @@ +name: "Bump Version" +on: + push: + branches: + - "master" +jobs: + bump-version: + runs-on: ubuntu-latest + name: Bump version and push tags to master + steps: + - name: Bump version + uses: Clinical-Genomics/bump2version-ci@v3 + env: + BUMPVERSION_TOKEN: ${{ secrets.BUMPVERSION_TOKEN }} + BUMPVERSION_AUTHOR: ${{ secrets.BUMPVERSION_AUTHOR }} + BUMPVERSION_EMAIL: ${{ secrets.BUMPVERSION_EMAIL }} + GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 344828b..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: CI [dev] - -on: - push: - branches: [ dev ] - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.6 - uses: actions/setup-python@v1 - with: - python-version: 3.6 - - - name: Install dependencies & package - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt -r requirements-dev.txt - python setup.py install - - - name: Lint - run: | - flake8 fusion_report/ - mypy fusion_report/ - - - name: Quick test - run: | - fusion_report --help - fusion_report run --help - fusion_report download --help diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml new file mode 100644 index 0000000..4688855 --- /dev/null +++ b/.github/workflows/integration_tests.yml @@ -0,0 +1,33 @@ +name: Integration Tests + +on: + pull_request: + branches: [master] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: Install dependencies & package + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + python setup.py install + + - name: Test download of databases + run: fusion_report download --cosmic_usr "${{ secrets.COSMIC_USERNAME }}" --cosmic_passwd "${{ secrets.COSMIC_PASSWD }}" ./db + + - name: Test run + run: | + fusion_report run "example-sample" ./docs/example ./db/ \ + --arriba tests/test_data/arriba.tsv \ + --fusioncatcher tests/test_data/fusioncatcher.txt \ + --starfusion tests/test_data/starfusion.tsv \ + --export csv diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..406d1cf --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: Black + +on: [pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Check out git repository + uses: actions/checkout@v3 + + - name: Setup Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: 3.8 + + - name: Set up Black + uses: psf/black@stable + with: + options: ". --check --line-length 100" + version: "~=23.3.0" diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml deleted file mode 100644 index b9f4518..0000000 --- a/.github/workflows/pr.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: CI [pull request] - -on: - pull_request: - branches: [ master ] - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.6 - uses: actions/setup-python@v1 - with: - python-version: 3.6 - - - name: Install dependencies & package - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt -r requirements-dev.txt - python setup.py install - - - name: Lint - run: | - flake8 fusion_report/ - mypy fusion_report/ - - - name: Test - run: fusion_report download --cosmic_usr "${{ secrets.USERNAME }}" --cosmic_passwd "${{ secrets.PASSWORD }}" ./db - - - name: Run application - run: | - fusion_report run "example-sample" ./example ./db/ \ - --arriba tests/test_data/arriba.tsv \ - --dragen tests/test_data/dragen.tsv \ - --ericscript tests/test_data/ericscript.tsv \ - --fusioncatcher tests/test_data/fusioncatcher.txt \ - --pizzly tests/test_data/pizzly.tsv \ - --squid tests/test_data/squid.txt \ - --starfusion tests/test_data/starfusion.tsv \ - --export csv diff --git a/.github/workflows/publish_image.yml b/.github/workflows/publish_image.yml new file mode 100644 index 0000000..4e7a86c --- /dev/null +++ b/.github/workflows/publish_image.yml @@ -0,0 +1,32 @@ +name: Build and publish image on new release event + +on: + release: + types: + - created + +jobs: + docker-image-release_push: + runs-on: ubuntu-latest + steps: + - name: Check Out Repo + uses: actions/checkout@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + + - name: Build and push + id: docker_build + uses: docker/build-push-action@v3 + with: + context: ./ + file: ./Dockerfile + push: true + tags: "clinicalgenomics/fusion-report:${{github.event.release.tag_name}}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index ff53e50..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: CI [master] - -on: - push: - branches: [ master ] - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.6 - uses: actions/setup-python@v1 - with: - python-version: 3.6 - - - name: Install dependencies & package - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt -r requirements-dev.txt - python setup.py install - - - name: Lint - run: | - flake8 fusion_report/ - mypy fusion_report/ - - - name: Test - run: fusion_report download --cosmic_usr "${{ secrets.USERNAME }}" --cosmic_passwd "${{ secrets.PASSWORD }}" ./db - - - name: Run application - run: | - fusion_report run "example-sample" ./docs/example ./db/ \ - --arriba tests/test_data/arriba.tsv \ - --dragen tests/test_data/dragen.tsv \ - --ericscript tests/test_data/ericscript.tsv \ - --fusioncatcher tests/test_data/fusioncatcher.txt \ - --pizzly tests/test_data/pizzly.tsv \ - --squid tests/test_data/squid.txt \ - --starfusion tests/test_data/starfusion.tsv \ - --export csv - - - name: Build example - run: | - git config user.name "GitHub Actions Bot" - git config user.email "<>" - git add docs/example - git commit -m "docs: generate example ($(echo ${GITHUB_SHA} | cut -c1-7))" - git push origin master - - - name: Publish documentation - uses: peaceiris/actions-gh-pages@v3 - if: github.event_name == 'push' && github.ref == 'refs/heads/master' - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_branch: gh-pages - publish_dir: ./docs diff --git a/.gitignore b/.gitignore index 234e374..97d2a6b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ tests/sss *.log .tox/ .DS_Store +testfusionreport.sh +thisismydb/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index ef54ebc..f4111ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.1.8] + +### Removed + +- Removed FusionGDB + ## [2.1.5](https://github.com/matq007/fusion-report/releases/tag/2.1.5) ### Added diff --git a/README.md b/README.md index ad1bfc2..89eaddb 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ python3 setup.py install ```bash # Download required databases -# Currently supported databases: FusionGDB, Mitelman and COSMIC +# Currently supported databases: FusionGDB2, Mitelman and COSMIC # COSMIC requires login credentials to download Fusion gene Database fusion_report download --cosmic_usr '' --cosmic_passwd '' /path/to/db/ diff --git a/docs/add_database.md b/docs/add_database.md index 1011758..05ed6bf 100644 --- a/docs/add_database.md +++ b/docs/add_database.md @@ -31,7 +31,7 @@ class Test(Db, metaclass=Singleton): ```python local_fusions: Dict[str, List[str]] = { - FusionGDB(path).name: FusionGDB(path).get_all_fusions(), + FusionGDB2(path).name: FusionGDB2(path).get_all_fusions(), MitelmanDB(path).name: MitelmanDB(path).get_all_fusions(), CosmicDB(path).name: CosmicDB(path).get_all_fusions(), TestDB(path).name: TestDB(path).get_all_fusions() # add your database here diff --git a/docs/download.md b/docs/download.md index 6c7c6bf..3bf2cb4 100644 --- a/docs/download.md +++ b/docs/download.md @@ -2,7 +2,7 @@ Currently the tool supports three different databases: -* [FusionGDB](https://ccsm.uth.edu/FusionGDB/index.html) +* [FusionGDB2](https://compbio.uth.edu/FusionGDB2/tables) * [Mitelman](https://cgap.nci.nih.gov/Chromosomes/Mitelman) * [COSMIC](https://cancer.sanger.ac.uk/cosmic/fusion) @@ -15,24 +15,19 @@ fusion_report download /path/to/db ``` -## Manual download - -### FusionGDB - -Website: [https://ccsm.uth.edu/FusionGDB/index.html](https://ccsm.uth.edu/FusionGDB/index.html) +With a non-academic/research login -> using QIAGEN with a commercial license: ```bash -# Download all files -wget --no-check-certificate https://ccsm.uth.edu/FusionGDB/tables/TCGA_ChiTaRS_combined_fusion_information_on_hg19.txt -O TCGA_ChiTaRS_combined_fusion_information_on_hg19.txt -wget --no-check-certificate https://ccsm.uth.edu/FusionGDB/tables/TCGA_ChiTaRS_combined_fusion_ORF_analyzed_gencode_h19v19.txt -O TCGA_ChiTaRS_combined_fusion_ORF_analyzed_gencode_h19v19.txt -wget --no-check-certificate https://ccsm.uth.edu/FusionGDB/tables/uniprot_gsymbol.txt -O uniprot_gsymbol.txt -wget --no-check-certificate https://ccsm.uth.edu/FusionGDB/tables/fusion_uniprot_related_drugs.txt -O fusion_uniprot_related_drugs.txt -wget --no-check-certificate https://ccsm.uth.edu/FusionGDB/tables/fusion_ppi.txt -O fusion_ppi.txt -wget --no-check-certificate https://ccsm.uth.edu/FusionGDB/tables/fgene_disease_associations.txt -O fgene_disease_associations.txt -# Create database and import the data -sqlite3 fusiongdb.db < fusion_report/db/FusionGDB.sql +fusion_report download + --cosmic_usr '' + --cosmic_passwd 'QIAGEN ' + --qiagen + /path/to/db ``` + +## Manual download + ### Mitelman Website: [https://cgap.nci.nih.gov/Chromosomes/Mitelman](https://cgap.nci.nih.gov/Chromosomes/Mitelman) diff --git a/docs/score.md b/docs/score.md index 3bf6d0a..85a09cf 100644 --- a/docs/score.md +++ b/docs/score.md @@ -4,7 +4,7 @@ One disadvantage of the tools is that they tend to report false positive results estimated score for a fusion. The only way how to correctly verify a fusion is biologically (RT-qPCR …). **fusion-report** uses weighted approach of assigning weights to tools and databases. By default, each tool -is assigned the same weight. This is because each tool uses different approach of discovering fusions and +is assigned the same weight. This is because each tool uses different approach of discovering fusions and report different results, for example FusionCatcher will work best on somatic samples. You can customize weight of individual tool by specific parameter `_weight 30`. @@ -12,9 +12,8 @@ The sum of the weights has to be 100! Currently weights for databases are not adjustable. The weights for databases are as follows: -* FusionGDB (20) -* COSMIC (40) -* MITELMAN (40) +* COSMIC (50) +* MITELMAN (50) * FusionGDB2 (0) > It is strongly suggested to use all supported databases in order to get the best estimated score. diff --git a/fusion_report/__init__.py b/fusion_report/__init__.py index e69de29..86931fa 100644 --- a/fusion_report/__init__.py +++ b/fusion_report/__init__.py @@ -0,0 +1,2 @@ +__title__ = "fusion_report" +__version__ = "2.1.8" diff --git a/fusion_report/app.py b/fusion_report/app.py index 5d5295b..8162be6 100644 --- a/fusion_report/app.py +++ b/fusion_report/app.py @@ -21,7 +21,6 @@ from fusion_report.common.models.fusion import Fusion from fusion_report.common.report import Report from fusion_report.data.cosmic import CosmicDB -from fusion_report.data.fusiongdb import FusionGDB from fusion_report.data.fusiongdb2 import FusionGDB2 from fusion_report.data.mitelman import MitelmanDB from fusion_report.download import Download @@ -56,24 +55,26 @@ def run(self): """ params = self.args.parse() try: - if params.command == 'run': - Logger(__name__).info('Running application...') + if params.command == "run": + Logger(__name__).info("Running application...") self.preprocess(params) self.generate_report(params) self.export_results(params.output, params.export) self.generate_multiqc( - params.output, self.manager.fusions, - params.sample, len(self.manager.running_tools) + params.output, + self.manager.fusions, + params.sample, + len(self.manager.running_tools), ) self.generate_fusion_list(params.output, params.tool_cutoff) - elif params.command == 'download': - Logger(__name__).info('Downloading resources...') + elif params.command == "download": + Logger(__name__).info("Downloading resources...") Download(params) - elif params.command == 'sync': - Logger(__name__).info('Synchronizing databases...') + elif params.command == "sync": + Logger(__name__).info("Synchronizing databases...") Sync(params) else: - sys.exit(f'Command {params.command} not recognized!') + sys.exit(f"Command {params.command} not recognized!") except (AppException, DbException, DownloadException, IOError) as ex: raise AppException(ex) @@ -86,44 +87,26 @@ def preprocess(self, params: Namespace) -> None: def generate_report(self, params: Namespace) -> None: """Generate fusion report with all pages.""" report = Report(params.config, params.output) - fusions = [fusion for fusion in self.manager.fusions if len(fusion.tools) >= params.tool_cutoff] + fusions = [ + fusion for fusion in self.manager.fusions if len(fusion.tools) >= params.tool_cutoff + ] index_page = report.create_page( - 'Summary', filename='index.html', page_variables={'sample': params.sample} + "Summary", filename="index.html", page_variables={"sample": params.sample} ) index_page.add_module( - 'index_summary', self.manager, params={'tool_cutoff': params.tool_cutoff} + "index_summary", self.manager, params={"tool_cutoff": params.tool_cutoff} ) report.render(index_page) with tqdm(total=len(fusions)) as pbar: for fusion in fusions: fusion_page = report.create_page( - fusion.name, page_variables={'sample': params.sample} - ) - fusion_page.add_module('fusion_summary', params={'fusion': fusion}) - fusion_page.add_module( - 'fusiongdb.variations', - params={'fusion': fusion.name, 'db_path': params.db_path} - ) - fusion_page.add_module( - 'fusiongdb.transcripts', - params={'fusion': fusion.name, 'db_path': params.db_path} - ) - fusion_page.add_module( - 'fusiongdb.ppi', - params={'fusion': fusion.name, 'db_path': params.db_path} - ) - fusion_page.add_module( - 'fusiongdb.drugs', - params={'fusion': fusion.name, 'db_path': params.db_path} - ) - fusion_page.add_module( - 'fusiongdb.diseases', - params={'fusion': fusion.name, 'db_path': params.db_path} + fusion.name, page_variables={"sample": params.sample} ) + fusion_page.add_module("fusion_summary", params={"fusion": fusion}) report.render(fusion_page) - pbar.set_description(f'Processing {fusion.name}') + pbar.set_description(f"Processing {fusion.name}") time.sleep(0.1) pbar.update(1) @@ -133,15 +116,14 @@ def parse_fusion_outputs(self, params: Dict[str, Any]) -> None: if param in self.manager.supported_tools and value: # param: fusion tool # value: fusion tool output - self.manager.parse(param, value, params['allow_multiple_gene_symbols']) + self.manager.parse(param, value, params["allow_multiple_gene_symbols"]) def enrich(self, path: str) -> None: """Enrich fusion with all relevant information from local databases.""" local_fusions: Dict[str, List[str]] = { CosmicDB(path).name: CosmicDB(path).get_all_fusions(), MitelmanDB(path).name: MitelmanDB(path).get_all_fusions(), - FusionGDB(path).name: FusionGDB(path).get_all_fusions(), - FusionGDB2(path).name: FusionGDB2(path).get_all_fusions() + FusionGDB2(path).name: FusionGDB2(path).get_all_fusions(), } for fusion in self.manager.fusions: for db_name, db_list in local_fusions.items(): @@ -153,43 +135,43 @@ def export_results(self, path: str, extension: str) -> None: Currently supporting file types: JSON and CSV """ dest = f"{os.path.join(path, 'fusions')}.{extension}" - if extension == 'json': - with open(dest, 'w', encoding='utf-8') as output: + if extension == "json": + with open(dest, "w", encoding="utf-8") as output: results = [fusion.json_serialize() for fusion in self.manager.fusions] output.write(rapidjson.dumps(results)) - elif extension == 'csv': - with open(dest, "w", encoding='utf-8') as output: + elif extension == "csv": + with open(dest, "w", encoding="utf-8") as output: csv_writer = csv.writer( - output, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL + output, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL ) # header - header = ['Fusion', 'Databases', 'Score', 'Explained score'] + header = ["Fusion", "Databases", "Score", "Explained score"] header.extend([x for x in sorted(self.manager.running_tools)]) csv_writer.writerow(header) for fusion in self.manager.fusions: row: List[Any] = [ fusion.name, - ','.join(fusion.dbs), + ",".join(fusion.dbs), fusion.score, fusion.score_explained, ] for tool in sorted(self.manager.running_tools): if tool in fusion.tools.keys(): row.append( - ','.join([ - f'{key}: {value}' for key, value in fusion.tools[tool].items() - ]) + ",".join( + [f"{key}: {value}" for key, value in fusion.tools[tool].items()] + ) ) else: - row.append('') + row.append("") csv_writer.writerow(row) else: - Logger(__name__).error('Export output %s not supported', extension) + Logger(__name__).error("Export output %s not supported", extension) def generate_fusion_list(self, path: str, cutoff: int): """ Helper function that generates file containing list of found fusions and filtered list of - fusions. One of these files ise used by FusionInspector to visualize the fusions. + fusions. One of these files is used by FusionInspector to visualize the fusions. Input for FusionInspector expects list of fusions in format `geneA--geneB\n`. Returns: @@ -197,15 +179,15 @@ def generate_fusion_list(self, path: str, cutoff: int): - fusions_list_filtered.tsv """ # unfiltered list - with open(os.path.join(path, 'fusion_list.tsv'), 'w', encoding='utf-8') as output: + with open(os.path.join(path, "fusion_list.tsv"), "w", encoding="utf-8") as output: for fusion in self.manager.fusions: - output.write(f'{fusion.name}\n') + output.write(f"{fusion.name}\n") # filtered list - with open(os.path.join(path, 'fusion_list_filtered.tsv'), 'w', encoding='utf-8') as output: + with open(os.path.join(path, "fusion_list_filtered.tsv"), "w", encoding="utf-8") as output: for fusion in self.manager.fusions: if len(fusion.tools) >= cutoff: - output.write(f'{fusion.name}\n') + output.write(f"{fusion.name}\n") def score(self, params: Dict[str, Any]) -> None: """Custom scoring function for individual fusion. @@ -214,13 +196,13 @@ def score(self, params: Dict[str, Any]) -> None: """ for fusion in self.manager.fusions: - # tool estimation tool_score: float = sum( - [params[f'{tool.lower()}_weight'] / 100.0 for tool, _ in fusion.tools.items()] + [params[f"{tool.lower()}_weight"] / 100.0 for tool, _ in fusion.tools.items()] ) tool_score_expl: List[str] = [ - format((params[f'{tool}_weight'] / 100.0), '.3f') for tool, _ in fusion.tools.items() + format((params[f"{tool}_weight"] / 100.0), ".3f") + for tool, _ in fusion.tools.items() ] # database estimation @@ -228,41 +210,42 @@ def score(self, params: Dict[str, Any]) -> None: float(Settings.FUSION_WEIGHTS[db_name.lower()]) for db_name in fusion.dbs ) db_score_expl: List[str] = [ - format(Settings.FUSION_WEIGHTS[db_name.lower()], '.3f') for db_name in fusion.dbs + format(Settings.FUSION_WEIGHTS[db_name.lower()], ".3f") for db_name in fusion.dbs ] - score: float = float('%0.3f' % (0.5 * tool_score + 0.5 * db_score)) - score_explained = f'0.5 * ({" + ".join(tool_score_expl)}) + 0.5 * ({" + ".join(db_score_expl)})' + score: float = float("%0.3f" % (0.5 * tool_score + 0.5 * db_score)) + score_explained = ( + f'0.5 * ({" + ".join(tool_score_expl)}) + 0.5 * ({" + ".join(db_score_expl)})' + ) fusion.score, fusion.score_explained = score, score_explained @staticmethod - def generate_multiqc(path: str, fusions: List[Fusion], - sample_name: str, running_tools_count: int) -> None: + def generate_multiqc( + path: str, fusions: List[Fusion], sample_name: str, running_tools_count: int + ) -> None: """Helper function that generates MultiQC Fusion section (`fusion_genes_mqc.json`).""" counts: Dict[str, int] = defaultdict(lambda: 0) for fusion in fusions: tools = fusion.dbs if len(tools) == running_tools_count: - counts['together'] += 1 + counts["together"] += 1 for tool in tools: counts[tool] += 1 configuration = { - 'id': 'fusion_genes', - 'section_name': 'Fusion genes', - 'description': 'Number of fusion genes found by various tools', - 'plot_type': 'bargraph', - 'pconfig': { - 'id': 'barplot_config_only', - 'title': 'Detected fusion genes', - 'ylab': 'Number of detected fusion genes' + "id": "fusion_genes", + "section_name": "Fusion genes", + "description": "Number of fusion genes found by various tools", + "plot_type": "bargraph", + "pconfig": { + "id": "barplot_config_only", + "title": "Detected fusion genes", + "ylab": "Number of detected fusion genes", }, - 'data': { - sample_name: counts - } + "data": {sample_name: counts}, } dest = f"{os.path.join(path, 'fusion_genes_mqc.json')}" - with open(dest, 'w', encoding='utf-8') as output: + with open(dest, "w", encoding="utf-8") as output: output.write(rapidjson.dumps(configuration)) diff --git a/fusion_report/args_builder.py b/fusion_report/args_builder.py index dd7aa46..34c1114 100644 --- a/fusion_report/args_builder.py +++ b/fusion_report/args_builder.py @@ -19,105 +19,118 @@ class ArgsBuilder: """ def __init__(self): - configuration = os.path.join(Settings.ROOT_DIR, 'arguments.json') - self.arguments: Dict[str, Any] = rapidjson.loads(open(configuration, 'r').read()) - self.arguments['weight'] = float(100 / len(self.supported_tools)) + configuration = os.path.join(Settings.ROOT_DIR, "arguments.json") + self.arguments: Dict[str, Any] = rapidjson.loads(open(configuration, "r").read()) + self.arguments["weight"] = float(100 / len(self.supported_tools)) self.parser = ArgumentParser( - description='''Tool for generating friendly UI custom report.''' + description="""Tool for generating friendly UI custom report.""" ) self.parser.add_argument( - '--version', '-v', - action='version', - version=f'fusion-report {Settings.VERSION}' + "--version", + "-v", + action="version", + version=f"fusion-report {Settings.VERSION}", ) - self.command_parser: _SubParsersAction = self.parser.add_subparsers(dest='command') + self.command_parser: _SubParsersAction = self.parser.add_subparsers(dest="command") @property def supported_tools(self): """Return all supported fusion detection tools.""" - return [tool['key'].replace('--', '') for tool in self.arguments['args']['run']['tools']] + return [tool["key"].replace("--", "") for tool in self.arguments["args"]["run"]["tools"]] def build(self) -> None: """Build command-line arguments.""" - self.run_args(self.arguments['args']['run'], self.arguments['weight']) - self.download_args(self.arguments['args']['download']) - self.sync_args(self.arguments['args']['download']) + self.run_args(self.arguments["args"]["run"], self.arguments["weight"]) + self.download_args(self.arguments["args"]["download"]) + self.sync_args(self.arguments["args"]["download"]) def run_args(self, args, weight) -> None: """Build run command-line arguments.""" - run_parser = self.command_parser.add_parser('run', help='Run application') + run_parser = self.command_parser.add_parser("run", help="Run application") # mandatory run_mandatory = run_parser.add_argument_group( - 'Mandatory arguments', 'Required arguments to run app.' + "Mandatory arguments", "Required arguments to run app." ) - for mandatory in args['mandatory']: - run_mandatory.add_argument(mandatory['key'], help=mandatory['help'], type=str) + for mandatory in args["mandatory"]: + run_mandatory.add_argument(mandatory["key"], help=mandatory["help"], type=str) # fusion tools run_tools = run_parser.add_argument_group( - 'Tools', 'List of all supported tools with their weights.' + "Tools", "List of all supported tools with their weights." ) - for tool in args['tools']: - run_tools.add_argument(tool['key'], help=tool['help'], type=str) + for tool in args["tools"]: + run_tools.add_argument(tool["key"], help=tool["help"], type=str) run_tools.add_argument( - f'{tool["key"]}_weight', help=tool['help'], - type=float, default=weight + f'{tool["key"]}_weight', help=tool["help"], type=float, default=weight ) # optionals run_optional = run_parser.add_argument_group( - 'Optionals', 'List of optional configuration parameters.' + "Optionals", "List of optional configuration parameters." ) - for optional in args['optionals']: - if len(optional['key']) > 1: - if optional.get('action'): + for optional in args["optionals"]: + if len(optional["key"]) > 1: + if optional.get("action"): run_optional.add_argument( - optional['key'][0], optional['key'][1], - help=optional.get('help'), - action=optional.get('action') + optional["key"][0], + optional["key"][1], + help=optional.get("help"), + action=optional.get("action"), ) - else: + else: run_optional.add_argument( - optional['key'][0], optional['key'][1], - default=optional.get('default'), help=optional.get('help'), - type=type(optional.get('default')) + optional["key"][0], + optional["key"][1], + default=optional.get("default"), + help=optional.get("help"), + type=type(optional.get("default")), ) else: - if optional.get('action'): + if optional.get("action"): run_optional.add_argument( - optional['key'][0], default=optional.get('default'), help=optional.get('help'), - action=optional.get('action') + optional["key"][0], + default=optional.get("default"), + help=optional.get("help"), + action=optional.get("action"), ) else: run_optional.add_argument( - optional['key'][0], default=optional.get('default'), help=optional.get('help'), - type=type(optional.get('default')) + optional["key"][0], + default=optional.get("default"), + help=optional.get("help"), + type=type(optional.get("default")), ) def download_args(self, args: Dict[str, Any]) -> None: """Build download command-line arguments.""" - download_parser = self.command_parser.add_parser('download', - help='Download required databases') - for mandatory in args['mandatory']: - download_parser.add_argument(mandatory['key'], help=mandatory['help'], type=str) + download_parser = self.command_parser.add_parser( + "download", help="Download required databases" + ) + for mandatory in args["mandatory"]: + download_parser.add_argument(mandatory["key"], help=mandatory["help"], type=str) self._cosmic(args, download_parser) def sync_args(self, args: Dict[str, Any]) -> None: """Build sync command-line arguments.""" - download_parser = self.command_parser.add_parser('sync', - help='Synchronize databases') - for mandatory in args['mandatory']: - download_parser.add_argument(mandatory['key'], help=mandatory['help'], type=str) + download_parser = self.command_parser.add_parser("sync", help="Synchronize databases") + for mandatory in args["mandatory"]: + download_parser.add_argument(mandatory["key"], help=mandatory["help"], type=str) self._cosmic(args, download_parser) def _cosmic(self, args: Dict[str, Any], parser) -> None: """Build COSMIC command-line arguments.""" download_cosmic = parser.add_argument_group( - 'COSMIC', '''Option credential parameters. You can either provide username and password - which will be used to generate base64 token or the token itself.''' + "COSMIC", + """Option credential parameters. You can either provide username and password + which will be used to generate base64 token or the token itself.""", ) - for cosmic in args['cosmic']: - download_cosmic.add_argument(cosmic['key'], help=cosmic['help'], type=str) + for cosmic in args["cosmic"]: + if not cosmic.get("action"): + download_cosmic.add_argument(cosmic["key"], help=cosmic.get("help"), type=str) + else: + download_cosmic.add_argument( + cosmic["key"], help=cosmic.get("help"), action=cosmic.get("action") + ) def parse(self) -> Namespace: """Parse arguments.""" diff --git a/fusion_report/arguments.json b/fusion_report/arguments.json index 2b1ca12..71c5f7a 100644 --- a/fusion_report/arguments.json +++ b/fusion_report/arguments.json @@ -91,6 +91,11 @@ { "key": "--cosmic_token", "help": "COSMIC token" + }, + { + "key": "--qiagen", + "help": "Use QIAGEN to download COSMIC db (commercial usage)", + "action": "store_true" } ] } diff --git a/fusion_report/common/base_page.py b/fusion_report/common/base_page.py index 0a953d2..e147519 100644 --- a/fusion_report/common/base_page.py +++ b/fusion_report/common/base_page.py @@ -15,9 +15,10 @@ class BasePage: view: View modules: Custom modules """ + def __init__(self, title: str, view: str, filename: str = None) -> None: self.title: str = title.strip() - self.view: str = f'views/{view}.html' + self.view: str = f"views/{view}.html" self.modules: Dict[str, Any] = {} self.filename: str = filename if filename else self._set_filename(title) @@ -26,15 +27,11 @@ def add_module(self, name: str, manager: FusionManager = None, params=None) -> N if name not in self.modules: self.modules[name] = ModuleLoader(manager, params).exec(name) else: - Logger(__name__).warning('Module %s already loaded', name) + Logger(__name__).warning("Module %s already loaded", name) def get_content(self) -> Dict[str, Any]: """Helper serialization method for templating engine.""" - return { - 'title': self.title, - 'filename': self.filename, - 'view': self.view - } + return {"title": self.title, "filename": self.filename, "view": self.view} @staticmethod def _set_filename(fusion: str) -> str: @@ -46,8 +43,8 @@ def _set_filename(fusion: str) -> str: Returns: str: filename of the fusion """ - for char in ['/', '\\', '--']: + for char in ["/", "\\", "--"]: if char in fusion: - fusion = fusion.replace(char, '_') + fusion = fusion.replace(char, "_") - return f'{fusion}.html' + return f"{fusion}.html" diff --git a/fusion_report/common/db.py b/fusion_report/common/db.py index 52f99d3..da5d5c1 100644 --- a/fusion_report/common/db.py +++ b/fusion_report/common/db.py @@ -21,7 +21,7 @@ class Db: def __init__(self, path: str, name: str, schema: str) -> None: self.name: str = name self._schema: str = schema - self.database: str = f'{name.lower()}.db' + self.database: str = f"{name.lower()}.db" self.connection = self.connect(path, self.database) def connect(self, path: str, database: str): @@ -40,46 +40,47 @@ def connect(self, path: str, database: str): except sqlite3.DatabaseError as ex: raise DbException(ex) - def setup(self, files: List[str], delimiter: str = '', - skip_header=False, encoding='utf-8') -> None: + def setup( + self, files: List[str], delimiter: str = "", skip_header=False, encoding="utf-8" + ) -> None: """Sets up database. For most databases there is available schema and text files which - contain all the data. This methods builds database using it's schema and imports - all provided data files. + contain all the data. This methods builds database using it's schema and imports + all provided data files. - Args: - files: all necessary files required to be imported - delimiter: separator used in data files - skip_header: ignore header when importing files, default: False - encoding: data file encoding, some files are not using utf-8 as default (Mitelman) + Args: + files: all necessary files required to be imported + delimiter: separator used in data files + skip_header: ignore header when importing files, default: False + encoding: data file encoding, some files are not using utf-8 as default (Mitelman) - Raises: - DbException + Raises: + DbException """ try: # build database schema self.create_database() # import all data files except .sql files - for file in filter(lambda x: not x.endswith('.sql'), files): - with open(file, 'r', encoding=encoding) as resource: + for file in filter(lambda x: not x.endswith(".sql"), files): + with open(file, "r", encoding=encoding) as resource: if skip_header: next(resource) first_line: List[str] = resource.readline().split(delimiter) rows: List[List[str]] = [first_line] for line in resource: row = line.split(delimiter) - rows.append(row + ['' for _ in range(len(row), len(first_line))]) + rows.append(row + ["" for _ in range(len(row), len(first_line))]) self.connection.executemany( - f'''INSERT INTO {file.split('/')[-1].split('.')[0].lower()} - VALUES ({','.join(['?' for _ in range(0, len(first_line))])})''', - rows + f"""INSERT INTO {file.split('/')[-1].split('.')[0].lower()} + VALUES ({','.join(['?' for _ in range(0, len(first_line))])})""", + rows, ) self.connection.commit() except (IOError, sqlite3.Error) as ex: raise DbException(ex) def create_database(self): - """ Build database from schema file.""" - with open(self.schema, 'r', encoding='utf-8') as schema: + """Build database from schema file.""" + with open(self.schema, "r", encoding="utf-8") as schema: self.connection.executescript(schema.read().lower()) def select(self, query: str, params: List[str] = None): @@ -121,7 +122,7 @@ def execute(self, query: str, params: List[str] = None): @property def schema(self): """Returns database schema.""" - return os.path.join(Settings.ROOT_DIR, f'data/schema/{self._schema}') + return os.path.join(Settings.ROOT_DIR, f"data/schema/{self._schema}") @classmethod def __dict_factory(cls, cursor, row): diff --git a/fusion_report/common/exceptions/app.py b/fusion_report/common/exceptions/app.py index 92446a7..de1c125 100644 --- a/fusion_report/common/exceptions/app.py +++ b/fusion_report/common/exceptions/app.py @@ -3,4 +3,5 @@ class AppException(Exception): """Raised when application fails.""" + pass diff --git a/fusion_report/common/exceptions/config.py b/fusion_report/common/exceptions/config.py index 97450e1..c15518b 100644 --- a/fusion_report/common/exceptions/config.py +++ b/fusion_report/common/exceptions/config.py @@ -3,4 +3,5 @@ class ConfigException(Exception): """Raised when configuration parsing fails.""" + pass diff --git a/fusion_report/common/exceptions/db.py b/fusion_report/common/exceptions/db.py index f6432cd..b0ffd48 100644 --- a/fusion_report/common/exceptions/db.py +++ b/fusion_report/common/exceptions/db.py @@ -3,4 +3,5 @@ class DbException(Exception): """Raised when establishing connection with database.""" + pass diff --git a/fusion_report/common/exceptions/download.py b/fusion_report/common/exceptions/download.py index 4cae0fd..8944a05 100644 --- a/fusion_report/common/exceptions/download.py +++ b/fusion_report/common/exceptions/download.py @@ -3,4 +3,5 @@ class DownloadException(Exception): """Raised when issue occurs while downloading databases.""" + pass diff --git a/fusion_report/common/exceptions/module.py b/fusion_report/common/exceptions/module.py index c4102a6..4b3368e 100644 --- a/fusion_report/common/exceptions/module.py +++ b/fusion_report/common/exceptions/module.py @@ -3,4 +3,5 @@ class ModuleException(Exception): """Raised with errors related to CustomModule, BaseModule and Loader.""" + pass diff --git a/fusion_report/common/exceptions/report.py b/fusion_report/common/exceptions/report.py index 1d501d3..c445afb 100644 --- a/fusion_report/common/exceptions/report.py +++ b/fusion_report/common/exceptions/report.py @@ -3,4 +3,5 @@ class ReportException(Exception): """Raised when generating report.""" + pass diff --git a/fusion_report/common/fusion_manager.py b/fusion_report/common/fusion_manager.py index ef7dd80..5f7f4d6 100644 --- a/fusion_report/common/fusion_manager.py +++ b/fusion_report/common/fusion_manager.py @@ -15,6 +15,7 @@ class FusionManager: running_tools: List of executed fusion detection tools supported_tools: List of all supported fusion detection tools """ + def __init__(self, supported_tools: List[str]) -> None: self.fusions: List[Fusion] = [] self.running_tools: Set[str] = set() @@ -30,10 +31,10 @@ def parse(self, tool: str, file: str, allow_multiple_genes: bool) -> None: self.running_tools.add(tool) factory_parser = self.__build_factory(tool) try: - with open(file, 'r', encoding='utf-8') as fusion_output: - factory_parser.set_header(fusion_output.readline().replace('"', '')) + with open(file, "r", encoding="utf-8") as fusion_output: + factory_parser.set_header(fusion_output.readline().replace('"', "")) for line in fusion_output: - line = line.replace('"', '').strip() + line = line.replace('"', "").strip() fusion_list: List[Tuple[str, Dict[str, Any]]] = factory_parser.parse(line) if allow_multiple_genes is None and len(fusion_list) > 1: fusion_list = [fusion_list[0]] @@ -43,7 +44,8 @@ def parse(self, tool: str, file: str, allow_multiple_genes: bool) -> None: raise AppException(ex) else: Logger(__name__).error( - 'Tool %s is not supported. To integrate the tool please create an issue', tool + "Tool %s is not supported. To integrate the tool please create an issue", + tool, ) def add(self, fusion_name: str, tool: str, details: Dict[str, Any]) -> None: @@ -76,7 +78,7 @@ def __build_factory(tool: str): AppException """ try: - module_name: str = f'fusion_report.parsers.{tool.lower()}' + module_name: str = f"fusion_report.parsers.{tool.lower()}" module = __import__(module_name, fromlist=[tool.capitalize()]) klass = getattr(module, tool.capitalize()) return klass() diff --git a/fusion_report/common/logger.py b/fusion_report/common/logger.py index abef1f4..a65c401 100644 --- a/fusion_report/common/logger.py +++ b/fusion_report/common/logger.py @@ -17,12 +17,13 @@ class Logger(metaclass=Singleton): logger: Logger instance filename: Logger file name """ + logger: Any = {} def __init__(self, name: str) -> None: if not self.logger: self.logger = logging.getLogger(name) - self.filename = 'fusion_report.log' + self.filename = "fusion_report.log" self.logger.setLevel(logging.INFO) self.logger.addHandler(self.get_critical_handler(self.filename)) self.logger.addHandler(self.get_info_handler()) @@ -58,9 +59,9 @@ def warning(self, msg: str, *args) -> None: @staticmethod def get_critical_handler(filename: str) -> TimedRotatingFileHandler: """Logging handler for levels: CRITICAL, ERROR and WARNING.""" - file_handler = TimedRotatingFileHandler(filename, when='midnight') - file_handler.setFormatter(logging.Formatter( - '%(asctime)s - %(levelname)s - %(name)s - %(message)s') + file_handler = TimedRotatingFileHandler(filename, when="midnight") + file_handler.setFormatter( + logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") ) file_handler.setLevel(logging.WARNING) @@ -70,7 +71,7 @@ def get_critical_handler(filename: str) -> TimedRotatingFileHandler: def get_info_handler() -> logging.StreamHandler: """Logging handler for level INFO.""" info_handler = logging.StreamHandler(sys.stdout) - info_handler.setFormatter(ColoredFormatter('%(log_color)s%(message)s%(reset)s')) + info_handler.setFormatter(ColoredFormatter("%(log_color)s%(message)s%(reset)s")) info_handler.setLevel(logging.INFO) return info_handler diff --git a/fusion_report/common/models/fusion.py b/fusion_report/common/models/fusion.py index 316ce13..230921b 100644 --- a/fusion_report/common/models/fusion.py +++ b/fusion_report/common/models/fusion.py @@ -13,50 +13,51 @@ class Fusion: dbs: List of databases where fusion was found tools: List of tools which detected fusion """ + def __init__(self, name: str) -> None: self.name: str = name.strip() - self._score: Dict[str, Any] = {'score': 0, 'explained': ''} + self._score: Dict[str, Any] = {"score": 0, "explained": ""} self.dbs: List[str] = [] self.tools: Dict[str, Any] = {} @property def score(self) -> float: - return self._score['score'] + return self._score["score"] @score.setter def score(self, value: float) -> None: - self._score['score'] = float(value) + self._score["score"] = float(value) @property def score_explained(self) -> str: """Returns explanation of how the score was calculated.""" - return self._score['explained'] + return self._score["explained"] @score_explained.setter def score_explained(self, value: str) -> None: - self._score['explained'] = value + self._score["explained"] = value def add_tool(self, tool: str, details: Dict[str, Any]) -> None: """Add new fusion tool to the list.""" if tool and tool not in self.tools.keys(): self.tools[tool] = details else: - Logger(__name__).debug('Tool %s already in list or empty', tool) + Logger(__name__).debug("Tool %s already in list or empty", tool) def add_db(self, database: str) -> None: """Add new database to the list.""" if database and database not in self.dbs: self.dbs.append(database) else: - Logger(__name__).debug('Database %s already in list or empty', database) + Logger(__name__).debug("Database %s already in list or empty", database) def json_serialize(self) -> Dict[str, Any]: """Helper serialization method for templating engine.""" json: Dict[str, Any] = { - 'Fusion': self.name, - 'Databases': self.dbs, - 'Score': self.score, - 'Explained score': self.score_explained, + "Fusion": self.name, + "Databases": self.dbs, + "Score": self.score, + "Explained score": self.score_explained, } return {**json, **self.tools} diff --git a/fusion_report/common/net.py b/fusion_report/common/net.py index d7817a9..cb02165 100644 --- a/fusion_report/common/net.py +++ b/fusion_report/common/net.py @@ -3,112 +3,171 @@ import gzip import os import shutil -import ssl -import urllib.error -import urllib.request +import requests import time import pandas as pd from zipfile import ZipFile - +import subprocess +import json from argparse import Namespace from typing import List -import rapidjson - from fusion_report.common.exceptions.download import DownloadException from fusion_report.common.logger import Logger from fusion_report.data.cosmic import CosmicDB from fusion_report.settings import Settings -from multiprocessing import Pool -from fusion_report.data.fusiongdb import FusionGDB from fusion_report.data.fusiongdb2 import FusionGDB2 from fusion_report.data.mitelman import MitelmanDB -class Net: +LOG = Logger(__name__) + +class Net: @staticmethod def get_cosmic_token(params: Namespace): if params.cosmic_token is not None: return params.cosmic_token - if ( - params.cosmic_token is None - and (params.cosmic_usr is not None or params.cosmic_passwd is not None) - ): - return base64.b64encode( - f'{params.cosmic_usr}:{params.cosmic_passwd}'.encode() - ).decode('utf-8') + if params.cosmic_usr is not None and params.cosmic_passwd is not None: + return base64.b64encode(f"{params.cosmic_usr}:{params.cosmic_passwd}".encode()).decode( + "utf-8" + ) else: - raise DownloadException('COSMIC credentials have not been provided correctly') + raise DownloadException("COSMIC credentials have not been provided correctly") @staticmethod - def get_large_file(url: str, ignore_ssl: bool = False) -> None: - """Method for downloading a large file.""" - - ctx = None - if ignore_ssl: - ctx = ssl.create_default_context() - ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE - - if url.startswith('https') or url.startswith('ftp'): - try: - with urllib.request.urlopen(url, context=ctx) as response: - file = url.split('/')[-1].split('?')[0] - Logger(__name__).info('Downloading %s', file) - # only download if file size doesn't match - if not os.path.exists(file) or \ - (response.info()['Content-Length'] or 0) != os.stat(file).st_size: - with open(file, 'wb') as out_file: - shutil.copyfileobj(response, out_file) - except urllib.error.HTTPError as ex: - raise DownloadException(ex) + def run_qiagen_cmd(cmd, return_output=False, silent=False): + if not silent: + print(cmd) + if return_output: + output = subprocess.check_output(cmd, shell=True, executable="/bin/bash").strip() + return output else: - Logger(__name__).error('Downloading resources supports only HTTPS or FTP') + subprocess.check_call(cmd, shell=True, executable="/bin/bash") + + @staticmethod + def get_qiagen_files(token: str, output_path: str): + files_request = ( + "curl --stderr -s -X GET " + '-H "Content-Type: application/octet-stream" ' + '-H "Authorization: Bearer {token}" ' + '"https://my.qiagendigitalinsights.com/bbp/data/files/cosmic"' + " -o {output_path}qiagen_files.tsv" + ) + cmd = files_request.format(token=token, output_path=output_path) + return Net.run_qiagen_cmd(cmd, True, True) + + @staticmethod + def download_qiagen_file(token: str, file_id: str, output_path: str): + file_request = ( + "curl -s -X GET " + '-H "Content-Type: application/octet-stream" ' + '-H "Authorization: Bearer {token}" ' + '"https://my.qiagendigitalinsights.com/bbp/data/download/cosmic-download?name={file_id}"' + " -o {output_path}CosmicFusionExport.tsv.gz" + ) + cmd = file_request.format(token=token, file_id=file_id, output_path=output_path) + Net.run_qiagen_cmd(cmd, True, True) + + @staticmethod + def fetch_fusion_file_id(output_path: str): + df = pd.read_csv( + output_path + "/qiagen_files.tsv", + names=["file_id", "file_name", "genome_draft"], + sep="\t", + ) + file_id = df.loc[ + (df["file_name"] == Settings.COSMIC["FILE"]) & (df["genome_draft"] == "cosmic/GRCh38"), + "file_id", + ].values[0] + return file_id @staticmethod - def get_cosmic(token: str, return_err: List[str]) -> None: - """Method for download COSMIC database.""" + def get_cosmic_qiagen_token(params: Namespace): + token_request = ( + "curl -s -X POST " + '-H "Content-Type: application/x-www-form-urlencoded" ' + '-d "grant_type=password&client_id=603912630-14192122372034111918-SmRwso&username={uid}&password={pwd}" ' + '"https://apps.ingenuity.com/qiaoauth/oauth/token"' + ) + cmd = token_request.format(uid=params.cosmic_usr, pwd=params.cosmic_passwd) + token_response = Net.run_qiagen_cmd(cmd, True, True).decode("UTF-8") + return json.loads(token_response)["access_token"] + + @staticmethod + def get_large_file(url: str) -> None: + """Method for downloading a large file.""" + LOG.info(f"Downloading {url}") + try: + headers = {"User-Agent": "Mozilla/5.0"} + response = requests.get(url, headers=headers, stream=True) + + file = url.split("/")[-1].split("?")[0] + + if ( + not os.path.exists(file) + or (response.headers.get("Content-Length") or 0) != os.stat(file).st_size + ): + with open(file, "wb") as out_file: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + out_file.write(chunk) + except Exception as ex: + LOG.error(f"Error downloading {url}, {ex}") + raise DownloadException(ex) - # get auth url to download file + @staticmethod + def get_cosmic_from_sanger(token: str, return_err: List[str]) -> None: + """Method for download COSMIC database from sanger website.""" files = [] file: str = Settings.COSMIC["FILE"] url: str = f'{Settings.COSMIC["HOSTNAME"]}/{Settings.COSMIC["FILE"]}' - req = urllib.request.Request(url) - req.add_header('Authorization', f'Basic {token}') - req.add_header( - 'User-Agent', - '''Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) - Chrome/41.0.2228.0 Safari/537.3''' - ) + headers = { + "Authorization": f"Basic {token}", + "User-Agent": ( + "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/41.0.2228.0 Safari/537.3" + ), + } try: - res = urllib.request.urlopen(req) - auth_url: str = rapidjson.loads(res.read().decode('utf-8'))['url'] + res = requests.get(url, headers=headers) + auth_url: str = res.json()["url"] + LOG.info(f"auth_url: {auth_url}") Net.get_large_file(auth_url) - files.append('.'.join(file.split('.')[:-1])) - with gzip.open(file, 'rb') as archive, open(files[0], 'wb') as out_file: + files.append(".".join(file.split(".")[:-1])) + with gzip.open(file, "rb") as archive, open(files[0], "wb") as out_file: shutil.copyfileobj(archive, out_file) - db = CosmicDB('.') - db.setup(files, delimiter='\t', skip_header=True) - except urllib.error.HTTPError as ex: + db = CosmicDB(".") + db.setup(files, delimiter="\t", skip_header=True) + except requests.exceptions.HTTPError as ex: return_err.append(f'{Settings.COSMIC["NAME"]}: {ex}') @staticmethod - def get_fusiongdb(self, return_err: List[str]) -> None: - """Method for download FusionGDB database.""" - - pool_params = [ - (f'{Settings.FUSIONGDB["HOSTNAME"]}/{x}', True) for x in Settings.FUSIONGDB["FILES"] - ] - pool = Pool(Settings.THREAD_NUM) - pool.starmap(Net.get_large_file, pool_params) - pool.close() - pool.join() - db = FusionGDB('.') - db.setup(Settings.FUSIONGDB['FILES'], delimiter='\t', skip_header=False) + def get_cosmic_from_qiagen(token: str, return_err: List[str], outputpath: str) -> None: + """Method for download COSMIC database from QIAGEN.""" + try: + Net.get_qiagen_files(token, outputpath) + except Exception as ex: + print(ex) + # Then continue parsing out the fusion_file_id + file_id = Net.fetch_fusion_file_id(outputpath) + Net.download_qiagen_file(token, file_id, outputpath) + file: str = Settings.COSMIC["FILE"] + files = [] + + try: + files.append(".".join(file.split(".")[:-1])) + + with gzip.open(file, "rb") as archive, open(files[0], "wb") as out_file: + shutil.copyfileobj(archive, out_file) + + db = CosmicDB(".") + db.setup(files, delimiter="\t", skip_header=True) + except Exception as ex: + return_err.append(f'{Settings.COSMIC["NAME"]}: {ex}') @staticmethod def get_fusiongdb2(self, return_err: List[str]) -> None: @@ -117,16 +176,16 @@ def get_fusiongdb2(self, return_err: List[str]) -> None: url: str = f'{Settings.FUSIONGDB2["HOSTNAME"]}/{Settings.FUSIONGDB2["FILE"]}' Net.get_large_file(url) file: str = f'{Settings.FUSIONGDB2["FILE"]}' - df = pd.read_excel(file, engine='openpyxl') + df = pd.read_excel(file, engine="openpyxl") df["fusion"] = df["5'-gene (text format)"] + "--" + df["3'-gene (text format)"] - file_csv = 'fusionGDB2.csv' - df['fusion'].to_csv(file_csv, header=False, index=False, sep=',', encoding='utf-8') + file_csv = "fusionGDB2.csv" + df["fusion"].to_csv(file_csv, header=False, index=False, sep=",", encoding="utf-8") - db = FusionGDB2('.') - db.setup([file_csv], delimiter=',', skip_header=False) + db = FusionGDB2(".") + db.setup([file_csv], delimiter=",", skip_header=False) except DownloadException as ex: - return_err.append(f'FusionGDB2: {ex}') + return_err.append(f"FusionGDB2: {ex}") @staticmethod def get_mitelman(self, return_err: List[str]) -> None: @@ -134,21 +193,23 @@ def get_mitelman(self, return_err: List[str]) -> None: try: url: str = f'{Settings.MITELMAN["HOSTNAME"]}/{Settings.MITELMAN["FILE"]}' Net.get_large_file(url) - with ZipFile(Settings.MITELMAN['FILE'], 'r') as archive: - files = [x for x in archive.namelist() if "mitelman_db/MBCA.TXT.DATA" in x] + with ZipFile(Settings.MITELMAN["FILE"], "r") as archive: + files = [ + x for x in archive.namelist() if "MBCA.TXT.DATA" in x and not "MACOSX" in x + ] archive.extractall() - db = MitelmanDB('.') - db.setup(files, delimiter='\t', skip_header=False, encoding='ISO-8859-1') + db = MitelmanDB(".") + db.setup(files, delimiter="\t", skip_header=False, encoding="ISO-8859-1") except DownloadException as ex: - return_err.append(f'Mitelman: {ex}') + return_err.append(f"Mitelman: {ex}") @staticmethod def clean(): """Remove all files except *db.""" - for temp in glob.glob('*/'): + for temp in glob.glob("*/"): shutil.rmtree(temp) - for temp in glob.glob('*[!.db]'): + for temp in glob.glob("*[!.db]"): if not os.path.isdir(temp): os.remove(temp) diff --git a/fusion_report/common/page.py b/fusion_report/common/page.py index dd53433..fa87d75 100644 --- a/fusion_report/common/page.py +++ b/fusion_report/common/page.py @@ -10,8 +10,14 @@ class Page(BasePage): Attributes: __page_variables: extra variables to be displayed on the page """ - def __init__(self, title: str, view: str, - filename: str = None, page_variables: Dict[str, Any] = None) -> None: + + def __init__( + self, + title: str, + view: str, + filename: str = None, + page_variables: Dict[str, Any] = None, + ) -> None: self.__page_variables = {} if not page_variables else page_variables super().__init__(title, view, filename) diff --git a/fusion_report/common/report.py b/fusion_report/common/report.py index 6ff9bb4..5ff0d6e 100644 --- a/fusion_report/common/report.py +++ b/fusion_report/common/report.py @@ -12,12 +12,18 @@ class Report(Template): Attributes: pages: List of pages """ + def __init__(self, config_path: str, output_dir: str) -> None: self.pages: List[Page] = [] super().__init__(config_path, output_dir) - def create_page(self, title: str, view: str = 'index', - filename: str = None, page_variables: Dict[str, Any] = None) -> Page: + def create_page( + self, + title: str, + view: str = "index", + filename: str = None, + page_variables: Dict[str, Any] = None, + ) -> Page: """Creates and adds page in the list. Return: @@ -31,7 +37,7 @@ def create_page(self, title: str, view: str = 'index', page = Page(title, view, filename, page_variables) if self.index_by(filename) != -1: - raise ReportException(f'Page {page.filename} already exists!') + raise ReportException(f"Page {page.filename} already exists!") self.pages.append(page) return page @@ -47,7 +53,7 @@ def get_page(self, filename: str) -> Page: """ index = self.index_by(filename) if index == -1: - raise ReportException(f'Page {filename} not found') + raise ReportException(f"Page {filename} not found") return self.pages[index] @@ -56,13 +62,13 @@ def render(self, page: Page, extra_variables: Optional[Dict[str, Any]] = None): template_variables: Dict[str, Any] = page.get_content() # load modules - template_variables['modules'] = page.modules + template_variables["modules"] = page.modules # generate menu (html_id, menu item): List[Tuple[str, str]] - template_variables['menu'] = [] - for _, module in template_variables['modules'].items(): - for item in module['menu']: - template_variables['menu'].append((self.get_id(item), item)) + template_variables["menu"] = [] + for _, module in template_variables["modules"].items(): + for item in module["menu"]: + template_variables["menu"].append((self.get_id(item), item)) if extra_variables: template_variables = {**template_variables, **extra_variables} diff --git a/fusion_report/common/singleton.py b/fusion_report/common/singleton.py index 39fbf92..285b273 100644 --- a/fusion_report/common/singleton.py +++ b/fusion_report/common/singleton.py @@ -4,6 +4,7 @@ class Singleton(type): """Implementation of Singleton design pattern""" + _instances: Any = {} def __call__(cls, *args, **kwargs): diff --git a/fusion_report/common/template.py b/fusion_report/common/template.py index ae117ec..164fb74 100644 --- a/fusion_report/common/template.py +++ b/fusion_report/common/template.py @@ -4,12 +4,13 @@ from pathlib import Path from typing import Any, Dict -from jinja2 import Environment, FileSystemLoader, Markup - +from jinja2 import Environment, FileSystemLoader +from markupsafe import Markup from fusion_report.common.page import Page from fusion_report.config import Config from fusion_report.settings import Settings + class Template: """The class implements core methods. @@ -18,21 +19,24 @@ class Template: j2_variables: Extra variables from configuration output_dir: Output directory where the files will be generated """ + def __init__(self, config_path: str, output_dir: str) -> None: self.j2_env = Environment( - loader=FileSystemLoader([ - os.path.join(Settings.ROOT_DIR, 'templates/'), - os.path.join(Settings.ROOT_DIR, 'modules/') - ]), + loader=FileSystemLoader( + [ + os.path.join(Settings.ROOT_DIR, "templates/"), + os.path.join(Settings.ROOT_DIR, "modules/"), + ] + ), trim_blocks=True, - autoescape=True + autoescape=True, ) self.j2_variables: Config = Config().parse(config_path) self.output_dir: str = output_dir # helper functions which can be used inside partial templates - self.j2_env.globals['include_raw'] = self.include_raw - self.j2_env.globals['get_id'] = self.get_id + self.j2_env.globals["include_raw"] = self.include_raw + self.j2_env.globals["get_id"] = self.get_id # Making sure output directory exists if not os.path.exists(output_dir): @@ -42,9 +46,7 @@ def render(self, page: Page, extra_variables: Dict[str, Any]) -> None: """Renders page""" merged_variables = {**self.j2_variables.json_serialize(), **extra_variables} view = self.j2_env.get_template(page.view).render(merged_variables) - with open( - os.path.join(self.output_dir, page.filename), 'w', encoding='utf-8' - ) as file_out: + with open(os.path.join(self.output_dir, page.filename), "w", encoding="utf-8") as file_out: file_out.write(view) def include_raw(self, filename: str) -> Markup: @@ -53,15 +55,15 @@ def include_raw(self, filename: str) -> Markup: file_extension = Path(filename).suffix assert isinstance(self.j2_env.loader, FileSystemLoader) - if file_extension == '.css': + if file_extension == ".css": return Markup( ''.format( css=self.j2_env.loader.get_source(self.j2_env, filename)[0] ) ) - if file_extension == '.js': + if file_extension == ".js": return Markup( - ''.format( + "".format( js=self.j2_env.loader.get_source(self.j2_env, filename)[0] ) ) @@ -71,4 +73,4 @@ def include_raw(self, filename: str) -> Markup: @staticmethod def get_id(title: str) -> str: """Generate html id tag from page title""" - return title.lower().replace(' ', '_') + return title.lower().replace(" ", "_") diff --git a/fusion_report/config.py b/fusion_report/config.py index 75fcc47..4622fd8 100644 --- a/fusion_report/config.py +++ b/fusion_report/config.py @@ -23,14 +23,20 @@ class Config: """ def __init__(self) -> None: - self._report_title = 'nfcore/rnafusion summary report' + self._report_title = "nfcore/rnafusion summary report" self.logos: Dict[str, str] = { - 'main': base64.b64encode(open( - os.path.join(Settings.ROOT_DIR, 'templates/assets/img/fusion-report.png'), 'rb' - ).read()).decode('utf-8'), - 'rnafusion': base64.b64encode(open( - os.path.join(Settings.ROOT_DIR, 'templates/assets/img/rnafusion_logo.png'), 'rb' - ).read()).decode('utf-8') + "main": base64.b64encode( + open( + os.path.join(Settings.ROOT_DIR, "templates/assets/img/fusion-report.png"), + "rb", + ).read() + ).decode("utf-8"), + "rnafusion": base64.b64encode( + open( + os.path.join(Settings.ROOT_DIR, "templates/assets/img/rnafusion_logo.png"), + "rb", + ).read() + ).decode("utf-8"), } self._institution: Dict[str, Any] = {} self._date: str = datetime.now().strftime(Settings.DATE_FORMAT) @@ -53,17 +59,15 @@ def institution(self) -> Dict[str, Any]: @institution.setter def institution(self, institution: Dict[str, str]) -> None: - if 'name' in institution.keys(): - self._institution['name'] = institution['name'] + if "name" in institution.keys(): + self._institution["name"] = institution["name"] - if 'img' in institution.keys() and os.path.exists(institution['img']): - image = os.path.join(Settings.ROOT_DIR, institution['img']) - self._institution['img'] = base64.b64encode( - open(image, 'rb').read() - ).decode('utf-8') + if "img" in institution.keys() and os.path.exists(institution["img"]): + image = os.path.join(Settings.ROOT_DIR, institution["img"]) + self._institution["img"] = base64.b64encode(open(image, "rb").read()).decode("utf-8") - if 'url' in institution.keys(): - self._institution['url'] = institution['url'] + if "url" in institution.keys(): + self._institution["url"] = institution["url"] @property def date(self) -> str: @@ -83,10 +87,10 @@ def assets(self) -> Dict[str, List[str]]: @assets.setter def assets(self, assets) -> None: for key, value in assets.items(): - if key in ('css', 'js') and value is not None: + if key in ("css", "js") and value is not None: self.assets[key] = [x for x in value if os.path.exists(x)] - def parse(self, path) -> 'Config': + def parse(self, path) -> "Config": """ Method for parsing the configuration file. @@ -95,13 +99,13 @@ def parse(self, path) -> 'Config': """ if path: try: - with open(path, 'r', encoding='utf-8') as in_file: + with open(path, "r", encoding="utf-8") as in_file: try: data = safe_load(in_file) - self.report_title = data['report_title'] - self.institution = data['institution'] - self.date = data['date_format'] - self.assets = data['assets'] + self.report_title = data["report_title"] + self.institution = data["institution"] + self.date = data["date_format"] + self.assets = data["assets"] return self except YAMLError as ex: raise ConfigException(ex) @@ -113,9 +117,9 @@ def parse(self, path) -> 'Config': def json_serialize(self) -> Dict[str, Any]: """Helper serialization method for templating engine.""" return { - 'report_title': self.report_title, - 'logos': self.logos, - 'institution': self.institution, - 'date': self.date, - 'assets': self.assets + "report_title": self.report_title, + "logos": self.logos, + "institution": self.institution, + "date": self.date, + "assets": self.assets, } diff --git a/fusion_report/data/cosmic.py b/fusion_report/data/cosmic.py index cf06608..dba5202 100644 --- a/fusion_report/data/cosmic.py +++ b/fusion_report/data/cosmic.py @@ -12,7 +12,7 @@ class CosmicDB(Db, metaclass=Singleton): """Implementation of Cosmic Database. All core functionality is handled by parent class.""" def __init__(self, path: str) -> None: - super().__init__(path, Settings.COSMIC['NAME'], Settings.COSMIC['SCHEMA']) + super().__init__(path, Settings.COSMIC["NAME"], Settings.COSMIC["SCHEMA"]) def get_all_fusions(self) -> List[str]: """Returns all fusions from database.""" @@ -21,5 +21,9 @@ def get_all_fusions(self) -> List[str]: WHERE translocation_name != ""''' res = self.select(query) - return ['--'.join(re.findall(r'\(.*?\)', x['translocation_name'])) - .replace('(', '').replace(')', '') for x in res] + return [ + "--".join(re.findall(r"\(.*?\)", x["translocation_name"])) + .replace("(", "") + .replace(")", "") + for x in res + ] diff --git a/fusion_report/data/fusiongdb.py b/fusion_report/data/fusiongdb.py deleted file mode 100644 index 3ad3b0e..0000000 --- a/fusion_report/data/fusiongdb.py +++ /dev/null @@ -1,29 +0,0 @@ -"""FusionGDB Database""" -from typing import List - -from fusion_report.common.db import Db -from fusion_report.common.singleton import Singleton -from fusion_report.settings import Settings - - -class FusionGDB(Db, metaclass=Singleton): - """Implementation of FusionGDB Database. All core functionality is handled by parent class.""" - - def __init__(self, path: str) -> None: - super().__init__(path, Settings.FUSIONGDB['NAME'], Settings.FUSIONGDB['SCHEMA']) - - def setup(self, files: List[str], delimiter: str = '', skip_header=False, encoding='utf-8'): - super().setup(files, delimiter) - - # fixing embarrassing typo: https://github.com/nf-core/rnafusion/issues/82 - sql = '''UPDATE tcga_chitars_combined_fusion_orf_analyzed_gencode_h19v19 - SET orf = "Frame-shift" WHERE orf = "Frame-shit"''' - self.execute(sql) - - def get_all_fusions(self) -> List[str]: - """Returns all fusions from database.""" - query: str = '''SELECT DISTINCT (h_gene || "--" || t_gene) as fusion_pair - FROM tcga_chitars_combined_fusion_information_on_hg19''' - res = self.select(query) - - return [fusion['fusion_pair'] for fusion in res] diff --git a/fusion_report/data/fusiongdb2.py b/fusion_report/data/fusiongdb2.py index 46f48db..c3c58b9 100644 --- a/fusion_report/data/fusiongdb2.py +++ b/fusion_report/data/fusiongdb2.py @@ -7,15 +7,15 @@ class FusionGDB2(Db, metaclass=Singleton): - """Implementation of FusionGDB Database. All core functionality is handled by parent class.""" + """Implementation of FusionGDB2 Database. All core functionality is handled by parent class.""" def __init__(self, path: str) -> None: - super().__init__(path, Settings.FUSIONGDB2['NAME'], Settings.FUSIONGDB2['SCHEMA']) + super().__init__(path, Settings.FUSIONGDB2["NAME"], Settings.FUSIONGDB2["SCHEMA"]) def get_all_fusions(self) -> List[str]: """Returns all fusions from database.""" - query: str = '''SELECT DISTINCT fusions - FROM fusiongdb2''' + query: str = """SELECT DISTINCT fusions + FROM fusiongdb2""" res = self.select(query) - return [fusion['fusions'].strip() for fusion in res] + return [fusion["fusions"].strip() for fusion in res] diff --git a/fusion_report/data/mitelman.py b/fusion_report/data/mitelman.py index 56b72b9..bdb45a1 100644 --- a/fusion_report/data/mitelman.py +++ b/fusion_report/data/mitelman.py @@ -10,11 +10,11 @@ class MitelmanDB(Db, metaclass=Singleton): """Implementation of Mitelman Database. All core functionality is handled by parent class.""" def __init__(self, path: str) -> None: - super().__init__(path, Settings.MITELMAN['NAME'], Settings.MITELMAN['SCHEMA']) + super().__init__(path, Settings.MITELMAN["NAME"], Settings.MITELMAN["SCHEMA"]) def get_all_fusions(self) -> List[str]: """Returns all fusions from database.""" - query: str = 'SELECT DISTINCT geneshort FROM mbca WHERE geneshort LIKE "%::%"' + query: str = '''SELECT DISTINCT geneshort FROM mbca WHERE geneshort LIKE "%::%"''' res = self.select(query) - return [fusion['geneshort'].strip().replace('::', '--') for fusion in res] + return [fusion["geneshort"].strip().replace("::", "--") for fusion in res] diff --git a/fusion_report/data/schema/FusionGDB2.sql b/fusion_report/data/schema/FusionGDB2.sql index f07d3bc..7f89b0b 100644 --- a/fusion_report/data/schema/FusionGDB2.sql +++ b/fusion_report/data/schema/FusionGDB2.sql @@ -1,3 +1,3 @@ -CREATE TABLE "fusionGDB2" ( - "Fusions" varchar(50) NOT NULL DEFAULT '' +CREATE TABLE "fusiongdb2" ( + "fusions" varchar(50) NOT NULL DEFAULT '' ); diff --git a/fusion_report/download.py b/fusion_report/download.py index 5e3cb0d..7f5a7a1 100644 --- a/fusion_report/download.py +++ b/fusion_report/download.py @@ -8,9 +8,10 @@ from fusion_report.common.logger import Logger from fusion_report.common.net import Net + class Download: """Class designed for downloading any type of required database. - Currently the script is able to download: Mitelman, FusionGDB and COSMIC with provided + Currently the script is able to download: Mitelman, FusionGDB2 and COSMIC with provided credentials. Attributes: @@ -23,7 +24,10 @@ def __init__(self, params: Namespace): def validate(self, params: Namespace) -> None: """Method validating required input. In this case COSMIC credentials.""" - self.cosmic_token = Net.get_cosmic_token(params) + if params.qiagen: + self.cosmic_token = Net.get_cosmic_qiagen_token(params) + else: + self.cosmic_token = Net.get_cosmic_token(params) # making sure output directory exists if not os.path.exists(params.output): @@ -37,19 +41,21 @@ def download_all(self, params: Namespace) -> None: # MITELMAN Net.get_mitelman(self, return_err) - # FusionGDB - Net.get_fusiongdb(self, return_err) - # FusionGDB2 Net.get_fusiongdb2(self, return_err) # COSMIC - Net.get_cosmic(self.cosmic_token, return_err) + if params.qiagen: + Logger(__name__).info("Downloading resources from QIAGEN...") + Net.get_cosmic_from_qiagen(self.cosmic_token, return_err, params.output) + else: + Logger(__name__).info("Downloading resources from SANGER...") + Net.get_cosmic_from_sanger(self.cosmic_token, return_err) if len(return_err) > 0: raise DownloadException(return_err) - Logger(__name__).info('Downloading finished') + Logger(__name__).info("Downloading finished") Net.clean() # Create timestamp: diff --git a/fusion_report/modules/fusion_summary/fusion_summary.py b/fusion_report/modules/fusion_summary/fusion_summary.py index 1b0d86a..d4f4054 100644 --- a/fusion_report/modules/fusion_summary/fusion_summary.py +++ b/fusion_report/modules/fusion_summary/fusion_summary.py @@ -9,7 +9,4 @@ class CustomModule(BaseModule): def load(self) -> Dict[str, Any]: """Return module variables.""" - return { - 'fusion': self.params['fusion'], - 'menu': ['Summary'] - } + return {"fusion": self.params["fusion"], "menu": ["Summary"]} diff --git a/fusion_report/modules/fusiongdb/__init__.py b/fusion_report/modules/fusiongdb/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fusion_report/modules/fusiongdb/diseases/__init__.py b/fusion_report/modules/fusiongdb/diseases/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fusion_report/modules/fusiongdb/diseases/diseases.py b/fusion_report/modules/fusiongdb/diseases/diseases.py deleted file mode 100644 index c3314cc..0000000 --- a/fusion_report/modules/fusiongdb/diseases/diseases.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Disease module""" -from typing import Any, Dict - -from fusion_report.data.fusiongdb import FusionGDB -from fusion_report.modules.base_module import BaseModule - - -class CustomModule(BaseModule): - """Disease section in fusion page.""" - - def get_data(self) -> Dict[str, Any]: - """Gathers necessary data.""" - return FusionGDB(self.params['db_path']).select( - ''' - SELECT * FROM fgene_disease_associations - WHERE (gene = ? OR gene = ?) - AND disease_prob > 0.2001 ORDER BY disease_prob DESC - ''', - self.params['fusion'].split('--') - ) - - def load(self) -> Dict[str, Any]: - """Return module variables.""" - return { - 'data': self.get_data(), - 'menu': ['Related diseases'] - } diff --git a/fusion_report/modules/fusiongdb/diseases/partial.html b/fusion_report/modules/fusiongdb/diseases/partial.html deleted file mode 100644 index e9ab8c9..0000000 --- a/fusion_report/modules/fusiongdb/diseases/partial.html +++ /dev/null @@ -1,58 +0,0 @@ -
-
-

{{ modules['fusiongdb.diseases'].menu[0]}}

-
-

- Diseases associated with fusion partners (DisGeNet 4.0) with P > 20.01. -

- {% with table_name='related_diseases_table' %} - {% include "partials/table-buttons.html" %} - {% endwith %} - -
- - - - \ No newline at end of file diff --git a/fusion_report/modules/fusiongdb/drugs/__init__.py b/fusion_report/modules/fusiongdb/drugs/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fusion_report/modules/fusiongdb/drugs/drugs.py b/fusion_report/modules/fusiongdb/drugs/drugs.py deleted file mode 100644 index 17cf423..0000000 --- a/fusion_report/modules/fusiongdb/drugs/drugs.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Related drug module""" -from typing import Any, Dict - -from fusion_report.data.fusiongdb import FusionGDB -from fusion_report.modules.base_module import BaseModule - - -class CustomModule(BaseModule): - """Related drug section in fusion page.""" - - def get_data(self) -> Dict[str, Any]: - """Gathers necessary data.""" - - return FusionGDB(self.params['db_path']).select( - ''' - SELECT gene_symbol, drug_status, drug_bank_id, drug_name, drug_action, - fusion_uniprot_related_drugs.uniprot_acc FROM fusion_uniprot_related_drugs - INNER JOIN uniprot_gsymbol - ON fusion_uniprot_related_drugs.uniprot_acc = uniprot_gsymbol.uniprot_acc - WHERE gene_symbol = ? OR gene_symbol = ? - ''', - self.params['fusion'].split('--') - ) - - def load(self) -> Dict[str, Any]: - """Return module variables.""" - return { - 'data': self.get_data(), - 'menu': ['Targeting drugs'] - } diff --git a/fusion_report/modules/fusiongdb/drugs/partial.html b/fusion_report/modules/fusiongdb/drugs/partial.html deleted file mode 100644 index 2286c50..0000000 --- a/fusion_report/modules/fusiongdb/drugs/partial.html +++ /dev/null @@ -1,57 +0,0 @@ -
-
-

{{ modules['fusiongdb.drugs'].menu[0] }}

-
-

- Drugs targeting genes involved in this fusion gene (DrugBank Version 5.1.0 2018-04-02). -

- {% with table_name='targeting_drugs_table' %} - {% include "partials/table-buttons.html" %} - {% endwith %} -
-
- - - - \ No newline at end of file diff --git a/fusion_report/modules/fusiongdb/ppi/__init__.py b/fusion_report/modules/fusiongdb/ppi/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fusion_report/modules/fusiongdb/ppi/partial.html b/fusion_report/modules/fusiongdb/ppi/partial.html deleted file mode 100644 index 1baa1b8..0000000 --- a/fusion_report/modules/fusiongdb/ppi/partial.html +++ /dev/null @@ -1,98 +0,0 @@ - -{{ include_raw('assets/js/cytoscape-3.2.22.min.js') | safe }} -{{ include_raw('assets/js/cytoscape-cose-bilkent-4.0.0.min.js') | safe }} - -
-
-

{{ modules['fusiongdb.ppi'].menu[0] }}

-
- - -
-
-

- Protein-protein interactions with each fusion partner protein in wild-type. - Data were taken from here. -

-
-
-
-
- - - - \ No newline at end of file diff --git a/fusion_report/modules/fusiongdb/ppi/ppi.py b/fusion_report/modules/fusiongdb/ppi/ppi.py deleted file mode 100644 index 3a8f172..0000000 --- a/fusion_report/modules/fusiongdb/ppi/ppi.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Protein-Protein interaction module""" -from typing import Any, Dict, List - -from fusion_report.data.fusiongdb import FusionGDB -from fusion_report.modules.base_module import BaseModule - - -class CustomModule(BaseModule): - """Protein-Protein interaction section in fusion page.""" - - def get_data(self) -> List[Any]: - """Gathers necessary data.""" - - return FusionGDB(self.params['db_path']).select( - ''' - SELECT DISTINCT h_gene, h_gene_interactions, t_gene, t_gene_interactions - FROM fusion_ppi WHERE h_gene = ? AND t_gene = ? - ''', - self.params['fusion'].split('--') - ) - - def build_graph(self): - """Helper function that generates Network map of Protein-Protein Interactions using - Cytoscape.js. Additional module https://github.com/cytoscape/cytoscape.js-cose-bilkent. - - Returns: - List structure which is defined by the Cytoscape library - """ - data = self.get_data() - if not data: - return [] - - graph_data = [ - {'data': {'id': 'fusion'}, 'classes': 'core'}, - {'data': {'id': data[0]['h_gene']}, 'classes': 'core'}, - {'data': {'id': data[0]['t_gene']}, 'classes': 'core'}, - {'data': { - 'id': 'fusion' + data[0]['h_gene'], - 'source': 'fusion', - 'target': data[0]['h_gene'] - }, - 'classes': 'core-connection' - }, - {'data': { - 'id': 'fusion' + data[0]['t_gene'], - 'source': 'fusion', - 'target': data[0]['t_gene'] - }, - 'classes': 'core-connection' - }, - ] - - left_fusion = set(map(str.strip, data[0]['h_gene_interactions'].split(','))) - right_fusion = set(map(str.strip, data[0]['t_gene_interactions'].split(','))) - intersect = left_fusion & right_fusion - left_fusion -= intersect - right_fusion -= intersect - - # Create nodes related to left gene of the fusion - for gene in left_fusion: - graph_data.append({'data': {'id': gene}}) - graph_data.append({ - 'data': { - 'id': gene + '--' + data[0]['h_gene'], - 'source': data[0]['h_gene'], - 'target': gene - } - }) - - # Create nodes related to right gene of the fusion - for gene in right_fusion: - graph_data.append({'data': {'id': gene}}) - graph_data.append({ - 'data': { - 'id': gene + '--' + data[0]['t_gene'], - 'source': data[0]['t_gene'], - 'target': gene - } - }) - - # Some fusions have common gene that can fusion with both left and right gene. - for gene in list(intersect): - graph_data.append({'data': {'id': gene}}) - graph_data.append({ - 'data': { - 'id': 'fusion' + '--' + gene, - 'source': 'fusion', - 'target': gene - } - }) - - return graph_data - - def load(self) -> Dict[str, Any]: - """Return module variables.""" - - return { - 'data': self.build_graph(), - 'menu': ['Chimeric Protein-Protein interactions'] - } diff --git a/fusion_report/modules/fusiongdb/transcripts/__init__.py b/fusion_report/modules/fusiongdb/transcripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fusion_report/modules/fusiongdb/transcripts/partial.html b/fusion_report/modules/fusiongdb/transcripts/partial.html deleted file mode 100644 index 3b37f06..0000000 --- a/fusion_report/modules/fusiongdb/transcripts/partial.html +++ /dev/null @@ -1,93 +0,0 @@ -
-
-

{{ modules['fusiongdb.transcripts'].menu[0] }}

-
-

Open reading frame (ORF) analysis of fusion genes based on Ensembl gene isoform structure.

- {% with table_name='transcripts_table' %} - {% include "partials/table-buttons.html" %} - {% endwith %} -
-
- - - - \ No newline at end of file diff --git a/fusion_report/modules/fusiongdb/transcripts/transcripts.py b/fusion_report/modules/fusiongdb/transcripts/transcripts.py deleted file mode 100644 index 5ffa139..0000000 --- a/fusion_report/modules/fusiongdb/transcripts/transcripts.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Ensembl transcript module""" -from typing import Any, Dict - -from fusion_report.data.fusiongdb import FusionGDB -from fusion_report.modules.base_module import BaseModule - - -class CustomModule(BaseModule): - """Differently observed Ensembl transcripts in fusion page.""" - - def get_data(self) -> Dict[str, Any]: - """Gathers necessary data.""" - - return FusionGDB(self.params['db_path']).select( - ''' - SELECT * FROM tcga_chitars_combined_fusion_ORF_analyzed_gencode_h19v19 - WHERE h_gene = ? AND t_gene = ? - ''', - self.params['fusion'].split('--') - ) - - def load(self) -> Dict[str, Any]: - """Return module variables.""" - - return { - 'data': self.get_data(), - 'menu': ['Ensembl transcripts'] - } diff --git a/fusion_report/modules/fusiongdb/variations/__init__.py b/fusion_report/modules/fusiongdb/variations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fusion_report/modules/fusiongdb/variations/partial.html b/fusion_report/modules/fusiongdb/variations/partial.html deleted file mode 100644 index bd3b024..0000000 --- a/fusion_report/modules/fusiongdb/variations/partial.html +++ /dev/null @@ -1,77 +0,0 @@ -
-
-

{{ modules['fusiongdb.variations'].menu[0] }}

-
-

- Fusion gene information taken from three different sources ChiTars (NAR, 2018), tumorfusions (NAR, 2018) - and Gao et al. (Cell, 2018). Genome coordinates are lifted-over GRCh37/hg19 version.
- Note: LD (Li Ding group, RV: Roel Verhaak group, ChiTaRs fusion database). -

- {% with table_name='variations_table' %} - {% include "partials/table-buttons.html" %} - {% endwith %} -
-
- - - - \ No newline at end of file diff --git a/fusion_report/modules/fusiongdb/variations/variations.py b/fusion_report/modules/fusiongdb/variations/variations.py deleted file mode 100644 index 29c7934..0000000 --- a/fusion_report/modules/fusiongdb/variations/variations.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Fusion gene variation module""" -from typing import Any, Dict - -from fusion_report.data.fusiongdb import FusionGDB -from fusion_report.modules.base_module import BaseModule - - -class CustomModule(BaseModule): - """Different fusion gene variations section in fusion page.""" - - def get_data(self) -> Dict[str, Any]: - """Gathers necessary data.""" - - return FusionGDB(self.params['db_path']).select( - ''' - SELECT * FROM tcga_chitars_combined_fusion_information_on_hg19 - WHERE h_gene = ? AND t_gene = ? - ''', - self.params['fusion'].split('--') - ) - - def load(self) -> Dict[str, Any]: - """Return module variables.""" - - return { - 'data': self.get_data(), - 'menu': ['Fusion gene variations'] - } diff --git a/fusion_report/modules/index_summary/index_summary.py b/fusion_report/modules/index_summary/index_summary.py index aee6150..c40bdb6 100644 --- a/fusion_report/modules/index_summary/index_summary.py +++ b/fusion_report/modules/index_summary/index_summary.py @@ -4,7 +4,6 @@ class CustomModule(BaseModule): - def known_vs_unknown(self) -> List[List[Any]]: """Returns list of number of known and unknown fusions. @@ -13,10 +12,7 @@ def known_vs_unknown(self) -> List[List[Any]]: """ all_fusions: int = len(self.manager.fusions) known_fusions: int = len(self.manager.get_known_fusions()) - return [ - ['known', known_fusions], - ['unknown', all_fusions - known_fusions] - ] + return [["known", known_fusions], ["unknown", all_fusions - known_fusions]] def tool_detection(self) -> List[List[Any]]: """Returns tuple tool and sum of fusions found by the tool. @@ -26,15 +22,17 @@ def tool_detection(self) -> List[List[Any]]: """ running_tools = sorted(self.manager.running_tools) counts: Dict[str, int] = dict.fromkeys(running_tools, 0) - counts['together'] = 0 + counts["together"] = 0 running_tools_count: int = len(running_tools) for fusion in self.manager.fusions: + print(fusion.name) fusion_tools = fusion.tools.keys() for tool in fusion_tools: + print(tool) counts[tool] += 1 # intersection if len(fusion_tools) == running_tools_count: - counts['together'] += 1 + counts["together"] += 1 return [[k, v] for k, v in counts.items()] @@ -60,52 +58,46 @@ def create_fusions_table(self) -> Dict[str, Any]: """ rows = [] tools = self.manager.running_tools - filter_flag = len(tools) < self.params['tool_cutoff'] + filter_flag = len(tools) < self.params["tool_cutoff"] for fusion in self.manager.fusions: row: Dict[str, Any] = {} # If number of executed fusion detection tools is lower than cutoff, filter is ignored if filter_flag: row = { - 'fusion': fusion.name, - 'found_db': fusion.dbs, - 'tools_hits': len(fusion.tools), - 'score': f'{fusion.score:.3}' + "fusion": fusion.name, + "found_db": fusion.dbs, + "tools_hits": len(fusion.tools), + "score": f"{fusion.score:.3}", } # Add only fusions that are detected by at least # default = TOOL_DETECTION_CUTOFF - if not filter_flag and len(fusion.tools) >= self.params['tool_cutoff']: + if not filter_flag and len(fusion.tools) >= self.params["tool_cutoff"]: row = { - 'fusion': fusion.name, - 'found_db': fusion.dbs, - 'tools_hits': len(fusion.tools), - 'score': f'{fusion.score:.3}' + "fusion": fusion.name, + "found_db": fusion.dbs, + "tools_hits": len(fusion.tools), + "score": f"{fusion.score:.3}", } # Add only if row is not empty if bool(row): for tool in tools: - row[tool] = 'true' if tool in sorted(fusion.tools) else 'false' + row[tool] = "true" if tool in sorted(fusion.tools) else "false" rows.append(row) - return { - 'rows': rows, - 'tools': list(sorted(tools)) - } + return {"rows": rows, "tools": list(sorted(tools))} def load(self) -> Dict[str, Any]: """Return module variables.""" return { - 'tools': self.manager.running_tools, - 'num_detected_fusions': len(self.manager.fusions), - 'num_known_fusions': len(self.manager.get_known_fusions()), - 'tool_detection_graph': self.tool_detection(), - 'known_vs_unknown_graph': self.known_vs_unknown(), - 'distribution_graph': self.detection_distribution(), - 'fusion_list': self.create_fusions_table(), - 'tool_cutoff': self.params['tool_cutoff'], - 'menu': [ - 'Dashboard fusion summary', - 'List of detected fusions' - ] + "tools": self.manager.running_tools, + "num_detected_fusions": len(self.manager.fusions), + "num_known_fusions": len(self.manager.get_known_fusions()), + "tool_detection_graph": self.tool_detection(), + "known_vs_unknown_graph": self.known_vs_unknown(), + "distribution_graph": self.detection_distribution(), + "fusion_list": self.create_fusions_table(), + "tool_cutoff": self.params["tool_cutoff"], + "menu": ["Dashboard fusion summary", "List of detected fusions"], } diff --git a/fusion_report/modules/index_summary/partial.html b/fusion_report/modules/index_summary/partial.html index 975fe60..5c1346d 100644 --- a/fusion_report/modules/index_summary/partial.html +++ b/fusion_report/modules/index_summary/partial.html @@ -204,7 +204,6 @@

{{ modules.index_summary.menu[1] }}

}}, {title:"Found in DB", field:"found_db", widthGrow:1.4, formatter:foundDBFormatter, formatterParams: { 'Mitelman': 'secondary', - 'FusionGDB': 'warning', 'FusionGDB2': 'warning', 'COSMIC': 'primary' }} diff --git a/fusion_report/modules/loader.py b/fusion_report/modules/loader.py index 9eb988f..00565e0 100644 --- a/fusion_report/modules/loader.py +++ b/fusion_report/modules/loader.py @@ -24,7 +24,7 @@ def exec(self, name: str) -> Dict[str, Any]: """ try: variables = self.__build_factory(name, self.manager, self.params).load() - variables['partial'] = os.path.join(f'{name.replace(".", "/")}', 'partial.html') + variables["partial"] = os.path.join(f'{name.replace(".", "/")}', "partial.html") return variables except AttributeError as ex: raise ModuleException(ex) @@ -37,6 +37,6 @@ def __build_factory(name: str, manager: FusionManager, params=None): an instance of CustomModule """ module_name: str = f'fusion_report.modules.{name}.{name.split(".")[-1]}' - module = __import__(module_name, fromlist=['CustomModule']) - klass = getattr(module, 'CustomModule') + module = __import__(module_name, fromlist=["CustomModule"]) + klass = getattr(module, "CustomModule") return klass(manager, params) diff --git a/fusion_report/parsers/arriba.py b/fusion_report/parsers/arriba.py index 4d7a75d..4406ae1 100644 --- a/fusion_report/parsers/arriba.py +++ b/fusion_report/parsers/arriba.py @@ -7,39 +7,39 @@ class Arriba(AbstractFusionTool): """Arriba tool parser.""" - def set_header(self, header: str, delimiter: Optional[str] = '\t'): + def set_header(self, header: str, delimiter: Optional[str] = "\t"): self.header: List[str] = header.strip().split(delimiter) def parse_multiple(self, left_fusion: str, right_fusion: str, delimiter: str) -> List[str]: if delimiter not in left_fusion and delimiter not in right_fusion: - return [f'{left_fusion}--{right_fusion}'] + return [f"{left_fusion}--{right_fusion}"] - left: List[str] = [x.split('(')[0] for x in left_fusion.split(delimiter)] - right: List[str] = [x.split('(')[0] for x in right_fusion.split(delimiter)] - fusions = [f'{a}--{b}' for a in left for b in right] + left: List[str] = [x.split("(")[0] for x in left_fusion.split(delimiter)] + right: List[str] = [x.split("(")[0] for x in right_fusion.split(delimiter)] + fusions = [f"{a}--{b}" for a in left for b in right] return fusions - def parse(self, line: str, delimiter: Optional[str] = '\t') -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = "\t") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] fusions = self.parse_multiple( - col[self.header.index('#gene1')], - col[self.header.index('gene2')], - ',' + col[self.header.index("#gene1")], col[self.header.index("gene2")], "," ) details: Dict[str, Any] = { - 'position': "#".join([ - col[self.header.index('breakpoint1')], - col[self.header.index('breakpoint2')] - ]), - 'reading-frame': col[self.header.index('reading_frame')], - 'type': col[self.header.index('type')], - 'split_reads1': col[self.header.index('split_reads1')], - 'split_reads2': col[self.header.index('split_reads2')], - 'discordant_mates': col[self.header.index('discordant_mates')], - 'coverage1': col[self.header.index('coverage1')], - 'coverage2': col[self.header.index('coverage2')], - 'confidence': col[self.header.index('confidence')], + "position": "#".join( + [ + col[self.header.index("breakpoint1")], + col[self.header.index("breakpoint2")], + ] + ), + "reading-frame": col[self.header.index("reading_frame")], + "type": col[self.header.index("type")], + "split_reads1": col[self.header.index("split_reads1")], + "split_reads2": col[self.header.index("split_reads2")], + "discordant_mates": col[self.header.index("discordant_mates")], + "coverage1": col[self.header.index("coverage1")], + "coverage2": col[self.header.index("coverage2")], + "confidence": col[self.header.index("confidence")], } return [(fusion, details) for fusion in fusions] diff --git a/fusion_report/parsers/dragen.py b/fusion_report/parsers/dragen.py index 0797c22..dbb0a87 100644 --- a/fusion_report/parsers/dragen.py +++ b/fusion_report/parsers/dragen.py @@ -7,18 +7,20 @@ class Dragen(AbstractFusionTool): """Dragen tool parser.""" - def set_header(self, header: str, delimiter: Optional[str] = '\t'): + def set_header(self, header: str, delimiter: Optional[str] = "\t"): self.header: List[str] = header.strip().split(delimiter) - def parse(self, line: str, delimiter: Optional[str] = '\t') -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = "\t") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] - fusion: str = col[self.header.index('#FusionGene')] + fusion: str = col[self.header.index("#FusionGene")] details: Dict[str, Any] = { - 'position': "#".join([ - col[self.header.index('LeftBreakpoint')], - col[self.header.index('RightBreakpoint')] - ]).replace('chr', ''), - 'score': int(col[self.header.index('Score')]), + "position": "#".join( + [ + col[self.header.index("LeftBreakpoint")], + col[self.header.index("RightBreakpoint")], + ] + ).replace("chr", ""), + "score": int(col[self.header.index("Score")]), } return [(fusion, details)] diff --git a/fusion_report/parsers/ericscript.py b/fusion_report/parsers/ericscript.py index ad4b9de..319cfcc 100644 --- a/fusion_report/parsers/ericscript.py +++ b/fusion_report/parsers/ericscript.py @@ -7,27 +7,26 @@ class Ericscript(AbstractFusionTool): """EricScript tool parser.""" - def set_header(self, header: str, delimiter: Optional[str] = '\t'): + def set_header(self, header: str, delimiter: Optional[str] = "\t"): self.header: List[str] = header.strip().split(delimiter) - def parse(self, line: str, delimiter: Optional[str] = '\t') -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = "\t") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] - fusion: str = "--".join([ - col[self.header.index('GeneName1')], - col[self.header.index('GeneName2')] - ]) + fusion: str = "--".join( + [col[self.header.index("GeneName1")], col[self.header.index("GeneName2")]] + ) details: Dict[str, Any] = { - 'position': ( + "position": ( f"{col[self.header.index('chr1')]}:{col[self.header.index('Breakpoint1')]}:" f"{col[self.header.index('strand1')]}#{col[self.header.index('chr2')]}:" f"{col[self.header.index('Breakpoint2')]}:{col[self.header.index('strand2')]}" ), - 'discordant_reads': int(col[self.header.index('crossingreads')]), - 'junction_reads': int(col[self.header.index('spanningreads')]), - 'fusion_type': col[self.header.index('fusiontype')], - 'gene_expr1': float(col[self.header.index('GeneExpr1')]), - 'gene_expr2': float(col[self.header.index('GeneExpr2')]), - 'gene_expr_fusion': float(col[self.header.index('GeneExpr_Fused')]) + "discordant_reads": int(col[self.header.index("crossingreads")]), + "junction_reads": int(col[self.header.index("spanningreads")]), + "fusion_type": col[self.header.index("fusiontype")], + "gene_expr1": float(col[self.header.index("GeneExpr1")]), + "gene_expr2": float(col[self.header.index("GeneExpr2")]), + "gene_expr_fusion": float(col[self.header.index("GeneExpr_Fused")]), } return [(fusion, details)] diff --git a/fusion_report/parsers/fusioncatcher.py b/fusion_report/parsers/fusioncatcher.py index 78e0fee..7026601 100644 --- a/fusion_report/parsers/fusioncatcher.py +++ b/fusion_report/parsers/fusioncatcher.py @@ -7,25 +7,29 @@ class Fusioncatcher(AbstractFusionTool): """FusionCatcher tool parser.""" - def set_header(self, header: str, delimiter: Optional[str] = '\t'): + def set_header(self, header: str, delimiter: Optional[str] = "\t"): self.header: List[str] = header.strip().split(delimiter) - def parse(self, line: str, delimiter: Optional[str] = '\t') -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = "\t") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] - fusion: str = '--'.join([ - col[self.header.index('Gene_1_symbol(5end_fusion_partner)')], - col[self.header.index('Gene_2_symbol(3end_fusion_partner)')] - ]) + fusion: str = "--".join( + [ + col[self.header.index("Gene_1_symbol(5end_fusion_partner)")], + col[self.header.index("Gene_2_symbol(3end_fusion_partner)")], + ] + ) details: Dict[str, Any] = { - 'position': "#".join([ - col[self.header.index('Fusion_point_for_gene_1(5end_fusion_partner)')], - col[self.header.index('Fusion_point_for_gene_2(3end_fusion_partner)')] - ]), - 'common_mapping_reads': int(col[self.header.index('Counts_of_common_mapping_reads')]), - 'spanning_pairs': int(col[self.header.index('Spanning_pairs')]), - 'spanning_unique_reads': int(col[self.header.index('Spanning_unique_reads')]), - 'longest_anchor': int(col[self.header.index('Longest_anchor_found')]), - 'fusion_type': col[self.header.index('Predicted_effect')].strip() + "position": "#".join( + [ + col[self.header.index("Fusion_point_for_gene_1(5end_fusion_partner)")], + col[self.header.index("Fusion_point_for_gene_2(3end_fusion_partner)")], + ] + ), + "common_mapping_reads": int(col[self.header.index("Counts_of_common_mapping_reads")]), + "spanning_pairs": int(col[self.header.index("Spanning_pairs")]), + "spanning_unique_reads": int(col[self.header.index("Spanning_unique_reads")]), + "longest_anchor": int(col[self.header.index("Longest_anchor_found")]), + "fusion_type": col[self.header.index("Predicted_effect")].strip(), } return [(fusion, details)] diff --git a/fusion_report/parsers/jaffa.py b/fusion_report/parsers/jaffa.py index b3d9db1..b809b8d 100644 --- a/fusion_report/parsers/jaffa.py +++ b/fusion_report/parsers/jaffa.py @@ -7,12 +7,9 @@ class Jaffa(AbstractFusionTool): """Jaffa tool parser.""" def set_header(self, header: str, delimiter: Optional[str] = ","): - self.header: List[str] = header.strip().split(delimiter) - def parse( - self, line: str, delimiter: Optional[str] = "," - ) -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = ",") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] fusions = col[self.header.index("fusion genes")].split(":") diff --git a/fusion_report/parsers/pizzly.py b/fusion_report/parsers/pizzly.py index 547daef..547e6bf 100644 --- a/fusion_report/parsers/pizzly.py +++ b/fusion_report/parsers/pizzly.py @@ -7,17 +7,17 @@ class Pizzly(AbstractFusionTool): """Pizzly tool parser.""" - def set_header(self, header: str, delimiter: Optional[str] = '\t'): + def set_header(self, header: str, delimiter: Optional[str] = "\t"): self.header: List[str] = header.strip().split(delimiter) - def parse(self, line: str, delimiter: Optional[str] = '\t') -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = "\t") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] - fusion: str = '--'.join([ - col[self.header.index('geneA.name')], col[self.header.index('geneB.name')] - ]) + fusion: str = "--".join( + [col[self.header.index("geneA.name")], col[self.header.index("geneB.name")]] + ) details: Dict[str, Any] = { - 'pair_count': int(col[self.header.index('paircount')]), - 'split_count': int(col[self.header.index('splitcount')]) + "pair_count": int(col[self.header.index("paircount")]), + "split_count": int(col[self.header.index("splitcount")]), } return [(fusion, details)] diff --git a/fusion_report/parsers/squid.py b/fusion_report/parsers/squid.py index d33942d..7223f55 100644 --- a/fusion_report/parsers/squid.py +++ b/fusion_report/parsers/squid.py @@ -7,33 +7,33 @@ class Squid(AbstractFusionTool): """Squid tool parser.""" - def set_header(self, header: str, delimiter: Optional[str] = '\t'): + def set_header(self, header: str, delimiter: Optional[str] = "\t"): self.header: List[str] = header.strip().split(delimiter) def parse_multiple(self, col: str, delimiter: str) -> List[str]: - return [fusion.replace(':', '--') for fusion in col.split(delimiter)] + return [fusion.replace(":", "--") for fusion in col.split(delimiter)] - def parse(self, line: str, delimiter: Optional[str] = '\t') -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = "\t") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] - if col[self.header.index('Type')].strip() == 'non-fusion-gene': - return [('', {})] + if col[self.header.index("Type")].strip() == "non-fusion-gene": + return [("", {})] - fusions = self.parse_multiple(col[self.header.index('FusedGenes')], ',') + fusions = self.parse_multiple(col[self.header.index("FusedGenes")], ",") left_breakpoint: str = ( f"{col[self.header.index('# chrom1')]}:{col[self.header.index('start1')]}" "-" f"{col[self.header.index('end1')]}:{col[self.header.index('strand1')]}" - ).replace('chr', '') + ).replace("chr", "") right_breakpoint: str = ( f"{col[self.header.index('chrom2')]}:{col[self.header.index('start2')]}" "-" f"{col[self.header.index('end2')]}:{col[self.header.index('strand2')]}" - ).replace('chr', '') + ).replace("chr", "") details: Dict[str, Any] = { - 'position': f"{left_breakpoint}#{right_breakpoint}" - if col[self.header.index('strand1')] == '+' - else f"{right_breakpoint}#{left_breakpoint}", - 'score': int(col[self.header.index('score')]) + "position": f"{left_breakpoint}#{right_breakpoint}" + if col[self.header.index("strand1")] == "+" + else f"{right_breakpoint}#{left_breakpoint}", + "score": int(col[self.header.index("score")]), } return [(fusion, details) for fusion in fusions] diff --git a/fusion_report/parsers/starfusion.py b/fusion_report/parsers/starfusion.py index 6aa7888..9a299da 100644 --- a/fusion_report/parsers/starfusion.py +++ b/fusion_report/parsers/starfusion.py @@ -7,19 +7,22 @@ class Starfusion(AbstractFusionTool): """Star-Fusion tool parser.""" - def set_header(self, header: str, delimiter: Optional[str] = '\t'): + def set_header(self, header: str, delimiter: Optional[str] = "\t"): self.header: List[str] = header.strip().split(delimiter) - def parse(self, line: str, delimiter: Optional[str] = '\t') -> List[Tuple[str, Dict[str, Any]]]: + def parse(self, line: str, delimiter: Optional[str] = "\t") -> List[Tuple[str, Dict[str, Any]]]: col: List[str] = [x.strip() for x in line.split(delimiter)] fusion: str = f"{col[self.header.index('#FusionName')]}" details: Dict[str, Any] = { - 'position': '#'.join([ - col[self.header.index('LeftBreakpoint')], col[self.header.index('RightBreakpoint')]] + "position": "#".join( + [ + col[self.header.index("LeftBreakpoint")], + col[self.header.index("RightBreakpoint")], + ] ), - 'junction_reads': int(col[self.header.index('JunctionReadCount')]), - 'spanning_reads': int(col[self.header.index('SpanningFragCount')]), - 'ffmp': float(col[self.header.index('FFPM')]) + "junction_reads": int(col[self.header.index("JunctionReadCount")]), + "spanning_reads": int(col[self.header.index("SpanningFragCount")]), + "ffmp": float(col[self.header.index("FFPM")]), } return [(fusion, details)] diff --git a/fusion_report/settings.py b/fusion_report/settings.py index af0d5fc..0203034 100644 --- a/fusion_report/settings.py +++ b/fusion_report/settings.py @@ -4,39 +4,23 @@ class Settings: - ROOT_DIR: str = os.path.dirname(os.path.abspath(__file__)) VERSION: str = "2.1.5" DATE_FORMAT: str = "%d/%m/%Y" THREAD_NUM: int = 2 FUSION_WEIGHTS: Dict[str, float] = { - "fusiongdb": 0.20, - "cosmic": 0.40, - "mitelman": 0.40, + "cosmic": 0.50, + "mitelman": 0.50, "fusiongdb2": 0.0, } COSMIC: Dict[str, str] = { "NAME": "COSMIC", - "HOSTNAME": "https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v95", + "HOSTNAME": "https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v98", "SCHEMA": "Cosmic.sql", "FILE": "CosmicFusionExport.tsv.gz", } - FUSIONGDB: Dict[str, Any] = { - "NAME": "FusionGDB", - "SCHEMA": "FusionGDB.sql", - "HOSTNAME": "https://ccsm.uth.edu/FusionGDB/tables", - "FILES": [ - "TCGA_ChiTaRS_combined_fusion_information_on_hg19.txt", - "TCGA_ChiTaRS_combined_fusion_ORF_analyzed_gencode_h19v19.txt", - "uniprot_gsymbol.txt", - "fusion_uniprot_related_drugs.txt", - "fusion_ppi.txt", - "fgene_disease_associations.txt", - ], - } - FUSIONGDB2: Dict[str, str] = { "NAME": "FusionGDB2", "SCHEMA": "FusionGDB2.sql", diff --git a/fusion_report/sync.py b/fusion_report/sync.py index 06f3d19..d89e96b 100644 --- a/fusion_report/sync.py +++ b/fusion_report/sync.py @@ -14,7 +14,6 @@ class Sync: - def __init__(self, params: Namespace): self.cosmic_token = Net.get_cosmic_token(params) @@ -26,10 +25,24 @@ def __init__(self, params: Namespace): return_err: List[str] = Manager().list() processes = [ - Process(name=Settings.FUSIONGDB['NAME'], target=Net.get_fusiongdb, args=(return_err,)), - Process(name=Settings.MITELMAN['NAME'], target=Net.get_mitelman, args=(return_err,)), - Process(name=Settings.COSMIC['NAME'], target=Net.get_cosmic, args=(self.cosmic_token, return_err,)), - Process(name=Settings.FUSIONGDB2['NAME'], target=Net.get_fusiongdb2, args=(return_err,)) + Process( + name=Settings.MITELMAN["NAME"], + target=Net.get_mitelman, + args=(return_err,), + ), + Process( + name=Settings.COSMIC["NAME"], + target=Net.get_cosmic, + args=( + self.cosmic_token, + return_err, + ), + ), + Process( + name=Settings.FUSIONGDB2["NAME"], + target=Net.get_fusiongdb2, + args=(return_err,), + ), ] for process in processes: @@ -42,5 +55,5 @@ def __init__(self, params: Namespace): raise DownloadException(return_err) time.sleep(1) - Logger(__name__).info('Cleaning up the mess') + Logger(__name__).info("Cleaning up the mess") Net.clean() diff --git a/requirements-dev.txt b/requirements-dev.txt index de82461..93ff2e0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,2 @@ -flake8 -mypy==0.701 isort setuptools \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 3d4f796..2e7dfae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,10 @@ tqdm colorlog -python-rapidjson pyyaml>=4.2b1 Jinja2>=2.10 -MarkupSafe +MarkupSafe>=2.1.1 pandas +python-rapidjson +requests openpyxl xlrd >= 1.0.0 diff --git a/setup.cfg b/setup.cfg index 6f3b82f..b04e8dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,12 +7,3 @@ lines_between_types=1 multi_line_output=7 use_parentheses=true known_third_party=colorlog,jinja2,rapidjson,setuptools,tqdm,yaml - -[flake8] -ignore = E226,E302,E41,W503,W291 -max-line-length = 120 -exclude = tests/* -max-complexity = 10 - -[mypy] -ignore_missing_imports = True diff --git a/setup.py b/setup.py index 23b76c2..490d462 100644 --- a/setup.py +++ b/setup.py @@ -3,49 +3,43 @@ from fusion_report.settings import Settings -PACKAGE_DATA = { - 'fusion_report': [ - 'data/schema/*.sql' - 'arguments.json', - 'templates/*/*' - ] -} +PACKAGE_DATA = {"fusion_report": ["data/schema/*.sql" "arguments.json", "templates/*/*"]} -with open('README.md') as f: +with open("README.md") as f: README = f.read() -with open('requirements.txt') as f: +with open("requirements.txt") as f: INSTALL_REQUIREMENTS = f.read().splitlines() setup( - name='fusion_report', + name="fusion_report", version=Settings.VERSION, - python_requires='>=3.6.*', - description='Tool for parsing outputs from fusion detection tools.', + python_requires=">=3.6", + description="Tool for parsing outputs from fusion detection tools.", long_description=README, - author='Martin Proks', - author_email='mproksik@gmail.com', - url='https://github.com/matq007/fusion-report', - license='GPL_v3', - packages=find_packages(exclude=('tests', 'docs')), + author="Martin Proks", + author_email="mproksik@gmail.com", + url="https://github.com/matq007/fusion-report", + license="GPL_v3", + packages=find_packages(exclude=("tests", "docs")), install_requires=INSTALL_REQUIREMENTS, zip_safe=False, - scripts=['bin/fusion_report'], + scripts=["bin/fusion_report"], classifiers=[ - 'Natural Language :: English', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: POSIX', - 'Operating System :: Unix', - 'Programming Language :: Python', - 'Programming Language :: JavaScript', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Topic :: Scientific/Engineering :: Visualization', + "Natural Language :: English", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Operating System :: MacOS :: MacOS X", + "Operating System :: POSIX", + "Operating System :: Unix", + "Programming Language :: Python", + "Programming Language :: JavaScript", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Visualization", ], package_data=PACKAGE_DATA, - include_package_data=True + include_package_data=True, )