Skip to content

Commit

Permalink
chore: merge dev
Browse files Browse the repository at this point in the history
  • Loading branch information
d0choa committed Jan 18, 2024
2 parents 530a931 + 84f794d commit f2fe3d0
Show file tree
Hide file tree
Showing 116 changed files with 873 additions and 270 deletions.
29 changes: 25 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,30 @@
[![status: experimental](https://github.com/GIScience/badges/raw/master/status/experimental.svg)](https://github.com/GIScience/badges#experimental)
[![docs](https://github.com/opentargets/gentropy/actions/workflows/docs.yaml/badge.svg)](https://opentargets.github.io/gentropy/)
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
[![image](https://github.com/opentargets/gentropy/actions/workflows/release.yaml/badge.svg)](https://opentargets.github.io/gentropy/)
[![codecov](https://codecov.io/gh/opentargets/gentropy/branch/main/graph/badge.svg?token=5ixzgu8KFP)](https://codecov.io/gh/opentargets/gentropy)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/opentargets/gentropy/main.svg)](https://results.pre-commit.ci/badge/github/opentargets/gentropy)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10527086.svg)](https://doi.org/10.5281/zenodo.10527086)

# Genetics Portal Data Pipeline (experimental)
<p align="center">
<img width=100% height=250px src="https://raw.githubusercontent.com/opentargets/gentropy/dev/docs/assets/imgs/gentropy.svg">
</p>

Open Targets Gentropy is a Python package to facilitate the interpretation and analysis of GWAS and functional genomic studies for target identification. The package contains a toolkit for the harmonisation, statistical analysis and prioritisation of genetic signals to assist drug discovery.

## Installation

We recommend installing Open Targets Gentropy from PyPI:

```bash
pip install gentropy
```

For alternative ways to install the package, visit the [Documentation](https://opentargets.github.io/gentropy/installation/).

## References

- [Documentation](https://opentargets.github.io/gentropy/)
- [Issue tracker](https://github.com/opentargets/issues/issues)

## About Open Targets

Open Targets is a pre-competitive, public-private partnership that uses human genetics and genomics data to systematically identify and prioritise drug targets. Through large-scale genomic experiments and the development of innovative computational techniques, the partnership aims to help researchers select the best targets for the development of new therapies. For more information, visit the Open Targets [website](https://www.opentargets.org).
60 changes: 37 additions & 23 deletions config/datasets/ot_gcp.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,47 @@
# Release specific configuration:
release_version: "24.01"
dev_version: XX.XX
release_folder: gs://genetics_etl_python_playground/releases/${datasets.release_version}

inputs: gs://genetics_etl_python_playground/input
outputs: gs://genetics_etl_python_playground/output/python_etl/parquet/${datasets.version}
static_assets: gs://genetics_etl_python_playground/static_assetss
outputs: gs://genetics_etl_python_playground/output/python_etl/parquet/${datasets.dev_version}

## Datasets:
gwas_catalog_dataset: gs://gwas_catalog_data
# Ingestion input files:
gwas_catalog_associations: ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_associations_ontology_annotated.tsv
gwas_catalog_studies:
- ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_download_studies.tsv
- ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_unpublished_studies.tsv
gwas_catalog_ancestries:
- ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_download_ancestries.tsv
- ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_unpublished_ancestries.tsv
gwas_catalog_sumstats_lut: ${datasets.gwas_catalog_dataset}/curated_inputs/harmonised_list.txt
gwas_catalog_study_curation: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_study_curation.tsv
# Harmonised summary statistics list:
gwas_catalog_summary_stats_list: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_harmonised_sumstats_list.txt
# Inclusion lists:
gwas_catalog_curated_inclusion_list: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_curated_included_studies
gwas_catalog_summary_satistics_inclusion_list: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_summary_statistics_included_studies
# Ingestion output folders:
gwas_catalog_study_index: ${datasets.gwas_catalog_dataset}/study_index
gwas_catalog_study_locus_folder: ${datasets.gwas_catalog_dataset}/study_locus_datasets
gwas_catalog_credible_set_folder: ${datasets.gwas_catalog_dataset}/credible_set_datasets

# Input datasets
chain_37_38: ${datasets.inputs}/v2g_input/grch37_to_grch38.over.chain
target_index: ${datasets.inputs}/v2g_input/targets_correct_tss
vep_consequences: gs://genetics-portal-data/lut/vep_consequences.tsv
anderson: gs://genetics-portal-input/v2g_input/andersson2014/enhancer_tss_associations.bed
javierre: gs://genetics-portal-input/v2g_input/javierre_2016_preprocessed.parquet
jung: gs://genetics-portal-raw/pchic_jung2019/jung2019_pchic_tableS3.csv
thurman: gs://genetics-portal-input/v2g_input/thurman2012/genomewideCorrs_above0.7_promoterPlusMinus500kb_withGeneNames_32celltypeCategories.bed8.gz
catalog_associations: ${datasets.inputs}/v2d/gwas_catalog_v1.0.2-associations_e110_r2023-12-21.tsv
catalog_studies:
# To get a complete representation of all GWAS Catalog studies, we need to
# ingest the list of unpublished studies from a different file.
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-studies-r2023-12-21.tsv
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-unpublished-studies-r2023-12-21.tsv
catalog_ancestries:
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-ancestries-r2023-12-21.tsv
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-unpublished-ancestries-r2023-12-21.tsv
catalog_sumstats_lut: ${datasets.inputs}/v2d/harmonised_list-r2023-12-21.txt

gene_interactions: ${datasets.inputs}/l2g/interaction # 23.09 data
chain_37_38: ${datasets.static_assets}/grch37_to_grch38.over.chain
vep_consequences: ${datasets.static_assets}/vep_consequences.tsv
anderson: ${datasets.static_assets}/andersson2014/enhancer_tss_associations.bed
javierre: ${datasets.static_assets}/javierre_2016_preprocessed
jung: ${datasets.static_assets}/jung2019_pchic_tableS3.csv
thurman: ${datasets.static_assets}/thurman2012/genomewideCorrs_above0.7_promoterPlusMinus500kb_withGeneNames_32celltypeCategories.bed8.gz
target_index: ${datasets.release_folder}/targets # OTP 23.12 data

gene_interactions: ${datasets.release_folder}/interaction # OTP 23.12 data
eqtl_catalogue_paths_imported: ${datasets.inputs}/preprocess/eqtl_catalogue/tabix_ftp_paths_imported.tsv

# Output datasets
gene_index: ${datasets.outputs}/gene_index
# Dev output datasets
variant_annotation: ${datasets.outputs}/variant_annotation
study_locus: ${datasets.outputs}/study_locus
summary_statistics: ${datasets.outputs}/summary_statistics
Expand All @@ -37,7 +50,7 @@ study_locus_overlap: ${datasets.outputs}/study_locus_overlap
ld_index: ${datasets.outputs}/ld_index
catalog_study_index: ${datasets.study_index}/catalog
catalog_study_locus: ${datasets.study_locus}/catalog_study_locus
gwas_catalog_study_curation: ${datasets.inputs}/v2d/GWAS_Catalog_study_curation.tsv

finngen_study_index: ${datasets.study_index}/finngen
finngen_summary_stats: ${datasets.summary_statistics}/finngen
from_sumstats_study_locus: ${datasets.study_locus}/from_sumstats
Expand All @@ -51,4 +64,5 @@ colocalisation: ${datasets.release_folder}/colocalisation
study_index: ${datasets.release_folder}/study_index
variant_index: ${datasets.release_folder}/variant_index
credible_set: ${datasets.release_folder}/credible_set
gene_index: ${datasets.release_folder}/gene_index
v2g: ${datasets.release_folder}/variant_to_gene
2 changes: 1 addition & 1 deletion config/step/ot_colocalisation.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- colocalisation

credible_set_path: ${datasets.study_locus}
credible_set_path: ${datasets.credible_set}
study_index_path: ${datasets.study_index}
coloc_path: ${datasets.colocalisation}
14 changes: 7 additions & 7 deletions config/step/ot_gwas_catalog_ingestion.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
defaults:
- gwas_catalog_ingestion

catalog_study_files: ${datasets.catalog_studies}
catalog_ancestry_files: ${datasets.catalog_ancestries}
catalog_associations_file: ${datasets.catalog_associations}
catalog_sumstats_lut: ${datasets.catalog_sumstats_lut}
catalog_study_files: ${datasets.gwas_catalog_studies}
catalog_ancestry_files: ${datasets.gwas_catalog_ancestries}
catalog_associations_file: ${datasets.gwas_catalog_associations}
catalog_sumstats_lut: ${datasets.gwas_catalog_sumstats_lut}
variant_annotation_path: ${datasets.variant_annotation}
catalog_studies_out: ${datasets.catalog_study_index}
catalog_associations_out: ${datasets.catalog_study_locus}
catalog_studies_out: ${datasets.gwas_catalog_study_index}
catalog_associations_out: ${datasets.gwas_catalog_study_locus_folder}/gwas_catalog_curated_associations
gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation}
inclusion_list_path: ???
inclusion_list_path: ${datasets.gwas_catalog_curated_inclusion_list}
6 changes: 3 additions & 3 deletions config/step/ot_gwas_catalog_study_curation.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
defaults:
- gwas_catalog_study_curation

catalog_study_files: ${datasets.catalog_studies}
catalog_ancestry_files: ${datasets.catalog_ancestries}
catalog_sumstats_lut: ${datasets.catalog_sumstats_lut}
catalog_study_files: ${datasets.gwas_catalog_studies}
catalog_ancestry_files: ${datasets.gwas_catalog_ancestries}
catalog_sumstats_lut: ${datasets.gwas_catalog_sumstats_lut}
gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation}
gwas_catalog_study_curation_out: ???
8 changes: 4 additions & 4 deletions config/step/ot_gwas_catalog_study_inclusion.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
defaults:
- gwas_catalog_study_inclusion

catalog_study_files: ${datasets.catalog_studies}
catalog_ancestry_files: ${datasets.catalog_ancestries}
catalog_associations_file: ${datasets.catalog_associations}
catalog_study_files: ${datasets.gwas_catalog_studies}
catalog_ancestry_files: ${datasets.gwas_catalog_ancestries}
catalog_associations_file: ${datasets.gwas_catalog_associations}
variant_annotation_path: ${datasets.variant_annotation}
gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation}
harmonised_study_file: ???
harmonised_study_file: ${datasets.gwas_catalog_summary_stats_list}
criteria: ???
inclusion_list_path: ???
exclusion_list_path: ???
2 changes: 1 addition & 1 deletion config/step/ot_ld_based_clumping.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defaults:
- ld_based_clumping

ld_index_path: ${datasets.ld_index}
study_locus_input_path: ???
ld_index_path: ???
study_index_path: ???
clumped_study_locus_output_path: ???
10 changes: 10 additions & 0 deletions config/step/ot_locus_to_gene_predict.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
defaults:
- locus_to_gene

run_mode: predict
model_path: ${datasets.l2g_model}
predictions_path: ${datasets.l2g_predictions}
credible_set_path: ${datasets.credible_set}
variant_gene_path: ${datasets.v2g}
colocalisation_path: ${datasets.colocalisation}
study_index_path: ${datasets.study_index}
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit f2fe3d0

Please sign in to comment.