From dc70bd8e483053d96159ff7229d4b6c7f16fd311 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 18 Jun 2024 09:44:21 +0100 Subject: [PATCH] refactor: delete unnecessary config files (#647) * refactor: remove ot_pics * refactor: gwas_catalog_sumstat_preprocess config removed * refactor: ot_finngen_studies removed * refactor: ot_finngen_sumstat_preprocess removed * refactor: window_based_clumping cleanup --- config/step/ot_finngen_studies.yaml | 4 ---- config/step/ot_finngen_sumstat_preprocess.yaml | 5 ----- config/step/ot_gwas_catalog_sumstat_preprocess.yaml | 5 ----- config/step/ot_pics.yaml | 5 ----- config/step/ot_window_based_clumping.yaml | 7 ------- src/airflow/dags/finngen_harmonisation.py | 3 ++- src/airflow/dags/finngen_preprocess.py | 2 +- src/airflow/dags/gwas_catalog_harmonisation.py | 3 ++- src/airflow/dags/gwas_catalog_preprocess.py | 7 ++++--- src/gentropy/config.py | 2 +- 10 files changed, 10 insertions(+), 33 deletions(-) delete mode 100644 config/step/ot_finngen_studies.yaml delete mode 100644 config/step/ot_finngen_sumstat_preprocess.yaml delete mode 100644 config/step/ot_gwas_catalog_sumstat_preprocess.yaml delete mode 100644 config/step/ot_pics.yaml delete mode 100644 config/step/ot_window_based_clumping.yaml diff --git a/config/step/ot_finngen_studies.yaml b/config/step/ot_finngen_studies.yaml deleted file mode 100644 index c2657bbf5..000000000 --- a/config/step/ot_finngen_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: - - finngen_studies - -finngen_study_index_out: ??? diff --git a/config/step/ot_finngen_sumstat_preprocess.yaml b/config/step/ot_finngen_sumstat_preprocess.yaml deleted file mode 100644 index ad0e93a09..000000000 --- a/config/step/ot_finngen_sumstat_preprocess.yaml +++ /dev/null @@ -1,5 +0,0 @@ -defaults: - - finngen_sumstat_preprocess - -raw_sumstats_path: ??? -out_sumstats_path: ??? 
diff --git a/config/step/ot_gwas_catalog_sumstat_preprocess.yaml b/config/step/ot_gwas_catalog_sumstat_preprocess.yaml deleted file mode 100644 index d0c936807..000000000 --- a/config/step/ot_gwas_catalog_sumstat_preprocess.yaml +++ /dev/null @@ -1,5 +0,0 @@ -defaults: - - gwas_catalog_sumstat_preprocess - -raw_sumstats_path: ??? -out_sumstats_path: ??? diff --git a/config/step/ot_pics.yaml b/config/step/ot_pics.yaml deleted file mode 100644 index 851c4ca06..000000000 --- a/config/step/ot_pics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -defaults: - - pics - -study_locus_ld_annotated_in: ??? -picsed_study_locus_out: ??? diff --git a/config/step/ot_window_based_clumping.yaml b/config/step/ot_window_based_clumping.yaml deleted file mode 100644 index d9a2dbd80..000000000 --- a/config/step/ot_window_based_clumping.yaml +++ /dev/null @@ -1,7 +0,0 @@ -defaults: - - window_based_clumping - -summary_statistics_input_path: ??? -study_locus_output_path: ??? -inclusion_list_path: ??? -gwas_significance: 1e-8 diff --git a/src/airflow/dags/finngen_harmonisation.py b/src/airflow/dags/finngen_harmonisation.py index e2f2d1b4a..b40561fd9 100644 --- a/src/airflow/dags/finngen_harmonisation.py +++ b/src/airflow/dags/finngen_harmonisation.py @@ -1,4 +1,5 @@ """Airflow DAG for the harmonisation part of the pipeline.""" + from __future__ import annotations import re @@ -53,7 +54,7 @@ def submit_jobs(**kwargs: Any) -> None: print("Submitting job for study: ", study_id) # noqa: T201 common.submit_pyspark_job_no_operator( cluster_name=CLUSTER_NAME, - step_id="ot_finngen_sumstat_preprocess", + step_id="finngen_sumstat_preprocess", other_args=[ f"step.raw_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{input_path}", f"step.out_sumstats_path={SUMSTATS_PARQUET}/{study_id}.parquet", diff --git a/src/airflow/dags/finngen_preprocess.py b/src/airflow/dags/finngen_preprocess.py index 41b10e58a..3a60a0cfd 100644 --- a/src/airflow/dags/finngen_preprocess.py +++ b/src/airflow/dags/finngen_preprocess.py @@ 
-53,7 +53,7 @@ ) as finngen_summary_stats_preprocess: study_index = common.submit_step( cluster_name=CLUSTER_NAME, - step_id="ot_finngen_studies", + step_id="finngen_studies", task_id="finngen_studies", other_args=[ f"step.finngen_study_index_out={STUDY_INDEX}", diff --git a/src/airflow/dags/gwas_catalog_harmonisation.py b/src/airflow/dags/gwas_catalog_harmonisation.py index 5713e223d..25970fa8a 100644 --- a/src/airflow/dags/gwas_catalog_harmonisation.py +++ b/src/airflow/dags/gwas_catalog_harmonisation.py @@ -1,4 +1,5 @@ """Airflow DAG for the harmonisation part of the pipeline.""" + from __future__ import annotations import re @@ -99,7 +100,7 @@ def submit_jobs(**kwargs: Any) -> None: print("Submitting job for study: ", study_id) # noqa: T201 common.submit_pyspark_job_no_operator( cluster_name=CLUSTER_NAME, - step_id="ot_gwas_catalog_sumstat_preprocess", + step_id="gwas_catalog_sumstat_preprocess", other_args=[ f"step.raw_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{input_path}", f"step.out_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{HARMONISED_SUMMARY_STATISTICS_PREFIX}/{study_id}.parquet", diff --git a/src/airflow/dags/gwas_catalog_preprocess.py b/src/airflow/dags/gwas_catalog_preprocess.py index 1814ddf2d..c4722b83b 100644 --- a/src/airflow/dags/gwas_catalog_preprocess.py +++ b/src/airflow/dags/gwas_catalog_preprocess.py @@ -1,4 +1,5 @@ """Airflow DAG for the preprocessing of GWAS Catalog's harmonised summary statistics and curated associations.""" + from __future__ import annotations from pathlib import Path @@ -131,7 +132,7 @@ def upload_harmonized_study_list( # Do PICS based finemapping: curation_pics = common.submit_step( cluster_name=CLUSTER_NAME, - step_id="ot_pics", + step_id="pics", task_id="catalog_curation_pics", other_args=[ f"step.study_locus_ld_annotated_in={CURATED_LD_CLUMPED}", @@ -167,7 +168,7 @@ def upload_harmonized_study_list( # Run window-based clumping: summary_stats_window_based_clumping = common.submit_step( 
cluster_name=CLUSTER_NAME, - step_id="ot_window_based_clumping", + step_id="window_based_clumping", task_id="catalog_sumstats_window_clumping", other_args=[ f"step.summary_statistics_input_path=gs://{GWAS_CATALOG_BUCKET_NAME}/{HARMONISED_SUMSTATS_PREFIX}", @@ -191,7 +192,7 @@ def upload_harmonized_study_list( # Run PICS finemapping: summary_stats_pics = common.submit_step( cluster_name=CLUSTER_NAME, - step_id="ot_pics", + step_id="pics", task_id="catalog_sumstats_pics", other_args=[ f"step.study_locus_ld_annotated_in={LD_BASED_CLUMPED}", diff --git a/src/gentropy/config.py b/src/gentropy/config.py index d80c5397c..29ef5a48f 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -380,7 +380,7 @@ class WindowBasedClumpingStepConfig(StepConfig): ) summary_statistics_input_path: str = MISSING study_locus_output_path: str = MISSING - gwas_significance: float = 5e-8 + gwas_significance: float = 1e-8 distance: int = 500_000 collect_locus: bool = False collect_locus_distance: int = 500_000