Skip to content

Commit

Permalink
refactor: delete unnecessary config files (#647)
Browse files: browse the repository at this point in the history.
* refactor: remove ot_pics

* refactor: gwas_catalog_sumstat_preprocess config removed

* refactor: ot_finngen_studies removed

* refactor: ot_finngen_studies removed

* refactor: window_based_clumping cleanup
  • Loading branch information
d0choa authored Jun 18, 2024
1 parent d796b68 commit dc70bd8
Show file tree
Hide file tree
Showing 10 changed files with 10 additions and 33 deletions.
4 changes: 0 additions & 4 deletions config/step/ot_finngen_studies.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/step/ot_finngen_sumstat_preprocess.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/step/ot_gwas_catalog_sumstat_preprocess.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/step/ot_pics.yaml

This file was deleted.

7 changes: 0 additions & 7 deletions config/step/ot_window_based_clumping.yaml

This file was deleted.

3 changes: 2 additions & 1 deletion src/airflow/dags/finngen_harmonisation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Airflow DAG for the harmonisation part of the pipeline."""

from __future__ import annotations

import re
Expand Down Expand Up @@ -53,7 +54,7 @@ def submit_jobs(**kwargs: Any) -> None:
print("Submitting job for study: ", study_id) # noqa: T201
common.submit_pyspark_job_no_operator(
cluster_name=CLUSTER_NAME,
- step_id="ot_finngen_sumstat_preprocess",
+ step_id="finngen_sumstat_preprocess",
other_args=[
f"step.raw_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{input_path}",
f"step.out_sumstats_path={SUMSTATS_PARQUET}/{study_id}.parquet",
Expand Down
2 changes: 1 addition & 1 deletion src/airflow/dags/finngen_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
) as finngen_summary_stats_preprocess:
study_index = common.submit_step(
cluster_name=CLUSTER_NAME,
- step_id="ot_finngen_studies",
+ step_id="finngen_studies",
task_id="finngen_studies",
other_args=[
f"step.finngen_study_index_out={STUDY_INDEX}",
Expand Down
3 changes: 2 additions & 1 deletion src/airflow/dags/gwas_catalog_harmonisation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Airflow DAG for the harmonisation part of the pipeline."""

from __future__ import annotations

import re
Expand Down Expand Up @@ -99,7 +100,7 @@ def submit_jobs(**kwargs: Any) -> None:
print("Submitting job for study: ", study_id) # noqa: T201
common.submit_pyspark_job_no_operator(
cluster_name=CLUSTER_NAME,
- step_id="ot_gwas_catalog_sumstat_preprocess",
+ step_id="gwas_catalog_sumstat_preprocess",
other_args=[
f"step.raw_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{input_path}",
f"step.out_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{HARMONISED_SUMMARY_STATISTICS_PREFIX}/{study_id}.parquet",
Expand Down
7 changes: 4 additions & 3 deletions src/airflow/dags/gwas_catalog_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Airflow DAG for the preprocessing of GWAS Catalog's harmonised summary statistics and curated associations."""

from __future__ import annotations

from pathlib import Path
Expand Down Expand Up @@ -131,7 +132,7 @@ def upload_harmonized_study_list(
# Do PICS based finemapping:
curation_pics = common.submit_step(
cluster_name=CLUSTER_NAME,
- step_id="ot_pics",
+ step_id="pics",
task_id="catalog_curation_pics",
other_args=[
f"step.study_locus_ld_annotated_in={CURATED_LD_CLUMPED}",
Expand Down Expand Up @@ -167,7 +168,7 @@ def upload_harmonized_study_list(
# Run window-based clumping:
summary_stats_window_based_clumping = common.submit_step(
cluster_name=CLUSTER_NAME,
- step_id="ot_window_based_clumping",
+ step_id="window_based_clumping",
task_id="catalog_sumstats_window_clumping",
other_args=[
f"step.summary_statistics_input_path=gs://{GWAS_CATALOG_BUCKET_NAME}/{HARMONISED_SUMSTATS_PREFIX}",
Expand All @@ -191,7 +192,7 @@ def upload_harmonized_study_list(
# Run PICS finemapping:
summary_stats_pics = common.submit_step(
cluster_name=CLUSTER_NAME,
- step_id="ot_pics",
+ step_id="pics",
task_id="catalog_sumstats_pics",
other_args=[
f"step.study_locus_ld_annotated_in={LD_BASED_CLUMPED}",
Expand Down
2 changes: 1 addition & 1 deletion src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ class WindowBasedClumpingStepConfig(StepConfig):
)
summary_statistics_input_path: str = MISSING
study_locus_output_path: str = MISSING
- gwas_significance: float = 5e-8
+ gwas_significance: float = 1e-8
distance: int = 500_000
collect_locus: bool = False
collect_locus_distance: int = 500_000
Expand Down

0 comments on commit dc70bd8

Please sign in to comment.