Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge dev #508

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
ci:
autoupdate_commit_msg: "chore: pre-commit autoupdate"
autofix_commit_msg: "chore: pre-commit auto fixes [...]"
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.13
rev: v0.2.2
hooks:
- id: ruff
args:
Expand Down Expand Up @@ -29,7 +32,7 @@ repos:
- id: debug-statements
- id: check-docstring-first
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.33.0
rev: v1.35.1
hooks:
- id: yamllint

Expand Down Expand Up @@ -93,6 +96,6 @@ repos:
- id: beautysh

- repo: https://github.com/jsh9/pydoclint
rev: 0.3.8
rev: 0.4.1
hooks:
- id: pydoclint
1 change: 1 addition & 0 deletions config/step/ot_finngen_finemapping_ingestion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ defaults:

finngen_finemapping_results_path: ${datasets.finngen_finemapping_results_path}
finngen_finemapping_summaries_path: ${datasets.finngen_finemapping_summaries_path}
finngen_release_prefix: ${datasets.finngen_release_prefix}
finngen_finemapping_out: ${datasets.finngen_finemapping_out}
2 changes: 1 addition & 1 deletion docs/development/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,5 @@ For more details on each of these steps, see the sections below.
### Tests

- Test study fixture in `tests/conftest.py` (example: `mock_study_index_finngen` in that module)
- Test sample data in `tests/data_samples` (example: `tests/data_samples/finngen_studies_sample.json`)
- Test sample data in `tests/gentropy/data_samples` (example: `tests/gentropy/data_samples/finngen_studies_sample.json`)
- Test definition in `tests/` (example: `tests/dataset/test_study_index.py` → `test_study_index_finngen_creation`)
6 changes: 3 additions & 3 deletions docs/src_snippets/howto/python_api/b_create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def create_from_parquet(session: Session) -> SummaryStatistics:

# --8<-- [end:create_from_parquet_import]

path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet"
path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet"
# --8<-- [start:create_from_parquet]
summary_stats = SummaryStatistics.from_parquet(session, path)
# --8<-- [end:create_from_parquet]
Expand All @@ -31,7 +31,7 @@ def create_from_source(session: Session) -> SummaryStatistics:
from gentropy.datasource.finngen.summary_stats import FinnGenSummaryStats

# --8<-- [end:create_from_source_import]
path = "tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz"
path = "tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz"
# --8<-- [start:create_from_source]
summary_stats = FinnGenSummaryStats.from_source(session.spark, path)
# --8<-- [end:create_from_source]
Expand All @@ -46,7 +46,7 @@ def create_from_pandas() -> SummaryStatistics:

# --8<-- [end:create_from_pandas_import]

path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet"
path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet"
custom_summary_stats_pandas_df = ps.read_parquet(path)
# --8<-- [start:create_from_pandas]

Expand Down
90 changes: 54 additions & 36 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ pytest-cov = "^4.1.0"
pytest-sugar = ">=0.9.5,<1.1.0"
dbldatagen = "^0.3.1"
pyparsing = "^3.1.1"
pytest = "^7.4.4"
pytest = ">=7.4.4,<9.0.0"
pytest-xdist = "^3.5.0"


Expand All @@ -72,7 +72,7 @@ ipykernel = "^6.28.0"
google-cloud-dataproc = "^5.8.0"
apache-airflow = "^2.8.0"
apache-airflow-providers-google = "^10.13.1"
pydoclint = "^0.3.8"
pydoclint = ">=0.3.8,<0.5.0"
prettier = "^0.0.7"
deptry = "^0.12.0"
python-semantic-release = ">=8.7,<10.0"
Expand Down Expand Up @@ -127,6 +127,7 @@ exclude = ["dist"]
[tool.pytest.ini_options]
addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml"
pythonpath = [".", "./src/airflow/dags"]
testpaths = ["tests/gentropy", "src/gentropy/"]

# Semi-strict mode for mypy
[tool.mypy]
Expand Down
13 changes: 13 additions & 0 deletions src/airflow/dags/finngen_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
LD_CLUMPED = f"{FINNGEN_BUCKET}/study_locus_datasets/finngen_ld_clumped"
PICSED_CREDIBLE_SET = f"{FINNGEN_BUCKET}/credible_set_datasets/finngen_pics"

FINNGEN_FINEMAPPING = (
"gs://genetics_etl_python_playground/input/Finngen_susie_finemapping_r10/full"
)
FINNGEN_FM_SUMMARIES = "gs://genetics_etl_python_playground/input/Finngen_susie_finemapping_r10/Finngen_susie_credset_summary_r10.tsv"
FINNGEN_PREFIX = "FINNGEN_R10_"
FINNGEN_FM_OUT = "gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/finngen_susie_processed"

with DAG(
dag_id=Path(__file__).stem,
description="Open Targets Genetics — Finngen preprocess",
Expand All @@ -31,6 +38,12 @@
cluster_name=CLUSTER_NAME,
step_id="ot_finngen_finemapping_ingestion",
task_id="finngen_finemapping_ingestion",
other_args=[
f"step.finngen_finemapping_out={FINNGEN_FM_OUT}",
f"step.finngen_release_prefix={FINNGEN_PREFIX}",
f"step.finngen_finemapping_results_path={FINNGEN_FINEMAPPING}",
f"step.finngen_finemapping_summaries_path={FINNGEN_FM_SUMMARIES}",
],
# This allows the task to be attempted even when the step above fails due to failifexists
trigger_rule=TriggerRule.ALL_DONE,
)
Expand Down
20 changes: 20 additions & 0 deletions src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,19 @@ class FinngenSumstatPreprocessConfig(StepConfig):
_target_: str = "gentropy.finngen_sumstat_preprocess.FinnGenSumstatPreprocessStep"


@dataclass
class FinngenFinemappingConfig(StepConfig):
"""FinnGen fine mapping ingestion step configuration."""

finngen_finemapping_results_path: str = MISSING
finngen_finemapping_summaries_path: str = MISSING
finngen_release_prefix: str = MISSING
finngen_finemapping_out: str = MISSING
_target_: str = (
"gentropy.finngen_finemapping_ingestion.FinnGenFinemappingIngestionStep"
)


@dataclass
class LDIndexConfig(StepConfig):
"""LD index step configuration."""
Expand Down Expand Up @@ -353,6 +366,13 @@ def register_config() -> None:
name="finngen_sumstat_preprocess",
node=FinngenSumstatPreprocessConfig,
)

cs.store(
group="step",
name="finngen_finemapping_ingestion",
node=FinngenFinemappingConfig,
)

cs.store(group="step", name="pics", node=PICSConfig)
cs.store(group="step", name="variant_annotation", node=VariantAnnotationConfig)
cs.store(group="step", name="variant_index", node=VariantIndexConfig)
Expand Down
Loading
Loading