Skip to content

Commit

Permalink
feat: add qc step (#675)
Browse files Browse the repository at this point in the history
* feat: add qc step

* fix: remove .df

* fix: fix in name

* fix: fix v3

* Update src/gentropy/sumstat_qc_step.py

Co-authored-by: Daniel Suveges <[email protected]>

* Update src/gentropy/sumstat_qc_step.py

Co-authored-by: Daniel Suveges <[email protected]>

* fix: optimisation of code

---------

Co-authored-by: Daniel Suveges <[email protected]>
  • Loading branch information
addramir and DSuveges authored Jul 9, 2024
1 parent 5f97232 commit 4a5f69f
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,16 @@ class FinemapperConfig(StepConfig):
_target_: str = "gentropy.susie_finemapper.SusieFineMapperStep"


@dataclass
class GWASQCStep(StepConfig):
    """GWAS QC step configuration.

    The fields mirror the constructor arguments of the class named in
    ``_target_`` (``SummaryStatisticsQCStep``).
    """

    # Path to the GWAS summary statistics to run QC on.
    # NOTE(review): MISSING presumably marks the field as mandatory (no default) - confirm.
    gwas_path: str = MISSING
    # Output path under which the QC results are written.
    output_path: str = MISSING
    # Study ID for the QC.
    studyid: str = MISSING
    # Dotted path of the step class this configuration is meant for.
    _target_: str = "gentropy.sumstat_qc_step.SummaryStatisticsQCStep"


@dataclass
class Config:
"""Application configuration."""
Expand Down
37 changes: 37 additions & 0 deletions src/gentropy/sumstat_qc_step.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Step to calculate quality control metrics on the provided GWAS study."""

from __future__ import annotations

from gentropy.common.session import Session
from gentropy.dataset.summary_statistics import SummaryStatistics
from gentropy.method.sumstat_quality_controls import SummaryStatisticsQC


class SummaryStatisticsQCStep:
    """Step to run GWAS QC.

    Instantiating the class runs the whole step: the summary statistics are
    read from parquet, the quality control metrics are computed and the
    result is written back as parquet.
    """

    def __init__(
        self,
        session: Session,
        gwas_path: str,
        output_path: str,
        studyid: str,
        limit: int = 100_000_000,
        min_count: int = 100,
        n_total: int = 100000,
    ) -> None:
        """Calculate quality control metrics on the provided GWAS study.

        The metrics are written to ``<output_path>/qc_results_<studyid>``
        using the write mode configured on the session.

        Args:
            session (Session): Spark session
            gwas_path (str): Path to the GWAS summary statistics.
            output_path (str): Output path for the QC results.
            studyid (str): Study ID for the QC.
            limit (int): Forwarded as ``limit`` to
                ``SummaryStatisticsQC.get_quality_control_metrics``.
                Defaults to 100_000_000.
            min_count (int): Forwarded as ``min_count`` to
                ``SummaryStatisticsQC.get_quality_control_metrics``.
                Defaults to 100.
            n_total (int): Forwarded as ``n_total`` to
                ``SummaryStatisticsQC.get_quality_control_metrics``.
                Defaults to 100000.
        """
        gwas = SummaryStatistics.from_parquet(session, path=gwas_path)

        qc_metrics = SummaryStatisticsQC.get_quality_control_metrics(
            gwas=gwas, limit=limit, min_count=min_count, n_total=n_total
        )

        # Strip trailing slashes so an output path such as "gs://bucket/qc/"
        # does not produce a double slash (an empty path segment on object stores).
        destination = f"{output_path.rstrip('/')}/qc_results_{studyid}"

        qc_metrics.write.mode(session.write_mode).parquet(destination)

0 comments on commit 4a5f69f

Please sign in to comment.