Skip to content

Commit

Permalink
feat: exclude region for StudyLocus object (#646)
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel-Considine authored Jun 17, 2024
1 parent 79a6cb5 commit d796b68
Showing 1 changed file with 36 additions and 1 deletion.
37 changes: 36 additions & 1 deletion src/gentropy/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
calculate_neglog_pvalue,
order_array_of_structs_by_field,
)
from gentropy.common.utils import get_logsum
from gentropy.common.utils import get_logsum, parse_region
from gentropy.dataset.dataset import Dataset
from gentropy.dataset.study_locus_overlap import StudyLocusOverlap
from gentropy.method.clump import LDclumping
Expand Down Expand Up @@ -574,6 +574,41 @@ def clump(self: StudyLocus) -> StudyLocus:
)
return self

def exclude_region(
    self: StudyLocus, region: str, exclude_overlap: bool = False
) -> StudyLocus:
    """Drop the study loci that fall inside a genomic region.

    Two matching modes are available. By default a locus is removed when its
    `position` lies within the region (both bounds inclusive). With
    `exclude_overlap=True` a locus is removed when its
    `[locusStart, locusEnd]` window shares at least one base with the region.
    In both modes only loci on the region's chromosome are candidates for
    removal.

    Args:
        region (str): region given in "chr##:#####-####" format
        exclude_overlap (bool): If True, match on the locus window
            (`locusStart`/`locusEnd`) instead of the lead `position`.

    Returns:
        StudyLocus: new StudyLocus containing only the rows outside the region.
    """
    chromosome, start_position, end_position = parse_region(region)

    # Shared by both modes: the locus must sit on the requested chromosome.
    same_chromosome = f.col("chromosome") == chromosome

    if exclude_overlap:
        # Interval-overlap test: the window starts before the region ends
        # and ends after the region starts.
        within_region = (f.col("locusStart") <= end_position) & (
            f.col("locusEnd") >= start_position
        )
    else:
        # Point-in-interval test on the lead variant, inclusive on both ends.
        within_region = f.col("position").between(start_position, end_position)

    # Keep everything that is NOT (on the chromosome AND inside the region).
    retained = self.df.filter(~(same_chromosome & within_region))
    return StudyLocus(
        _df=retained,
        _schema=StudyLocus.get_schema(),
    )

def _qc_no_population(self: StudyLocus) -> StudyLocus:
"""Flag associations where the study doesn't have population information to resolve LD.
Expand Down

0 comments on commit d796b68

Please sign in to comment.