Skip to content

Commit

Permalink
test: adapt TestCommonProteinCodingFeatureLogic
Browse files: browse the repository at this point in the history
  • Loading branch information
ireneisdoomed committed Dec 13, 2024
1 parent b270c9e commit f507fe1
Showing 1 changed file with 52 additions and 32 deletions.
84 changes: 52 additions & 32 deletions tests/gentropy/dataset/test_l2g_feature.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -238,9 +238,11 @@ def sample_variant_index_schema() -> StructType:
ArrayType(
StructType(
[
StructField("distanceFromFootprint", LongType(), True),
StructField("distanceFromTss", LongType(), True),
StructField("targetId", StringType(), True),
StructField("isEnsemblCanonical", BooleanType(), True),
StructField("biotype", StringType(), True),
]
)
),
Expand Down Expand Up @@ -624,13 +626,17 @@ def _setup(
[
{
"distanceFromTss": 10,
"distanceFromFootprint": 0,
"targetId": "gene1",
"isEnsemblCanonical": True,
"biotype": "protein_coding",
},
{
"distanceFromTss": 2,
"distanceFromFootprint": 0,
"targetId": "gene2",
"isEnsemblCanonical": True,
"biotype": "protein_coding",
},
],
),
Expand All @@ -643,8 +649,10 @@ def _setup(
[
{
"distanceFromTss": 5,
"distanceFromFootprint": 0,
"targetId": "gene1",
"isEnsemblCanonical": True,
"biotype": "protein_coding",
},
],
),
Expand Down Expand Up @@ -928,9 +936,8 @@ class TestCommonProteinCodingFeatureLogic:
[
(
[
{"studyLocusId": "1", "geneId": "gene1", "isProteinCoding500kb": 1},
{"studyLocusId": "1", "geneId": "gene2", "isProteinCoding500kb": 1},
{"studyLocusId": "1", "geneId": "gene3", "isProteinCoding500kb": 0},
{"studyLocusId": "1", "geneId": "gene1", "isProteinCoding": 1.0},
{"studyLocusId": "1", "geneId": "gene2", "isProteinCoding": 0.0},
]
),
],
Expand All @@ -944,25 +951,28 @@ def test_is_protein_coding_feature_logic(
observed_df = (
is_protein_coding_feature_logic(
study_loci_to_annotate=self.sample_study_locus,
gene_index=self.sample_gene_index,
feature_name="isProteinCoding500kb",
genomic_window=500000,
variant_index=self.sample_variant_index,
feature_name="isProteinCoding",
)
.select("studyLocusId", "geneId", "isProteinCoding500kb")
.select("studyLocusId", "geneId", "isProteinCoding")
.orderBy("studyLocusId", "geneId")
)

expected_df = (
spark.createDataFrame(expected_data)
.select("studyLocusId", "geneId", "isProteinCoding500kb")
.select("studyLocusId", "geneId", "isProteinCoding")
.orderBy("studyLocusId", "geneId")
)
assert (
observed_df.collect() == expected_df.collect()
), "Expected and observed DataFrames do not match."

@pytest.fixture(autouse=True)
def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> None:
def _setup(
self: TestCommonProteinCodingFeatureLogic,
spark: SparkSession,
sample_variant_index_schema: StructType,
) -> None:
"""Set up sample data for the test."""
# Sample study locus data
self.sample_study_locus = StudyLocus(
Expand All @@ -974,39 +984,47 @@ def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> No
"studyId": "study1",
"chromosome": "1",
"position": 1000000,
"locus": [
{
"variantId": "var1",
},
],
},
],
StudyLocus.get_schema(),
),
_schema=StudyLocus.get_schema(),
)

# Sample variant index data carrying per-gene biotype annotations (replaces the former gene index fixture)
self.sample_gene_index = GeneIndex(
self.sample_variant_index = VariantIndex(
_df=spark.createDataFrame(
[
{
"geneId": "gene1",
"chromosome": "1",
"tss": 950000,
"biotype": "protein_coding",
},
{
"geneId": "gene2",
"chromosome": "1",
"tss": 1050000,
"biotype": "protein_coding",
},
{
"geneId": "gene3",
"chromosome": "1",
"tss": 1010000,
"biotype": "non_coding",
},
(
"var1",
"chrom",
1,
"A",
"T",
[
{
"distanceFromFootprint": 0,
"distanceFromTss": 10,
"targetId": "gene1",
"biotype": "protein_coding",
"isEnsemblCanonical": True,
},
{
"distanceFromFootprint": 0,
"distanceFromTss": 20,
"targetId": "gene2",
"biotype": "non_coding",
"isEnsemblCanonical": True,
},
],
),
],
GeneIndex.get_schema(),
sample_variant_index_schema,
),
_schema=GeneIndex.get_schema(),
_schema=VariantIndex.get_schema(),
)


Expand Down Expand Up @@ -1067,8 +1085,10 @@ def _setup(
[
{
"distanceFromTss": 10,
"distanceFromFootprint": 0,
"targetId": "gene1",
"isEnsemblCanonical": True,
"biotype": "protein_coding",
},
],
)
Expand Down

0 comments on commit f507fe1

Please sign in to comment.