diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index f19c28623..c4178ba37 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -15,7 +15,6 @@ from gentropy.common.session import Session from gentropy.dataset.biosample_index import BiosampleIndex from gentropy.dataset.colocalisation import Colocalisation -from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard @@ -25,6 +24,7 @@ from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.summary_statistics import SummaryStatistics +from gentropy.dataset.target_index import TargetIndex from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.eqtl_catalogue.finemapping import EqtlCatalogueFinemapping from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex @@ -379,7 +379,7 @@ def mock_summary_statistics( @pytest.fixture() def mock_ld_index(spark: SparkSession) -> LDIndex: - """Mock gene index.""" + """Mock ld index.""" ld_schema = LDIndex.get_schema() data_spec = ( @@ -519,9 +519,9 @@ def sample_target_index(spark: SparkSession) -> DataFrame: @pytest.fixture() -def mock_gene_index(spark: SparkSession) -> GeneIndex: - """Mock gene index dataset.""" - gi_schema = GeneIndex.get_schema() +def mock_target_index(spark: SparkSession) -> TargetIndex: + """Mock target index dataset.""" + gi_schema = TargetIndex.get_schema() data_spec = ( dg.DataGenerator( @@ -540,7 +540,7 @@ def mock_gene_index(spark: SparkSession) -> GeneIndex: .withColumnSpec("strand", percentNulls=0.1) ) - return GeneIndex(_df=data_spec.build(), _schema=gi_schema) + return TargetIndex(_df=data_spec.build(), _schema=gi_schema) @pytest.fixture() diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index feb8e449a..0ae9fea85 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -21,7 +21,7 @@ ) from gentropy.dataset.colocalisation import Colocalisation -from gentropy.dataset.gene_index import GeneIndex +from gentropy.dataset.target_index import TargetIndex from gentropy.dataset.l2g_features.colocalisation import ( EQtlColocClppMaximumFeature, EQtlColocClppMaximumNeighbourhoodFeature, @@ -116,7 +116,7 @@ def test_feature_factory_return_type( mock_colocalisation: Colocalisation, mock_study_index: StudyIndex, mock_variant_index: VariantIndex, - mock_gene_index: GeneIndex, + mock_target_index: TargetIndex, ) -> None: """Test that every feature factory returns a L2GFeature dataset.""" loader = L2GFeatureInputLoader( @@ -124,7 +124,7 @@ def test_feature_factory_return_type( study_index=mock_study_index, variant_index=mock_variant_index, study_locus=mock_study_locus, - gene_index=mock_gene_index, + target_index=mock_target_index, ) feature_dataset = feature_class.compute( study_loci_to_annotate=mock_study_locus, @@ -136,9 +136,9 @@ def test_feature_factory_return_type( @pytest.fixture(scope="module") -def sample_gene_index(spark: SparkSession) -> GeneIndex: - """Create a sample gene index for testing.""" - return GeneIndex( +def sample_target_index(spark: SparkSession) -> TargetIndex: + """Create a sample target index for testing.""" + return TargetIndex( _df=spark.createDataFrame( [ { @@ -157,9 +157,9 @@ def sample_gene_index(spark: SparkSession) -> GeneIndex: "chromosome": "1", }, ], - GeneIndex.get_schema(), + TargetIndex.get_schema(), ), - _schema=GeneIndex.get_schema(), + _schema=TargetIndex.get_schema(), ) @@ -294,7 +294,7 @@ def test__common_colocalisation_feature_logic( def test_extend_missing_colocalisation_to_neighbourhood_genes( self: TestCommonColocalisationFeatureLogic, spark: SparkSession, - sample_gene_index: GeneIndex, + sample_target_index: TargetIndex, sample_variant_index: VariantIndex, ) -> None: """Test the extend_missing_colocalisation_to_neighbourhood_genes function.""" @@ -316,7 +316,7 @@ def test_extend_missing_colocalisation_to_neighbourhood_genes( feature_name="eQtlColocH4Maximum", local_features=local_features, variant_index=sample_variant_index, - gene_index=sample_gene_index, + target_index=sample_target_index, study_locus=self.sample_study_locus, ).select("studyLocusId", "geneId", "eQtlColocH4Maximum") expected_df = spark.createDataFrame( @@ -329,7 +329,7 @@ def test_extend_missing_colocalisation_to_neighbourhood_genes( def test_common_neighbourhood_colocalisation_feature_logic( self: TestCommonColocalisationFeatureLogic, spark: SparkSession, - sample_gene_index: GeneIndex, + sample_target_index: TargetIndex, sample_variant_index: VariantIndex, ) -> None: """Test the common logic of the neighbourhood colocalisation features.""" @@ -343,7 +343,7 @@ def test_common_neighbourhood_colocalisation_feature_logic( colocalisation=self.sample_colocalisation, study_index=self.sample_studies, study_locus=self.sample_study_locus, - gene_index=sample_gene_index, + target_index=sample_target_index, variant_index=sample_variant_index, ).withColumn(feature_name, f.round(f.col(feature_name), 3)) # expected max is 0.81 @@ -561,7 +561,7 @@ def test_common_neighbourhood_distance_feature_logic( common_neighbourhood_distance_feature_logic( self.sample_study_locus, variant_index=self.sample_variant_index, - gene_index=self.sample_gene_index, + target_index=self.sample_target_index, feature_name=feature_name, distance_type=self.distance_type, genomic_window=10, @@ -653,7 +653,7 @@ def _setup( ), _schema=VariantIndex.get_schema(), ) - self.sample_gene_index = GeneIndex( + self.sample_target_index = TargetIndex( _df=spark.createDataFrame( [ { @@ -675,9 +675,9 @@ def _setup( "biotype": "non_coding", }, ], - GeneIndex.get_schema(), + TargetIndex.get_schema(), ), - _schema=GeneIndex.get_schema(), + _schema=TargetIndex.get_schema(), ) @@ -760,7 +760,7 @@ def test_common_vep_feature_logic( def test_common_neighbourhood_vep_feature_logic( self: TestCommonVepFeatureLogic, spark: SparkSession, - sample_gene_index: GeneIndex, + sample_target_index: TargetIndex, sample_variant_index: VariantIndex, ) -> None: """Test the logic of the function that extracts the maximum severity score for a gene given the maximum of the maximum scores for all protein coding genes in the vicinity.""" @@ -769,7 +769,7 @@ def test_common_neighbourhood_vep_feature_logic( common_neighbourhood_vep_feature_logic( self.sample_study_locus, variant_index=sample_variant_index, - gene_index=sample_gene_index, + target_index=sample_target_index, feature_name=feature_name, ) .withColumn(feature_name, f.round(f.col(feature_name), 2)) @@ -859,7 +859,7 @@ def test_common_genecount_feature_logic( """Test the common logic of the gene count features.""" observed_df = common_genecount_feature_logic( study_loci_to_annotate=self.sample_study_locus, - gene_index=self.sample_gene_index, + target_index=self.sample_target_index, feature_name=feature_name, genomic_window=500000, protein_coding_only=protein_coding_only, @@ -892,7 +892,7 @@ def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None: ), _schema=StudyLocus.get_schema(), ) - self.sample_gene_index = GeneIndex( + self.sample_target_index = TargetIndex( _df=spark.createDataFrame( [ { @@ -914,9 +914,9 @@ def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None: "biotype": "non_coding", }, ], - GeneIndex.get_schema(), + TargetIndex.get_schema(), ), - _schema=GeneIndex.get_schema(), + _schema=TargetIndex.get_schema(), ) @@ -944,7 +944,7 @@ def test_is_protein_coding_feature_logic( observed_df = ( is_protein_coding_feature_logic( study_loci_to_annotate=self.sample_study_locus, - gene_index=self.sample_gene_index, + target_index=self.sample_target_index, feature_name="isProteinCoding500kb", genomic_window=500000, ) @@ -981,8 +981,8 @@ def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> No _schema=StudyLocus.get_schema(), ) - # Sample gene index data with biotype - self.sample_gene_index = GeneIndex( + # Sample target index data with biotype + self.sample_target_index = TargetIndex( _df=spark.createDataFrame( [ { @@ -1004,9 +1004,9 @@ def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> No "biotype": "non_coding", }, ], - GeneIndex.get_schema(), + TargetIndex.get_schema(), ), - _schema=GeneIndex.get_schema(), + _schema=TargetIndex.get_schema(), ) diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index 6677d123e..8d63bc5ee 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -16,11 +16,11 @@ ) from gentropy.dataset.colocalisation import Colocalisation -from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.target_index import TargetIndex from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader if TYPE_CHECKING: @@ -54,7 +54,7 @@ def test_study_locus( colocalisation=self.sample_colocalisation, study_index=self.sample_study_index, study_locus=self.sample_study_locus, - gene_index=self.sample_gene_index, + target_index=self.sample_target_index, ) fm = L2GFeatureMatrix.from_features_list( self.sample_study_locus, features_list, loader @@ -170,7 +170,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: ), _schema=Colocalisation.get_schema(), ) - self.sample_gene_index = GeneIndex( + self.sample_target_index = TargetIndex( _df=spark.createDataFrame( [ ("g1", "X", "protein_coding", 200), @@ -183,7 +183,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: "tss", ], ), - _schema=GeneIndex.get_schema(), + _schema=TargetIndex.get_schema(), ) diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py index 05b652752..22391e8ea 100644 --- a/tests/gentropy/dataset/test_study_index.py +++ b/tests/gentropy/dataset/test_study_index.py @@ -7,8 +7,8 @@ from pyspark.sql import functions as f from gentropy.dataset.biosample_index import BiosampleIndex -from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.target_index import TargetIndex def test_study_index_creation(mock_study_index: StudyIndex) -> None: @@ -188,9 +188,9 @@ def create_study_index(drop_column: str) -> StudyIndex: self.study_index_no_gene = create_study_index("geneId") self.study_index_no_biosample_id = create_study_index("biosampleFromSourceId") - self.gene_index = GeneIndex( + self.target_index = TargetIndex( _df=spark.createDataFrame(self.GENE_DATA, self.GENE_COLUMNS), - _schema=GeneIndex.get_schema(), + _schema=TargetIndex.get_schema(), ) self.biosample_index = BiosampleIndex( _df=spark.createDataFrame(self.BIOSAMPLE_DATA, self.BIOSAMPLE_COLUMNS), @@ -199,7 +199,7 @@ def create_study_index(drop_column: str) -> StudyIndex: def test_gene_validation_type(self: TestQTLValidation) -> None: """Testing if the target validation runs and returns the expected type.""" - validated = self.study_index.validate_target(self.gene_index) + validated = self.study_index.validate_target(self.target_index) assert isinstance(validated, StudyIndex) def test_biosample_validation_type(self: TestQTLValidation) -> None: @@ -211,7 +211,7 @@ def test_biosample_validation_type(self: TestQTLValidation) -> None: def test_qtl_validation_correctness(self: TestQTLValidation, test: str) -> None: """Testing if the QTL validation only flags the expected studies.""" if test == "gene": - validated = self.study_index.validate_target(self.gene_index).persist() + validated = self.study_index.validate_target(self.target_index).persist() bad_study = "s2" if test == "biosample": validated = self.study_index.validate_biosample( @@ -252,7 +252,7 @@ def test_qtl_validation_drop_relevant_column( """Testing what happens if an expected column is not present.""" if drop == "gene": if test == "gene": - validated = self.study_index_no_gene.validate_target(self.gene_index) + validated = self.study_index_no_gene.validate_target(self.target_index) if test == "biosample": validated = self.study_index_no_gene.validate_biosample( self.biosample_index @@ -260,7 +260,7 @@ def test_qtl_validation_drop_relevant_column( if drop == "biosample": if test == "gene": validated = self.study_index_no_biosample_id.validate_target( - self.gene_index + self.target_index ) if test == "biosample": validated = self.study_index_no_biosample_id.validate_biosample( diff --git a/tests/gentropy/dataset/test_summary_statistics.py b/tests/gentropy/dataset/test_summary_statistics.py index b1b06442b..033fba663 100644 --- a/tests/gentropy/dataset/test_summary_statistics.py +++ b/tests/gentropy/dataset/test_summary_statistics.py @@ -17,7 +17,7 @@ def test_summary_statistics__creation( mock_summary_statistics: SummaryStatistics, ) -> None: - """Test gene index creation with mock gene index.""" + """Test summary statistics creation with mock summary statistics.""" assert isinstance(mock_summary_statistics, SummaryStatistics) diff --git a/tests/gentropy/dataset/test_target_index.py b/tests/gentropy/dataset/test_target_index.py index e4ae8e581..070bf7d8a 100644 --- a/tests/gentropy/dataset/test_target_index.py +++ b/tests/gentropy/dataset/test_target_index.py @@ -4,29 +4,29 @@ from pyspark.sql import DataFrame -from gentropy.dataset.gene_index import GeneIndex +from gentropy.dataset.target_index import TargetIndex -def test_gene_index_creation(mock_gene_index: GeneIndex) -> None: - """Test gene index creation with mock gene index.""" - assert isinstance(mock_gene_index, GeneIndex) +def test_target_index_creation(mock_target_index: TargetIndex) -> None: + """Test target index creation with mock target index.""" + assert isinstance(mock_target_index, TargetIndex) -def test_gene_index_location_lut(mock_gene_index: GeneIndex) -> None: - """Test gene index location lut.""" - assert isinstance(mock_gene_index.locations_lut(), DataFrame) +def test_target_index_location_lut(mock_target_index: TargetIndex) -> None: + """Test target index location lut.""" + assert isinstance(mock_target_index.locations_lut(), DataFrame) -def test_gene_index_symbols_lut(mock_gene_index: GeneIndex) -> None: - """Test gene index symbols lut.""" - assert isinstance(mock_gene_index.symbols_lut(), DataFrame) +def test_target_index_symbols_lut(mock_target_index: TargetIndex) -> None: + """Test target index symbols lut.""" + assert isinstance(mock_target_index.symbols_lut(), DataFrame) -def test_gene_index_filter_by_biotypes(mock_gene_index: GeneIndex) -> None: - """Test gene index filter by biotypes.""" +def test_target_index_filter_by_biotypes(mock_target_index: TargetIndex) -> None: + """Test target index filter by biotypes.""" assert isinstance( - mock_gene_index.filter_by_biotypes( + mock_target_index.filter_by_biotypes( biotypes=["protein_coding", "3prime_overlapping_ncRNA", "antisense"] ), - GeneIndex, + TargetIndex, ) diff --git a/tests/gentropy/dataset/test_variant_index.py b/tests/gentropy/dataset/test_variant_index.py index 43c409ea6..11f1c966f 100644 --- a/tests/gentropy/dataset/test_variant_index.py +++ b/tests/gentropy/dataset/test_variant_index.py @@ -15,7 +15,7 @@ def test_variant_index_creation(mock_variant_index: VariantIndex) -> None: - """Test gene index creation with mock gene index.""" + """Test variant index creation with mock variant index.""" assert isinstance(mock_variant_index, VariantIndex) diff --git a/tests/gentropy/datasource/intervals/test_andersson.py b/tests/gentropy/datasource/intervals/test_andersson.py index 69575b7c3..1820c8322 100644 --- a/tests/gentropy/datasource/intervals/test_andersson.py +++ b/tests/gentropy/datasource/intervals/test_andersson.py @@ -6,8 +6,8 @@ from pyspark.sql import DataFrame, SparkSession from gentropy.common.Liftover import LiftOverSpark -from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals +from gentropy.dataset.target_index import TargetIndex from gentropy.datasource.intervals.andersson import IntervalsAndersson @@ -26,13 +26,13 @@ def test_read_andersson(sample_intervals_andersson: DataFrame) -> None: def test_andersson_intervals_from_source( sample_intervals_andersson: DataFrame, - mock_gene_index: GeneIndex, + mock_target_index: TargetIndex, liftover_chain_37_to_38: LiftOverSpark, ) -> None: """Test AnderssonIntervals creation with mock data.""" assert isinstance( IntervalsAndersson.parse( - sample_intervals_andersson, mock_gene_index, liftover_chain_37_to_38 + sample_intervals_andersson, mock_target_index, liftover_chain_37_to_38 ), Intervals, ) diff --git a/tests/gentropy/datasource/intervals/test_javierre.py b/tests/gentropy/datasource/intervals/test_javierre.py index 886a28c52..4fdd9db7a 100644 --- a/tests/gentropy/datasource/intervals/test_javierre.py +++ b/tests/gentropy/datasource/intervals/test_javierre.py @@ -6,8 +6,8 @@ from pyspark.sql import DataFrame, SparkSession from gentropy.common.Liftover import LiftOverSpark -from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals +from gentropy.dataset.target_index import TargetIndex from gentropy.datasource.intervals.javierre import IntervalsJavierre @@ -26,13 +26,13 @@ def test_read_javierre(sample_intervals_javierre: DataFrame) -> None: def test_javierre_intervals_from_source( sample_intervals_javierre: DataFrame, - mock_gene_index: GeneIndex, + mock_target_index: TargetIndex, liftover_chain_37_to_38: LiftOverSpark, ) -> None: """Test JavierreIntervals creation with mock data.""" assert isinstance( IntervalsJavierre.parse( - sample_intervals_javierre, mock_gene_index, liftover_chain_37_to_38 + sample_intervals_javierre, mock_target_index, liftover_chain_37_to_38 ), Intervals, ) diff --git a/tests/gentropy/datasource/intervals/test_jung.py b/tests/gentropy/datasource/intervals/test_jung.py index e391b8f96..bac6918b8 100644 --- a/tests/gentropy/datasource/intervals/test_jung.py +++ b/tests/gentropy/datasource/intervals/test_jung.py @@ -6,8 +6,8 @@ from pyspark.sql import DataFrame, SparkSession from gentropy.common.Liftover import LiftOverSpark -from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals +from gentropy.dataset.target_index import TargetIndex from gentropy.datasource.intervals.jung import IntervalsJung @@ -24,13 +24,13 @@ def test_read_jung(sample_intervals_jung: DataFrame) -> None: def test_jung_intervals_from_source( sample_intervals_jung: DataFrame, - mock_gene_index: GeneIndex, + mock_target_index: TargetIndex, liftover_chain_37_to_38: LiftOverSpark, ) -> None: """Test JungIntervals creation with mock data.""" assert isinstance( IntervalsJung.parse( - sample_intervals_jung, mock_gene_index, liftover_chain_37_to_38 + sample_intervals_jung, mock_target_index, liftover_chain_37_to_38 ), Intervals, ) diff --git a/tests/gentropy/datasource/intervals/test_thurman.py b/tests/gentropy/datasource/intervals/test_thurman.py index 616e1abec..a6f4074b0 100644 --- a/tests/gentropy/datasource/intervals/test_thurman.py +++ b/tests/gentropy/datasource/intervals/test_thurman.py @@ -6,8 +6,8 @@ from pyspark.sql import DataFrame, SparkSession from gentropy.common.Liftover import LiftOverSpark -from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals +from gentropy.dataset.target_index import TargetIndex from gentropy.datasource.intervals.thurman import IntervalsThurman @@ -26,13 +26,13 @@ def test_read_thurman(sample_intervals_thurman: DataFrame) -> None: def test_thurman_intervals_from_source( sample_intervals_thurman: DataFrame, - mock_gene_index: GeneIndex, + mock_target_index: TargetIndex, liftover_chain_37_to_38: LiftOverSpark, ) -> None: """Test IntervalsThurman creation with mock data.""" assert isinstance( IntervalsThurman.parse( - sample_intervals_thurman, mock_gene_index, liftover_chain_37_to_38 + sample_intervals_thurman, mock_target_index, liftover_chain_37_to_38 ), Intervals, ) diff --git a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py index 79f9d925a..3c8ff1aed 100644 --- a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py +++ b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py @@ -29,8 +29,8 @@ from pyspark.sql.session import SparkSession from gentropy.dataset.colocalisation import Colocalisation - from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.study_locus import StudyLocus + from gentropy.dataset.target_index import TargetIndex def test_open_targets_as_l2g_gold_standard( @@ -162,7 +162,7 @@ def test_build_feature_matrix( mock_study_locus: StudyLocus, mock_colocalisation: Colocalisation, mock_study_index: StudyIndex, - mock_gene_index: GeneIndex, + mock_target_index: TargetIndex, ) -> None: """Test building feature matrix with the eQtlColocH4Maximum feature.""" features_list = ["eQtlColocH4Maximum", "isProteinCoding"] @@ -170,7 +170,7 @@ def test_build_feature_matrix( colocalisation=mock_colocalisation, study_index=mock_study_index, study_locus=mock_study_locus, - gene_index=mock_gene_index, + target_index=mock_target_index, ) fm = mock_study_locus.build_feature_matrix(features_list, loader) assert isinstance( diff --git a/tests/gentropy/datasource/open_targets/test_target.py b/tests/gentropy/datasource/open_targets/test_target.py index 091dcea53..b32886a4b 100644 --- a/tests/gentropy/datasource/open_targets/test_target.py +++ b/tests/gentropy/datasource/open_targets/test_target.py @@ -4,10 +4,12 @@ from pyspark.sql import DataFrame -from gentropy.dataset.gene_index import GeneIndex +from gentropy.dataset.target_index import TargetIndex from gentropy.datasource.open_targets.target import OpenTargetsTarget -def test_open_targets_as_gene_index(sample_target_index: DataFrame) -> None: - """Test gene index from source.""" - assert isinstance(OpenTargetsTarget.as_gene_index(sample_target_index), GeneIndex) +def test_open_targets_as_target_index(sample_target_index: DataFrame) -> None: + """Test target index from source.""" + assert isinstance( + OpenTargetsTarget.as_target_index(sample_target_index), TargetIndex + ) diff --git a/tests/gentropy/test_schemas.py b/tests/gentropy/test_schemas.py index 1b06076d0..500fbcd69 100644 --- a/tests/gentropy/test_schemas.py +++ b/tests/gentropy/test_schemas.py @@ -17,8 +17,8 @@ if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest - from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_prediction import L2GPrediction + from gentropy.dataset.target_index import TargetIndex SCHEMA_DIR = "src/gentropy/assets/schemas" @@ -75,23 +75,23 @@ def test_schema_columns_camelcase(schema_json: str) -> None: class TestValidateSchema: - """Test validate_schema method using L2GPrediction (unnested) and GeneIndex (nested) as a testing dataset.""" + """Test validate_schema method using L2GPrediction (unnested) and TargetIndex (nested) as a testing dataset.""" @pytest.fixture() def mock_dataset_instance( self: TestValidateSchema, request: FixtureRequest - ) -> L2GPrediction | GeneIndex: + ) -> L2GPrediction | TargetIndex: """Meta fixture to return the value of any requested fixture.""" return request.getfixturevalue(request.param) @pytest.mark.parametrize( "mock_dataset_instance", - ["mock_l2g_predictions", "mock_gene_index"], + ["mock_l2g_predictions", "mock_target_index"], indirect=True, ) def test_validate_schema_extra_field( self: TestValidateSchema, - mock_dataset_instance: L2GPrediction | GeneIndex, + mock_dataset_instance: L2GPrediction | TargetIndex, ) -> None: """Test that validate_schema raises an error if the observed schema has an extra field.""" with pytest.raises(SchemaValidationError, match="extraField"): @@ -101,12 +101,12 @@ def test_validate_schema_extra_field( @pytest.mark.parametrize( "mock_dataset_instance", - ["mock_l2g_predictions", "mock_gene_index"], + ["mock_l2g_predictions", "mock_target_index"], indirect=True, ) def test_validate_schema_missing_field( self: TestValidateSchema, - mock_dataset_instance: L2GPrediction | GeneIndex, + mock_dataset_instance: L2GPrediction | TargetIndex, ) -> None: """Test that validate_schema raises an error if the observed schema is missing a required field, geneId in this case.""" with pytest.raises(SchemaValidationError, match="geneId"): @@ -114,12 +114,12 @@ def test_validate_schema_missing_field( @pytest.mark.parametrize( "mock_dataset_instance", - ["mock_l2g_predictions", "mock_gene_index"], + ["mock_l2g_predictions", "mock_target_index"], indirect=True, ) def test_validate_schema_duplicated_field( self: TestValidateSchema, - mock_dataset_instance: L2GPrediction | GeneIndex, + mock_dataset_instance: L2GPrediction | TargetIndex, ) -> None: """Test that validate_schema raises an error if the observed schema has a duplicated field, geneId in this case.""" with pytest.raises(SchemaValidationError, match="geneId"): @@ -129,12 +129,12 @@ def test_validate_schema_duplicated_field( @pytest.mark.parametrize( "mock_dataset_instance", - ["mock_l2g_predictions", "mock_gene_index"], + ["mock_l2g_predictions", "mock_target_index"], indirect=True, ) def test_validate_schema_different_datatype( self: TestValidateSchema, - mock_dataset_instance: L2GPrediction | GeneIndex, + mock_dataset_instance: L2GPrediction | TargetIndex, ) -> None: """Test that validate_schema raises an error if any field in the observed schema has a different type than expected.""" with pytest.raises(SchemaValidationError, match="geneId"):