opentargets · pre-commit-ci · Jan 14, 2025 · Jan 14, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ ci:
   skip: [poetry-lock]
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.4
+    rev: v0.9.1
     hooks:
       - id: ruff
         args:
@@ -46,7 +46,7 @@ repos:
       - id: python-check-blanket-noqa
 
   - repo: https://github.com/hadialqattan/pycln
-    rev: v2.4.0
+    rev: v2.5.0
     hooks:
       - id: pycln
         args: [--all]
@@ -58,14 +58,14 @@ repos:
         exclude: "CHANGELOG.md"
 
   - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
-    rev: v9.18.0
+    rev: v9.20.0
     hooks:
       - id: commitlint
         additional_dependencies: ["@commitlint/config-conventional@18.6.3"]
         stages: [commit-msg]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: "v1.13.0"
+    rev: "v1.14.1"
     hooks:
       - id: mypy
         args:
@@ -98,12 +98,12 @@ repos:
       - id: beautysh
 
   - repo: https://github.com/jsh9/pydoclint
-    rev: 0.5.9
+    rev: 0.6.0
     hooks:
       - id: pydoclint
 
   - repo: https://github.com/python-poetry/poetry
-    rev: "1.8.0"
+    rev: "2.0.1"
     hooks:
       - id: poetry-check
       - id: poetry-lock

diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py
@@ -4,9 +4,10 @@
 
 import re
 import sys
+from collections.abc import Iterable
 from functools import reduce, wraps
 from itertools import chain
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
 
 import pyspark.sql.functions as f
 import pyspark.sql.types as t

diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py
@@ -3,7 +3,8 @@
 
 from __future__ import annotations
 
-from typing import Any, Iterator, Mapping
+from typing import Any
+from collections.abc import Iterator, Mapping
 
 from gentropy.dataset.l2g_features.colocalisation import (
     EQtlColocClppMaximumFeature,

diff --git a/tests/gentropy/dataset/test_colocalisation.py b/tests/gentropy/dataset/test_colocalisation.py
@@ -63,7 +63,9 @@ def test_append_study_metadata_right(
         assert (
             observed_df.select(f"{colocalisation_side}GeneId").collect()[0][0]
             == expected_geneId
-        ), f"Expected {colocalisation_side}GeneId {expected_geneId}, but got {observed_df.select(f'{colocalisation_side}GeneId').collect()[0][0]}"
+        ), (
+            f"Expected {colocalisation_side}GeneId {expected_geneId}, but got {observed_df.select(f'{colocalisation_side}GeneId').collect()[0][0]}"
+        )
 
     @pytest.fixture(autouse=True)
     def _setup(self: TestAppendStudyMetadata, spark: SparkSession) -> None:

diff --git a/tests/gentropy/dataset/test_dataset.py b/tests/gentropy/dataset/test_dataset.py
@@ -42,9 +42,9 @@ def test_initialize_without_schema(self: TestDataset, spark: SparkSession) -> No
         """Test if Dataset derived class collects the schema from assets if schema is not provided."""
         df = spark.createDataFrame([(1,)], schema=MockDataset.get_schema())
         ds = MockDataset(_df=df)
-        assert (
-            ds.schema == MockDataset.get_schema()
-        ), "Schema should be inferred from df"
+        assert ds.schema == MockDataset.get_schema(), (
+            "Schema should be inferred from df"
+        )
 
     def test_passing_incorrect_types(self: TestDataset, spark: SparkSession) -> None:
         """Test if passing incorrect object types to Dataset raises an error."""
@@ -97,6 +97,6 @@ def test_process_class_params(spark: SparkSession) -> None:
     }
     class_params, spark_params = Dataset._process_class_params(params)
     assert "_df" in class_params, "Class params should contain _df"
-    assert (
-        "recursiveFileLookup" in spark_params
-    ), "Spark params should contain recursiveFileLookup"
+    assert "recursiveFileLookup" in spark_params, (
+        "Spark params should contain recursiveFileLookup"
+    )
diff --git a/tests/gentropy/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py
@@ -29,9 +29,9 @@ def test_process_gene_interactions(sample_otp_interactions: DataFrame) -> None:
     """Tests processing of gene interactions from OTP."""
     expected_cols = ["geneIdA", "geneIdB", "score"]
     observed_df = L2GGoldStandard.process_gene_interactions(sample_otp_interactions)
-    assert (
-        observed_df.columns == expected_cols
-    ), "Gene interactions has a different schema."
+    assert observed_df.columns == expected_cols, (
+        "Gene interactions has a different schema."
+    )
 
 
 def test_predictions(mock_l2g_predictions: L2GPrediction) -> None:
@@ -171,9 +171,9 @@ def test_l2g_feature_constructor_with_schema_mismatch(
         ),
         with_gold_standard=False,
     )
-    assert (
-        fm._df.schema["distanceTssMean"].dataType == FloatType()
-    ), "Feature `distanceTssMean` is not being casted to FloatType. Check L2GFeatureMatrix constructor."
+    assert fm._df.schema["distanceTssMean"].dataType == FloatType(), (
+        "Feature `distanceTssMean` is not being casted to FloatType. Check L2GFeatureMatrix constructor."
+    )
 
 
 def test_calculate_feature_missingness_rate(
@@ -185,9 +185,9 @@ def test_calculate_feature_missingness_rate(
     assert isinstance(observed_missingness, dict)
     assert mock_l2g_feature_matrix.features_list is not None and len(
         observed_missingness
-    ) == len(
-        mock_l2g_feature_matrix.features_list
-    ), "Missing features in the missingness rate dictionary."
-    assert (
-        observed_missingness == expected_missingness
-    ), "Missingness rate is incorrect."
+    ) == len(mock_l2g_feature_matrix.features_list), (
+        "Missing features in the missingness rate dictionary."
+    )
+    assert observed_missingness == expected_missingness, (
+        "Missingness rate is incorrect."
+    )
diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py
@@ -287,9 +287,9 @@ def test__common_colocalisation_feature_logic(
                 },
             ],
         ).select("studyLocusId", "geneId", "eQtlColocH4Maximum")
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "The feature values are not as expected."
+        assert observed_df.collect() == expected_df.collect(), (
+            "The feature values are not as expected."
+        )
 
     def test_extend_missing_colocalisation_to_neighbourhood_genes(
         self: TestCommonColocalisationFeatureLogic,
@@ -322,9 +322,9 @@ def test_extend_missing_colocalisation_to_neighbourhood_genes(
         expected_df = spark.createDataFrame(
             [{"geneId": "gene3", "studyLocusId": "1", "eQtlColocH4Maximum": 0.0}]
         ).select("studyLocusId", "geneId", "eQtlColocH4Maximum")
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "The feature values are not as expected."
+        assert observed_df.collect() == expected_df.collect(), (
+            "The feature values are not as expected."
+        )
 
     def test_common_neighbourhood_colocalisation_feature_logic(
         self: TestCommonColocalisationFeatureLogic,
@@ -361,9 +361,9 @@ def test_common_neighbourhood_colocalisation_feature_logic(
                 },
             ],
         ).select("geneId", "studyLocusId", "eQtlColocH4MaximumNeighbourhood")
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "The expected and observed dataframes do not match."
+        assert observed_df.collect() == expected_df.collect(), (
+            "The expected and observed dataframes do not match."
+        )
 
     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonColocalisationFeatureLogic, spark: SparkSession) -> None:
@@ -547,9 +547,9 @@ def test_common_distance_feature_logic(
             .select("studyLocusId", "geneId", feature_name)
             .orderBy(feature_name)
         )
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), f"Expected and observed dataframes are not equal for feature {feature_name}."
+        assert observed_df.collect() == expected_df.collect(), (
+            f"Expected and observed dataframes are not equal for feature {feature_name}."
+        )
 
     def test_common_neighbourhood_distance_feature_logic(
         self: TestCommonDistanceFeatureLogic,
@@ -576,9 +576,9 @@ def test_common_neighbourhood_distance_feature_logic(
             ),  # 0.91/0.91
             ["geneId", "studyLocusId", feature_name],
         ).orderBy(feature_name)
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "Output doesn't meet the expectation."
+        assert observed_df.collect() == expected_df.collect(), (
+            "Output doesn't meet the expectation."
+        )
 
     @pytest.fixture(autouse=True)
     def _setup(
@@ -753,9 +753,9 @@ def test_common_vep_feature_logic(
             .orderBy(feature_name)
             .select("studyLocusId", "geneId", feature_name)
         )
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), f"Expected and observed dataframes are not equal for feature {feature_name}."
+        assert observed_df.collect() == expected_df.collect(), (
+            f"Expected and observed dataframes are not equal for feature {feature_name}."
+        )
 
         def test_common_neighbourhood_vep_feature_logic(
             self: TestCommonVepFeatureLogic,
@@ -787,9 +787,9 @@ def test_common_neighbourhood_vep_feature_logic(
                 .orderBy(feature_name)
                 .select("studyLocusId", "geneId", feature_name)
             )
-            assert (
-                observed_df.collect() == expected_df.collect()
-            ), "Output doesn't meet the expectation."
+            assert observed_df.collect() == expected_df.collect(), (
+                "Output doesn't meet the expectation."
+            )
 
     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonVepFeatureLogic, spark: SparkSession) -> None:
@@ -870,9 +870,9 @@ def test_common_genecount_feature_logic(
             .orderBy("studyLocusId", "geneId")
         )
 
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), f"Expected and observed dataframes do not match for feature {feature_name}."
+        assert observed_df.collect() == expected_df.collect(), (
+            f"Expected and observed dataframes do not match for feature {feature_name}."
+        )
 
     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None:
@@ -957,9 +957,9 @@ def test_is_protein_coding_feature_logic(
             .select("studyLocusId", "geneId", "isProteinCoding500kb")
             .orderBy("studyLocusId", "geneId")
         )
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "Expected and observed DataFrames do not match."
+        assert observed_df.collect() == expected_df.collect(), (
+            "Expected and observed DataFrames do not match."
+        )
 
     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> None:

diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py
@@ -60,9 +60,9 @@ def test_study_locus(
             self.sample_study_locus, features_list, loader
         )
         for feature in features_list:
-            assert (
-                feature in fm._df.columns
-            ), f"Feature {feature} not found in feature matrix."
+            assert feature in fm._df.columns, (
+                f"Feature {feature} not found in feature matrix."
+            )
 
     def test_gold_standard(
         self: TestFromFeaturesList,
@@ -78,9 +78,9 @@ def test_gold_standard(
             self.sample_gold_standard, features_list, loader
         )
         for feature in features_list:
-            assert (
-                feature in fm._df.columns
-            ), f"Feature {feature} not found in feature matrix."
+            assert feature in fm._df.columns, (
+                f"Feature {feature} not found in feature matrix."
+            )
 
     @pytest.fixture(autouse=True)
     def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None:

diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py
@@ -517,9 +517,9 @@ def test_filter_ld_set(spark: SparkSession) -> None:
         observed_data, ["studyLocusId", "ldSet"]
     ).withColumn("ldSet", StudyLocus.filter_ld_set(f.col("ldSet"), 0.5))
     expected_tags_in_ld = 0
-    assert (
-        observed_df.filter(f.size("ldSet") > 1).count() == expected_tags_in_ld
-    ), "Expected tags in ld set differ from observed."
+    assert observed_df.filter(f.size("ldSet") > 1).count() == expected_tags_in_ld, (
+        "Expected tags in ld set differ from observed."
+    )
 
 
 def test_annotate_locus_statistics_boundaries(
@@ -860,9 +860,9 @@ def test_build_feature_matrix(
         study_locus=mock_study_locus,
     )
     fm = mock_study_locus.build_feature_matrix(features_list, loader)
-    assert isinstance(
-        fm, L2GFeatureMatrix
-    ), "Feature matrix should be of type L2GFeatureMatrix"
+    assert isinstance(fm, L2GFeatureMatrix), (
+        "Feature matrix should be of type L2GFeatureMatrix"
+    )
 
 
 class TestStudyLocusRedundancyFlagging:

diff --git a/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py b/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py
@@ -28,15 +28,15 @@ def test_ontology_parser(self: TestOntologyParger, spark: SparkSession) -> None:
             self.SAMPLE_EFO_PATH, spark
         ).retain_rows_with_ancestor_id(["CL_0000000"])
 
-        assert isinstance(
-            cell_ontology, BiosampleIndex
-        ), "Cell ontology subset is not parsed correctly to BiosampleIndex."
-        assert isinstance(
-            uberon, BiosampleIndex
-        ), "Uberon subset is not parsed correctly to BiosampleIndex."
-        assert isinstance(
-            efo_cell_line, BiosampleIndex
-        ), "EFO cell line subset is not parsed correctly to BiosampleIndex."
+        assert isinstance(cell_ontology, BiosampleIndex), (
+            "Cell ontology subset is not parsed correctly to BiosampleIndex."
+        )
+        assert isinstance(uberon, BiosampleIndex), (
+            "Uberon subset is not parsed correctly to BiosampleIndex."
+        )
+        assert isinstance(efo_cell_line, BiosampleIndex), (
+            "EFO cell line subset is not parsed correctly to BiosampleIndex."
+        )
 
     def test_merge_biosample_indices(
         self: TestOntologyParger, spark: SparkSession
@@ -49,6 +49,6 @@ def test_merge_biosample_indices(
         efo = extract_ontology_from_json(self.SAMPLE_EFO_PATH, spark)
 
         merged = cell_ontology.merge_indices([uberon, efo])
-        assert isinstance(
-            merged, BiosampleIndex
-        ), "Merging of biosample indices is not correct."
+        assert isinstance(merged, BiosampleIndex), (
+            "Merging of biosample indices is not correct."
+        )