Skip to content

Commit

Permalink
feat: adding FoldX ddG scaling
Browse files Browse the repository at this point in the history
  • Loading branch information
DSuveges committed Dec 13, 2024
1 parent e262aac commit bef7252
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
34 changes: 33 additions & 1 deletion src/gentropy/dataset/variant_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def annotate_with_amino_acid_consequences(
.drop("uniprotAccession", "aminoAcidChange", "annotations")
# Dropping potentially exploded variant rows:
.distinct()
.withColumn("rank", f.rank().over(w))
.withColumn("rank", f.row_number().over(w))
.filter(f.col("rank") == 1)
.drop("rank"),
_schema=self.get_schema(),
Expand Down Expand Up @@ -425,6 +425,7 @@ def resolve_predictor_methods(
.when(method == "SpliceAI", score)
.when(method == "VEP", score)
.when(method == "GERP", cls._normalise_gerp(score))
.when(method == "FoldX", cls._normalise_foldx(score))
)

@staticmethod
Expand All @@ -451,6 +452,37 @@ def _rescaleColumnValue(
maximum - minimum
) + minimum

@classmethod
def _normalise_foldx(
    cls: type[InSilicoPredictorNormaliser], score: Column
) -> Column:
    """Map FoldX ddG energies (kcal/mol) onto a normalised score in [-1, 1].

    Scores of 2 or more are set to 1.0 and scores below -2 are set to -1.
    In between, each ddG band is rescaled onto its own slice of the output
    range via `_rescaleColumnValue`:

    | ddG band       | normalised band  |
    |----------------|------------------|
    | [1.5, 2.0)     | [0.75, 1.00)     |
    | [0.5, 1.5)     | [0.25, 0.75)     |
    | [-0.5, 0.5)    | [-0.25, 0.25)    |
    | [-1.5, -0.5)   | [-0.75, -0.25)   |
    | [-2.0, -1.5)   | [-1.00, -0.75)   |

    Interpretation of the ΔΔG ranges behind these bands:
    - 0 to ±0.5 kcal/mol: usually considered negligible or within the noise
      of predictions; minimal or no effect on stability.
    - ±0.5 to ±1.5 kcal/mol: moderate effect on stability, which may cause
      noticeable changes depending on the protein's functional context.
    - beyond ±1.5 kcal/mol: significant impact on stability. Positive values
      indicate structural disruption or destabilisation, negative values
      suggest substantial stabilisation.
    - above +2.0 kcal/mol: likely to cause a significant structural
      disruption (unfolding, local instability, loss of functional
      conformation).

    The expression has no `otherwise` branch, so a score matching none of
    the conditions (e.g. a null) yields null.

    Args:
        score (Column): column with ddG values

    Returns:
        Column: Normalised energies
    """
    # (ddG lower bound, ddG upper bound, normalised lower, normalised upper).
    # Order matters: the conditions are evaluated top-down and the first
    # matching lower bound wins, so bands must go from highest to lowest.
    ddg_bands = (
        (1.5, 2.0, 0.75, 1.00),
        (0.5, 1.5, 0.25, 0.75),
        (-0.5, 0.5, -0.25, 0.25),
        (-1.5, -0.5, -0.75, -0.25),
        (-2, -1.5, -1, -0.75),
    )

    # Upper clamp first: anything at or above +2 kcal/mol saturates at 1.0.
    normalised = f.when(score >= 2, f.lit(1.0))

    for ddg_low, ddg_high, scaled_low, scaled_high in ddg_bands:
        normalised = normalised.when(
            score >= ddg_low,
            cls._rescaleColumnValue(
                score, ddg_low, ddg_high, scaled_low, scaled_high
            ),
        )

    # Lower clamp: anything below -2 kcal/mol saturates at -1.
    return normalised.when(score < -2, f.lit(-1))

@classmethod
def _normalise_cadd(
cls: type[InSilicoPredictorNormaliser],
Expand Down
10 changes: 9 additions & 1 deletion src/gentropy/datasource/open_targets/foldex_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from gentropy.common.spark_helpers import enforce_schema
from gentropy.dataset.amino_acid_variants import AminoAcidVariants
from gentropy.dataset.variant_index import InSilicoPredictorNormaliser


class OpenTargetsFoldX:
Expand All @@ -29,7 +30,7 @@ def get_foldx_prediction(score_column: Column) -> Column:
Column: struct with the right shape of the in silico predictors.
"""
return f.struct(
f.lit("foldX").alias("method"),
f.lit("FoldX").alias("method"),
score_column.cast(t.FloatType()).alias("score"),
)

Expand Down Expand Up @@ -67,6 +68,13 @@ def ingest_foldx_data(
"inSilicoPredictors"
)
)
# Normalise FoldX free energy changes:
.withColumn(
"inSilicoPredictors",
InSilicoPredictorNormaliser.normalise_in_silico_predictors(
f.col("inSilicoPredictors")
),
)
),
_schema=AminoAcidVariants.get_schema(),
)
Expand Down

0 comments on commit bef7252

Please sign in to comment.