Skip to content

Commit

Permalink
Merge pull request #122 from ihmeuw-msca/fix/pandas-mypy-errors
Browse files Browse the repository at this point in the history
Fix/pandas mypy errors
  • Loading branch information
blsmxiu47 authored Dec 23, 2024
2 parents de40176 + 5448971 commit 9a8b5c1
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 24 deletions.
7 changes: 4 additions & 3 deletions src/onemod/stage/model_stages/rover_stage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# mypy: ignore-errors
"""ModRover covariate selection stage.
Notes
Expand Down Expand Up @@ -144,7 +143,9 @@ def _get_rover_summaries(self) -> pd.DataFrame:

# Merge with subsets and add t-statistic
summaries_df = summaries_df.merge(subsets, on="subset_id", how="left")
summaries_df["abs_t_stat"] = summaries_df.eval("abs(coef / coef_sd)")
summaries_df["abs_t_stat"] = (
summaries_df["coef"].abs() / summaries_df["coef_sd"]
)
return summaries_df

def _get_selected_covs(self, summaries: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -185,8 +186,8 @@ def _get_subset_selected_covs(
.mean()
.sort_values(ascending=False)
.reset_index()
.eval(f"selected = abs_t_stat >= {self.config.t_threshold}")
)
t_stats["selected"] = t_stats["abs_t_stat"] >= self.config.t_threshold

# Add/remove covariates based on min_covs/max_covs
if (
Expand Down
24 changes: 12 additions & 12 deletions src/onemod/utils/residual.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# mypy: ignore-errors
import numpy as np
import pandas as pd


Expand All @@ -15,49 +15,49 @@ def get_residual_binomial(
data: pd.DataFrame, pred: str, obs: str, weights: str
) -> pd.DataFrame:
result = pd.DataFrame(index=data.index)
result["residual"] = data.eval(
f"({obs} - {pred}) / ({pred} * (1 - {pred}))"
result["residual"] = (data[obs] - data[pred]) / (
data[pred] * (1 - data[pred])
)
result["residual_se"] = data.eval(
f"1 / sqrt({pred} * (1 - {pred}) * {weights})"
result["residual_se"] = 1 / np.sqrt(
data[pred] * (1 - data[pred]) * data[weights]
)
return result

@staticmethod
def predict_binomial(
data: pd.DataFrame, pred: str, residual: str = "residual"
) -> pd.Series:
return data.eval(f"{pred} + {residual} * {pred} * (1 - {pred})")
return data[pred] + data[residual] * data[pred] * (1 - data[pred])

@staticmethod
def get_residual_poisson(
data: pd.DataFrame, pred: str, obs: str, weights: str
) -> pd.DataFrame:
result = pd.DataFrame(index=data.index)
result["residual"] = data.eval(f"{obs} / {pred} - 1")
result["residual_se"] = data.eval(f"1 / sqrt({pred} * {weights})")
result["residual"] = data[obs] / data[pred] - 1
result["residual_se"] = 1 / np.sqrt(data[pred] * data[weights])
return result

@staticmethod
def predict_poisson(
data: pd.DataFrame, pred: str, residual: str = "residual"
) -> pd.Series:
return data.eval(f"({residual} + 1) * {pred}")
return (data[residual] + 1) * data[pred]

@staticmethod
def get_residual_gaussian(
data: pd.DataFrame, pred: str, obs: str, weights: str
) -> pd.DataFrame:
result = pd.DataFrame(index=data.index)
result["residual"] = data.eval(f"{obs} - {pred}")
result["residual_se"] = data.eval(f"1 / sqrt({weights})")
result["residual"] = data[obs] - data[pred]
result["residual_se"] = 1 / np.sqrt(data[weights])
return result

@staticmethod
def predict_gaussian(
data: pd.DataFrame, pred: str, residual: str = "residual"
) -> pd.Series:
return data.eval(f"{pred} + {residual}")
return data[pred] + data[residual]

def __call__(self, *args, **kwargs) -> pd.DataFrame:
return self.get_residual(*args, **kwargs)
24 changes: 15 additions & 9 deletions src/onemod/utils/uncertainty.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# mypy: ignore-errors
import numpy as np
import pandas as pd
from msca.c2fun import c2fun_dict
Expand Down Expand Up @@ -48,8 +47,10 @@ def get_ci_coverage(
data["lwr"] = norm.ppf(lwr, loc=data[pred], scale=data[pred_sd])
data["upr"] = norm.ppf(upr, loc=data[pred], scale=data[pred_sd])

coverage = data.eval(f"{truth} >= lwr and {truth} <= upr").mean()
return coverage
data["coverage_bool"] = (data[truth] >= data["lwr"]) & (
data[truth] <= data["upr"]
)
return data["coverage_bool"].mean()


def get_pi_coverage(
Expand Down Expand Up @@ -105,8 +106,10 @@ def get_pi_coverage(
upr = 1.0 - lwr
residual["lwr"] = norm.ppf(lwr, loc=0.0, scale=residual["total_sd"])
residual["upr"] = norm.ppf(upr, loc=0.0, scale=residual["total_sd"])
coverage = residual.eval("residual >= lwr and residual <= upr").mean()
return coverage
residual["coverage_bool"] = (residual["residual"] >= residual["lwr"]) & (
residual["residual"] <= residual["upr"]
)
return residual["coverage_bool"].mean()


def calibrate_pred_sd(
Expand Down Expand Up @@ -166,13 +169,16 @@ def equation(alpha: float) -> float:
# deviation is bounded by the range of the random variable divided by 2.
# So, we want to find an alpha such that the maximum absolute value of
# the Pearson residual is less than 1.
residual_squared = residual["residual"] ** 2
residual_se_squared = residual["residual_se"] ** 2
adjusted_residual = residual_squared - residual_se_squared

alpha_upr = 1.1 * np.sqrt(
np.max(
residual.eval("residual ** 2 - residual_se ** 2")
/ data[pred_sd] ** 2
)
np.max(adjusted_residual / (data[pred_sd] ** 2))
)

alpha = brentq(equation, 0.0, alpha_upr)

else:
alpha = 0.0

Expand Down

0 comments on commit 9a8b5c1

Please sign in to comment.