Skip to content

Commit

Permalink
Merge pull request #297 from pegasystems/Adding_AGB_Monitoring_Support
Browse files Browse the repository at this point in the history
Picking up ADM model type and using in summaries
  • Loading branch information
operdeck authored Dec 13, 2024
2 parents 8472f06 + 30ffc95 commit 4431274
Show file tree
Hide file tree
Showing 11 changed files with 199 additions and 206 deletions.
10 changes: 8 additions & 2 deletions python/pdstools/adm/ADMDatamart.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ def __init__(
self.aggregates = Aggregates(datamart=self)
self.agb = AGB(datamart=self)
self.generate = Reports(datamart=self)
self.cdh_guidelines = CDHGuidelines()
self.cdh_guidelines = (
CDHGuidelines()
) # not sure if this should be part of the ADM DM

self.model_data = self._validate_model_data(
model_df, query=query, extract_pyname_keys=extract_pyname_keys
Expand Down Expand Up @@ -310,6 +312,11 @@ def _validate_model_data(
if "Treatment" in schema.names():
self.context_keys.append("Treatment")

# Model technique (NaiveBayes or GradientBoost) added in '24 (US-648869 and related)
if "ModelTechnique" not in schema.names():
df = df.with_columns(
ModelTechnique=pl.lit(None),
)
self.context_keys = [k for k in self.context_keys if k in schema.names()]

df = df.with_columns(
Expand Down Expand Up @@ -399,7 +406,6 @@ def apply_predictor_categorization(
categorization() if callable(categorization) else categorization
)


if df is not None:
return df.with_columns(PredictorCategory=categorization_expr)

Expand Down
80 changes: 75 additions & 5 deletions python/pdstools/adm/Aggregates.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,15 +460,20 @@ def name_normalizer(x):
.agg(
pl.col("SnapshotTime").min().cast(pl.Date).alias("DateRange Min"),
pl.col("SnapshotTime").max().cast(pl.Date).alias("DateRange Max"),
pl.col("Positives").sum(),
pl.col("ResponseCount").sum(),
pl.sum(["Positives", "ResponseCount"]),
(cdh_utils.weighted_performance_polars() * 100).alias("Performance"),
pl.col("Configuration").cast(pl.Utf8),
pl.col("Configuration")
.cast(pl.Utf8)
.str.to_uppercase()
.is_in([x.upper() for x in self.cdh_guidelines.standard_configurations])
.alias("isNBADModelConfiguration"),
(pl.col("ModelTechnique") == "GradientBoost")
.any(ignore_nulls=False)
.alias("usesAGB"),
(pl.col("ModelTechnique") == "GradientBoost")
.all(ignore_nulls=False)
.alias("usesAGBOnly"),
actionIdentifierExpr.drop_nulls()
.n_unique()
.alias("Total Number of Actions"),
Expand Down Expand Up @@ -561,7 +566,70 @@ def name_normalizer(x):
)
)

def predictor_last_snapshot(self) -> Optional[pl.DataFrame]:
def summary_by_configuration(self) -> pl.DataFrame:
    """
    Generates a summary of the ADM model configurations.

    Aggregates the last model snapshot per Configuration (plus Channel and
    Direction when those are present as context keys), reporting unique
    action/treatment counts, AGB (gradient boosting) usage, model counts
    and summed response counts.

    Returns
    -------
    pl.DataFrame
        A Polars DataFrame containing the configuration summary, sorted by
        the group-by columns.
    """

    # Aggregations over the action dimension. Treatment and Issue are
    # optional context keys, so guard for their presence to keep the
    # expressions valid against any datamart schema.
    action_dim_agg = [pl.col("Name").n_unique().alias("Actions")]
    if "Treatment" in self.datamart.context_keys:
        action_dim_agg += [
            pl.col("Treatment").n_unique().alias("Unique Treatments")
        ]
    else:
        # Keep the output schema stable even when there is no Treatment
        # context key: emit a constant 0 instead of dropping the column.
        action_dim_agg += [pl.lit(0).alias("Unique Treatments")]

    if "Issue" in self.datamart.context_keys:
        action_dim_agg += [
            pl.col("Issue").cast(pl.String).unique().alias("Used for (Issues)")
        ]

    # Group by Configuration, optionally refined by Channel/Direction
    # when those are part of the context keys.
    group_by_cols = ["Configuration"] + [
        c for c in ["Channel", "Direction"] if c in self.datamart.context_keys
    ]
    configuration_summary = (
        self.last(table="model_data")
        .group_by(group_by_cols)
        .agg(
            [
                # AGB flag per configuration: "Yes" when any model in the
                # group uses GradientBoost, "Unknown" when ModelTechnique
                # is (partially) missing, otherwise "No". Note the "Yes"
                # branch wins even if some values in the group are null.
                pl.when((pl.col("ModelTechnique") == "GradientBoost").any())
                .then(pl.lit("Yes"))
                .when(pl.col("ModelTechnique").is_null().any())
                .then(pl.lit("Unknown"))
                .otherwise(pl.lit("No"))
                .alias("AGB")
            ]
            + [
                pl.col("ModelID").n_unique(),
            ]
            + action_dim_agg
            + [pl.sum(["ResponseCount", "Positives"])],
        )
        .with_columns(
            [
                # pl.col("Configuration")
                # .is_in(standardNBADNames.keys())
                # .alias("Standard in NBAD Framework"),
                # Average number of model instances per action; rounded
                # for presentation in reports.
                (pl.col("ModelID") / pl.col("Actions"))
                .round(2)
                .alias("ModelsPerAction"),
            ]
        )
        .sort(group_by_cols)
    )
    if "Issue" in self.datamart.context_keys:
        # Flatten the per-group list of issues into a sorted,
        # comma-separated string for readability.
        configuration_summary = configuration_summary.with_columns(
            pl.col("Used for (Issues)").list.unique().list.sort().list.join(", ")
        )

    return configuration_summary

def predictors_overview(self) -> Optional[pl.DataFrame]:
"""
Generate a summary of the last snapshot of predictor data.
Expand All @@ -580,7 +648,7 @@ def predictor_last_snapshot(self) -> Optional[pl.DataFrame]:

predictor_summary = (
self.last(table="predictor_data")
.filter(pl.col("PredictorName") != "Classifier")
.filter(pl.col("PredictorName") != "Classifier") # TODO not name, there is a type
.join(
self.last(table="model_data")
.select(["ModelID"] + model_identifiers)
Expand Down Expand Up @@ -624,7 +692,7 @@ def predictor_last_snapshot(self) -> Optional[pl.DataFrame]:
)

return predictor_summary
except ValueError:
except ValueError: # really? swallowing?
return None

def overall_summary(
Expand Down Expand Up @@ -730,6 +798,8 @@ def overall_summary(
# TODO there was something about OmniAdaptiveModel here - but I don't recall what was the issue
pl.col("usesNBAD").any(),
pl.col("usesNBADOnly").all(),
pl.col("usesAGB").any(),
pl.col("usesAGBOnly").all(),
# pl.lit(usesNBAD).alias("usesNBAD"),
# ((pl.len() > 0) & pl.lit(usesNBAD and usesNBADOnly)).alias(
# "usesNBADOnly"
Expand Down
4 changes: 2 additions & 2 deletions python/pdstools/adm/Reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,8 +565,8 @@ def excel_report(
}

if self.datamart.predictor_data is not None:
tabs["predictor_last_snapshot"] = (
self.datamart.aggregates.predictor_last_snapshot()
tabs["predictors_overview"] = (
self.datamart.aggregates.predictors_overview()
)

if predictor_binning and self.datamart.predictor_data is not None:
Expand Down
Loading

0 comments on commit 4431274

Please sign in to comment.