Skip to content

Commit

Permalink
Merge pull request #297 from pegasystems/Adding_AGB_Monitoring_Support
Browse files Browse the repository at this point in the history
Picking up ADM model type and using in summaries
  • Loading branch information
operdeck authored Dec 13, 2024
2 parents 8472f06 + 30ffc95 commit 4431274
Show file tree
Hide file tree
Showing 11 changed files with 199 additions and 206 deletions.
10 changes: 8 additions & 2 deletions python/pdstools/adm/ADMDatamart.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ def __init__(
self.aggregates = Aggregates(datamart=self)
self.agb = AGB(datamart=self)
self.generate = Reports(datamart=self)
self.cdh_guidelines = CDHGuidelines()
self.cdh_guidelines = (
CDHGuidelines()
) # not sure if this should be part of the ADM DM

self.model_data = self._validate_model_data(
model_df, query=query, extract_pyname_keys=extract_pyname_keys
Expand Down Expand Up @@ -310,6 +312,11 @@ def _validate_model_data(
if "Treatment" in schema.names():
self.context_keys.append("Treatment")

# Model technique (NaiveBayes or GradientBoost) added in '24 (US-648869 and related)
if "ModelTechnique" not in schema.names():
df = df.with_columns(
ModelTechnique=pl.lit(None),
)
self.context_keys = [k for k in self.context_keys if k in schema.names()]

df = df.with_columns(
Expand Down Expand Up @@ -399,7 +406,6 @@ def apply_predictor_categorization(
categorization() if callable(categorization) else categorization
)


if df is not None:
return df.with_columns(PredictorCategory=categorization_expr)

Expand Down
80 changes: 75 additions & 5 deletions python/pdstools/adm/Aggregates.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,15 +460,20 @@ def name_normalizer(x):
.agg(
pl.col("SnapshotTime").min().cast(pl.Date).alias("DateRange Min"),
pl.col("SnapshotTime").max().cast(pl.Date).alias("DateRange Max"),
pl.col("Positives").sum(),
pl.col("ResponseCount").sum(),
pl.sum(["Positives", "ResponseCount"]),
(cdh_utils.weighted_performance_polars() * 100).alias("Performance"),
pl.col("Configuration").cast(pl.Utf8),
pl.col("Configuration")
.cast(pl.Utf8)
.str.to_uppercase()
.is_in([x.upper() for x in self.cdh_guidelines.standard_configurations])
.alias("isNBADModelConfiguration"),
(pl.col("ModelTechnique") == "GradientBoost")
.any(ignore_nulls=False)
.alias("usesAGB"),
(pl.col("ModelTechnique") == "GradientBoost")
.all(ignore_nulls=False)
.alias("usesAGBOnly"),
actionIdentifierExpr.drop_nulls()
.n_unique()
.alias("Total Number of Actions"),
Expand Down Expand Up @@ -561,7 +566,70 @@ def name_normalizer(x):
)
)

def predictor_last_snapshot(self) -> Optional[pl.DataFrame]:
def summary_by_configuration(self) -> pl.DataFrame:
    """
    Generates a summary of the ADM model configurations.

    Aggregates the last model snapshot per Configuration (plus Channel and
    Direction when those are present as context keys), reporting unique
    action/treatment counts, AGB (gradient boosting) usage, model counts
    and summed response counts.

    Returns
    -------
    pl.DataFrame
        A Polars DataFrame containing the configuration summary, sorted by
        the group-by columns.
    """

    # Aggregations over the action dimension. Treatment and Issue are
    # optional context keys, so guard for their presence to keep the
    # expressions valid against any datamart schema.
    action_dim_agg = [pl.col("Name").n_unique().alias("Actions")]
    if "Treatment" in self.datamart.context_keys:
        action_dim_agg += [
            pl.col("Treatment").n_unique().alias("Unique Treatments")
        ]
    else:
        # Keep the output schema stable even when there is no Treatment
        # context key: emit a constant 0 instead of dropping the column.
        action_dim_agg += [pl.lit(0).alias("Unique Treatments")]

    if "Issue" in self.datamart.context_keys:
        action_dim_agg += [
            pl.col("Issue").cast(pl.String).unique().alias("Used for (Issues)")
        ]

    # Group by Configuration, optionally refined by Channel/Direction
    # when those are part of the context keys.
    group_by_cols = ["Configuration"] + [
        c for c in ["Channel", "Direction"] if c in self.datamart.context_keys
    ]
    configuration_summary = (
        self.last(table="model_data")
        .group_by(group_by_cols)
        .agg(
            [
                # AGB flag per configuration: "Yes" when any model in the
                # group uses GradientBoost, "Unknown" when ModelTechnique
                # is (partially) missing, otherwise "No". Note the "Yes"
                # branch wins even if some values in the group are null.
                pl.when((pl.col("ModelTechnique") == "GradientBoost").any())
                .then(pl.lit("Yes"))
                .when(pl.col("ModelTechnique").is_null().any())
                .then(pl.lit("Unknown"))
                .otherwise(pl.lit("No"))
                .alias("AGB")
            ]
            + [
                pl.col("ModelID").n_unique(),
            ]
            + action_dim_agg
            + [pl.sum(["ResponseCount", "Positives"])],
        )
        .with_columns(
            [
                # pl.col("Configuration")
                # .is_in(standardNBADNames.keys())
                # .alias("Standard in NBAD Framework"),
                # Average number of model instances per action; rounded
                # for presentation in reports.
                (pl.col("ModelID") / pl.col("Actions"))
                .round(2)
                .alias("ModelsPerAction"),
            ]
        )
        .sort(group_by_cols)
    )
    if "Issue" in self.datamart.context_keys:
        # Flatten the per-group list of issues into a sorted,
        # comma-separated string for readability.
        configuration_summary = configuration_summary.with_columns(
            pl.col("Used for (Issues)").list.unique().list.sort().list.join(", ")
        )

    return configuration_summary

def predictors_overview(self) -> Optional[pl.DataFrame]:
"""
Generate a summary of the last snapshot of predictor data.
Expand All @@ -580,7 +648,7 @@ def predictor_last_snapshot(self) -> Optional[pl.DataFrame]:

predictor_summary = (
self.last(table="predictor_data")
.filter(pl.col("PredictorName") != "Classifier")
.filter(pl.col("PredictorName") != "Classifier") # TODO not name, there is a type
.join(
self.last(table="model_data")
.select(["ModelID"] + model_identifiers)
Expand Down Expand Up @@ -624,7 +692,7 @@ def predictor_last_snapshot(self) -> Optional[pl.DataFrame]:
)

return predictor_summary
except ValueError:
except ValueError: # really? swallowing?
return None

def overall_summary(
Expand Down Expand Up @@ -730,6 +798,8 @@ def overall_summary(
# TODO there was something about OmniAdaptiveModel here - but I don't recall what was the issue
pl.col("usesNBAD").any(),
pl.col("usesNBADOnly").all(),
pl.col("usesAGB").any(),
pl.col("usesAGBOnly").all(),
# pl.lit(usesNBAD).alias("usesNBAD"),
# ((pl.len() > 0) & pl.lit(usesNBAD and usesNBADOnly)).alias(
# "usesNBADOnly"
Expand Down
4 changes: 2 additions & 2 deletions python/pdstools/adm/Reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,8 +565,8 @@ def excel_report(
}

if self.datamart.predictor_data is not None:
tabs["predictor_last_snapshot"] = (
self.datamart.aggregates.predictor_last_snapshot()
tabs["predictors_overview"] = (
self.datamart.aggregates.predictors_overview()
)

if predictor_binning and self.datamart.predictor_data is not None:
Expand Down
Loading

0 comments on commit 4431274

Please sign in to comment.