
rename pred_from_base_models_with_uncertainty to predict_with_uncertainty and add init_score
paulbkoch committed Nov 20, 2024
1 parent 9653720 commit d6eb799
Showing 4 changed files with 18 additions and 9 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -596,7 +596,7 @@ We also build on top of many great packages. Please check them out!
- [Interpreting Machine Learning Models with InterpretML](https://www.youtube.com/watch?v=ERNuFfsknhk)
- [Machine Learning Model Interpretability using AzureML & InterpretML (Explainable Boosting Machine)](https://www.youtube.com/watch?v=0ocVtXU8o1I)
- [A Case Study of Using Explainable Boosting Machines](https://uncch.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=063d6839-e8db-40e0-8df4-b0fc012e709b&start=0)
- - [From SHAP to EBM: Explain your Gradient Boosting Models in Python](https://media.ccc.de/v/sps24-56262-from-shap-to-ebm-explain-you)
+ - [From SHAP to EBM: Explain your Gradient Boosting Models in Python](https://www.youtube.com/watch?v=hnZjw77-1rE)

# External links

@@ -63,7 +63,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The synthetic dataset has a significant number of smooth functions. To handle these smoothly varying relationships effectively, we incorporate a parameter called 'smoothing_rounds' in the EBM fitting process. 'smoothing_rounds' initiates the boosting process in a non-greedy manner by selecting random split points when constructing the internal decision trees. This strategy helps to avoid initial overfitting and sets up baseline smooth partial responses before changing to using a greedy approach that is better for fitting any remaining sharp transitions in the partial responses. We've set the 'cyclic_progress' value to 0.0, which means that following the smoothing rounds, all boosting rounds are dedicated to the greedy approach. The cyclic rounds will not make boosting progress, but will instead be used to periodically update the internal gain calculations. We also use the reg_alpha regularization parameter to further smooth the results. EBMs additionally support reg_lambda and max_delta_step, which might be useful in some cases.\n",
+ "The synthetic dataset has a significant number of smooth functions. To handle these smoothly varying relationships effectively, we incorporate a parameter called 'smoothing_rounds' in the EBM fitting process. 'smoothing_rounds' initiates the boosting process in a non-greedy manner by selecting random split points when constructing the internal decision trees. This strategy helps to avoid initial overfitting and sets up baseline smooth partial responses before changing to using a greedy approach that is better for fitting any remaining sharp transitions in the partial responses. We also use the reg_alpha regularization parameter to further smooth the results. EBMs additionally support reg_lambda and max_delta_step, which might be useful in some cases.\n",
"\n",
"For some datasets with large outliers, increasing the validation_size and/or taking the median model from the outer bags might be helpful as described here:\n",
"https://github.com/interpretml/interpret/issues/548"
@@ -77,7 +77,7 @@
"source": [
"from interpret.glassbox import ExplainableBoostingRegressor\n",
"\n",
- "ebm = ExplainableBoostingRegressor(names, types, interactions=3, smoothing_rounds=5000, cyclic_progress=0.0, reg_alpha=10.0)\n",
+ "ebm = ExplainableBoostingRegressor(names, types, interactions=3, smoothing_rounds=5000, reg_alpha=10.0)\n",
"ebm.fit(X_train, y_train)"
]
},
17 changes: 13 additions & 4 deletions python/interpret-core/interpret/glassbox/_ebm/_ebm.py
@@ -2253,14 +2253,16 @@ def scale(self, term, factor):

return self

- def pred_from_base_models_with_uncertainty(self, instances):
+ def predict_with_uncertainty(self, X, init_score=None):
"""Gets raw scores and uncertainties from the bagged base models.
Generates predictions by averaging outputs across all bagged models, and estimates
uncertainty using the standard deviation of predictions across bags.
Args:
- instances: ndarray of shape (n_samples, n_features)
+ X: ndarray of shape (n_samples, n_features)
      The input samples to predict on.
+ init_score: Optional. Either a model that can generate scores or per-sample initialization scores.
+     If per-sample scores are provided, they should be the same length as X.
Returns:
ndarray of shape (n_samples, 2)
@@ -2269,9 +2271,15 @@ def pred_from_base_models_with_uncertainty(self, instances):
"""
check_is_fitted(self, "has_fitted_")

- X, n_samples = preclean_X(
-     instances, self.feature_names_in_, self.feature_types_in_
+ init_score, X, n_samples = clean_init_score_and_X(
+     self.link_,
+     self.link_param_,
+     init_score,
+     X,
+     self.feature_names_in_,
+     self.feature_types_in_,
  )

preds_per_bag = np.zeros((n_samples, len(self.bagged_intercept_)))
# Get predictions from each bagged model
for bag_index in range(len(self.bagged_intercept_)):
@@ -2285,6 +2293,7 @@ def pred_from_base_models_with_uncertainty(self, instances):
intercept=self.bagged_intercept_[bag_index],
term_scores=[scores[bag_index] for scores in self.bagged_scores_],
term_features=self.term_features_,
+ init_score=init_score,
)
preds_per_bag[:, bag_index] = scores

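The aggregation this method performs can be sketched in plain NumPy. This is an illustration, not the library's code: `preds_per_bag` stands in for the per-bag raw scores collected in the loop above, and the values are invented.

```python
import numpy as np

# Stand-in for the per-bag raw scores the method collects:
# one row per sample, one column per outer bag (values invented).
preds_per_bag = np.array([
    [0.9, 1.1, 1.0],   # sample 0
    [2.0, 2.4, 2.2],   # sample 1
])

# Column 0: prediction (mean across the bagged models).
# Column 1: uncertainty (standard deviation across the bagged models).
result = np.column_stack([
    preds_per_bag.mean(axis=1),
    preds_per_bag.std(axis=1),
])
print(result.shape)  # (2, 2), i.e. (n_samples, 2)
```

This matches the documented return shape: one mean score and one spread estimate per sample.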
4 changes: 2 additions & 2 deletions python/interpret-core/tests/glassbox/ebm/test_ebm.py
@@ -1180,12 +1180,12 @@ def test_ebm_uncertainty():
)
clf.fit(X, y)

- result = clf.pred_from_base_models_with_uncertainty(X)
+ result = clf.predict_with_uncertainty(X)
assert result.shape == (len(X), 2), "Should return (n_samples, 2) shape"

clf2 = ExplainableBoostingClassifier(outer_bags=5, random_state=42)
clf2.fit(X, y)
- result_same_seed = clf2.pred_from_base_models_with_uncertainty(X)
+ result_same_seed = clf2.predict_with_uncertainty(X)
assert np.array_equal(
result,
result_same_seed,
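Conceptually, the new `init_score` argument supplies a per-sample offset added into each bag's raw score. A small NumPy sketch (invented values, not the library's implementation) shows the expected effect: since every bag for a given sample receives the same offset, the mean prediction shifts while the between-bag standard deviation, the uncertainty estimate, is unchanged.

```python
import numpy as np

# Invented per-bag raw scores: rows are samples, columns are outer bags.
preds_per_bag = np.array([
    [0.9, 1.1, 1.0],
    [2.0, 2.4, 2.2],
])
init_score = np.array([0.5, -0.5])  # hypothetical per-sample offsets

# The same offset is applied to every bag for a given sample...
shifted = preds_per_bag + init_score[:, None]

# ...so the mean moves by the offset, but the spread does not.
print(shifted.mean(axis=1))  # [1.5, 1.7]
print(np.allclose(shifted.std(axis=1), preds_per_bag.std(axis=1)))  # True
```

This is why an `init_score` (for example, from a prior model whose predictions the EBM was boosted on top of) changes the returned predictions but not the returned uncertainties.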
