replaced partial_plot function param 'data' with 'frame', and kept the old param as deprecated
h2oai · Oct 31, 2023 · 445b209 · 445b209
1 parent cd38506
commit 445b209
Show file tree

Hide file tree

Showing 13 changed files with 53 additions and 53 deletions.
diff --git a/h2o-py/h2o/model/model_base.py b/h2o-py/h2o/model/model_base.py
@@ -1251,15 +1251,15 @@ def _get_metrics(o, train, valid, xval):
             metrics["train"] = output["training_metrics"]
         return metrics
 
-    @deprecated_params({'save_to_file': 'save_plot_path'})
-    def partial_plot(self, data, cols=None, destination_key=None, nbins=20, weight_column=None,
+    @deprecated_params({'data': 'frame', 'save_to_file': 'save_plot_path'})
+    def partial_plot(self, frame, cols=None, destination_key=None, nbins=20, weight_column=None,
                      plot=True, plot_stddev=True, figsize=(7, 10), server=False, include_na=False, user_splits=None,
                      col_pairs_2dpdp=None, save_plot_path=None, row_index=None, targets=None):
         """
         Create partial dependence plot which gives a graphical depiction of the marginal effect of a variable on the
         response. The effect of a variable is measured in change in the mean response.
 
-        :param H2OFrame data: An H2OFrame object used for scoring and constructing the plot.
+        :param H2OFrame frame: An H2OFrame object used for scoring and constructing the plot.
         :param cols: Feature(s) for which partial dependence will be calculated.
         :param destination_key: A key reference to the created partial dependence tables in H2O.
         :param nbins: Number of bins used. For categorical columns make sure the number of bins exceed the level count. If you enable ``add_missing_NA``, the returned length will be nbin+1.
@@ -1277,7 +1277,7 @@ def partial_plot(self, data, cols=None, destination_key=None, nbins=20, weight_c
 
         :returns: Plot and list of calculated mean response tables for each feature requested + the resulting plot (can be accessed using ``result.figure()``).
         """
-        if not isinstance(data, h2o.H2OFrame): raise ValueError("Data must be an instance of H2OFrame.")
+        if not isinstance(frame, h2o.H2OFrame): raise ValueError("frame must be an instance of H2OFrame.")
         num_1dpdp = 0
         num_2dpdp = 0
         if cols is not None:
@@ -1301,22 +1301,22 @@ def partial_plot(self, data, cols=None, destination_key=None, nbins=20, weight_c
         # Check cols specified exist in frame data
         if cols is not None:
             for xi in cols:
-                if xi not in data.names:
+                if xi not in frame.names:
                     raise H2OValueError("Column %s does not exist in the training frame." % xi)
         if col_pairs_2dpdp is not None:
             for oneP in col_pairs_2dpdp:
-                if oneP[0] not in data.names:
+                if oneP[0] not in frame.names:
                     raise H2OValueError("Column %s does not exist in the training frame." % oneP[0])
-                if oneP[1] not in data.names:
+                if oneP[1] not in frame.names:
                     raise H2OValueError("Column %s does not exist in the training frame." % oneP[1])
                 if oneP[0] is oneP[1]:
                     raise H2OValueError("2D pdp must be with different columns.")
         if isinstance(weight_column, int) and not (weight_column == -1):
             raise H2OValueError("Weight column should be a column name in your data frame.")
         elif isinstance(weight_column, str): # index is a name
-            if weight_column not in data.names:
+            if weight_column not in frame.names:
                 raise H2OValueError("Column %s does not exist in the data frame" % weight_column)
-            weight_column = data.names.index(weight_column)
+            weight_column = frame.names.index(weight_column)
 
         if row_index is not None:
             if not isinstance(row_index, int):
@@ -1334,7 +1334,7 @@ def partial_plot(self, data, cols=None, destination_key=None, nbins=20, weight_c
         kwargs = {}
         kwargs["cols"] = cols
         kwargs["model_id"] = self.model_id
-        kwargs["frame_id"] = data.frame_id
+        kwargs["frame_id"] = frame.frame_id
         kwargs["nbins"] = nbins
         kwargs["destination_key"] = destination_key
         kwargs["weight_column_index"] = weight_column
@@ -1344,7 +1344,7 @@ def partial_plot(self, data, cols=None, destination_key=None, nbins=20, weight_c
         if targets:
             kwargs["targets"] = targets
 
-        self.__generate_user_splits(user_splits, data, kwargs)
+        self.__generate_user_splits(user_splits, frame, kwargs)
         json = H2OJob(h2o.api("POST /3/PartialDependence/", data=kwargs),  job_type="PartialDependencePlot").poll()
         json = h2o.api("GET /3/PartialDependence/%s" % json.dest_key)
 
@@ -1353,7 +1353,7 @@ def partial_plot(self, data, cols=None, destination_key=None, nbins=20, weight_c
 
         # Plot partial dependence plots using matplotlib
         return self.__generate_partial_plots(num_1dpdp, num_2dpdp, plot, server, pps, figsize, 
-                                             col_pairs_2dpdp, data, nbins,
+                                             col_pairs_2dpdp, frame, nbins,
                                              kwargs["user_cols"], kwargs["num_user_splits"], 
                                              plot_stddev, cols, save_plot_path, row_index, targets, include_na)
 

diff --git a/h2o-py/tests/testdir_algos/gbm/pyunit_gbm_pojo_import.py b/h2o-py/tests/testdir_algos/gbm/pyunit_gbm_pojo_import.py
@@ -30,8 +30,8 @@ def prostate_pojo_import():
     assert_frame_equal(preds_original.as_data_frame(), preds_imported.as_data_frame())
 
     # 2. check we can get PDPs
-    pdp_original = model.partial_plot(data=prostate, cols=['AGE'], server=True, plot=False)
-    pdp_imported = model_imported.partial_plot(data=prostate, cols=['AGE'], server=True, plot=False)
+    pdp_original = model.partial_plot(frame=prostate, cols=['AGE'], server=True, plot=False)
+    pdp_imported = model_imported.partial_plot(frame=prostate, cols=['AGE'], server=True, plot=False)
     assert_frame_equal(pdp_original[0].as_data_frame(), pdp_imported[0].as_data_frame())
 
 

diff --git a/...testdir_algos/glm/pyunit_plot_functions__add_saving_parameter_and_decorate_plot_result.py b/...testdir_algos/glm/pyunit_plot_functions__add_saving_parameter_and_decorate_plot_result.py
@@ -151,8 +151,8 @@ def partial_plots():
     with TemporaryDirectory() as tmpdir:
         path1 = "{}/plot1.png".format(tmpdir)
         path2 = "{}/plot2.png".format(tmpdir)
-        test_plot_result_saving(gbm_model.partial_plot(data=data, cols=['AGE'], server=True, plot=True, row_index=1), path2,
-                                gbm_model.partial_plot(data=data, cols=['AGE'], server=True, plot=True, row_index=1, save_plot_path=path1), path1)
+        test_plot_result_saving(gbm_model.partial_plot(frame=data, cols=['AGE'], server=True, plot=True, row_index=1), path2,
+                                gbm_model.partial_plot(frame=data, cols=['AGE'], server=True, plot=True, row_index=1, save_plot_path=path1), path1)
 
 
 def partial_plots_multinomial():
@@ -178,9 +178,9 @@ def partial_plots_multinomial():
 
         test_plot_result_saving(model.plot(), path2, model.plot(save_plot_path=path1), path1)
 
-        test_plot_result_saving(model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=True, plot=True,
+        test_plot_result_saving(model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=True, plot=True,
                                                    server=True), path2,
-                                model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=True, plot=True,
+                                model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=True, plot=True,
                                                    server=True, save_to_file=path1), path1)
 
 def roc_pr_curve():

diff --git a/h2o-py/tests/testdir_jira/pyunit_pubdev_7705.py b/h2o-py/tests/testdir_jira/pyunit_pubdev_7705.py
@@ -15,8 +15,8 @@ def partial_plot_row_index():
     gbm_model.train(x=x, y=y, training_frame=data)
 
     # Generate Partial Dependence for row index -1 and row index 0, they should differ
-    pdp = gbm_model.partial_plot(data=data, cols=['RACE'], plot=False, plot_stddev=False, row_index=-1)
-    pdp0 = gbm_model.partial_plot(data=data, cols=['RACE'], plot=False, plot_stddev=False, row_index=0)
+    pdp = gbm_model.partial_plot(frame=data, cols=['RACE'], plot=False, plot_stddev=False, row_index=-1)
+    pdp0 = gbm_model.partial_plot(frame=data, cols=['RACE'], plot=False, plot_stddev=False, row_index=0)
     assert not(pyunit_utils.equal_two_arrays(pdp[0][1], pdp0[0][1], throw_error=False))
 
 

diff --git a/h2o-py/tests/testdir_jira/pyunit_pubdev_7949_pdp.py b/h2o-py/tests/testdir_jira/pyunit_pubdev_7949_pdp.py
@@ -20,7 +20,7 @@ def test_pdp_user_splits_no_cardinality_check():
     user_splits = {
         "AGE": ["64", "75"]
     }
-    pdp = gbm_model.partial_plot(data=data, cols=['AGE'], user_splits=user_splits, plot=False)
+    pdp = gbm_model.partial_plot(frame=data, cols=['AGE'], user_splits=user_splits, plot=False)
     assert len(pdp[0].cell_values) == 2
 
 

diff --git a/h2o-py/tests/testdir_misc/pyunit_partial_plots.py b/h2o-py/tests/testdir_misc/pyunit_partial_plots.py
@@ -23,7 +23,7 @@ def partial_plot_test():
     gbm_model.train(x=x, y=y, training_frame=data)
 
     # Plot Partial Dependence for one feature then for both
-    pdp1 = gbm_model.partial_plot(data=data, cols=['AGE'], server=True, plot=True)
+    pdp1 = gbm_model.partial_plot(frame=data, cols=['AGE'], server=True, plot=True)
     # Manual test
     h2o_mean_response_pdp1 = pdp1[0]["mean_response"]
     h2o_stddev_response_pdp1 = pdp1[0]["stddev_response"]
@@ -34,7 +34,7 @@ def partial_plot_test():
     assert h2o_stddev_response_pdp1 == pdp_manual[1]
     assert h2o_std_error_mean_response_pdp1 == pdp_manual[2]
 
-    pdp2=gbm_model.partial_plot(data=data, cols=['AGE', 'RACE'], server=True, plot=False)
+    pdp2=gbm_model.partial_plot(frame=data, cols=['AGE', 'RACE'], server=True, plot=False)
     # Manual test
     h2o_mean_response_pdp2 = pdp2[0]["mean_response"]
     h2o_stddev_response_pdp2 = pdp2[0]["stddev_response"]
@@ -56,7 +56,7 @@ def partial_plot_test():
     assert h2o_std_error_mean_response_pdp2_race == pdp_manual[2]
 
     # Plot Partial Dependence for one row 
-    pdp_row = gbm_model.partial_plot(data=data, cols=['AGE'], server=True, plot=True, row_index=1)
+    pdp_row = gbm_model.partial_plot(frame=data, cols=['AGE'], server=True, plot=True, row_index=1)
     # Manual test
     h2o_mean_response_pdp_row = pdp_row[0]["mean_response"]
     h2o_stddev_response_pdp_row = pdp_row[0]["stddev_response"]

diff --git a/h2o-py/tests/testdir_misc/pyunit_partial_plots_multinomial.py b/h2o-py/tests/testdir_misc/pyunit_partial_plots_multinomial.py
@@ -32,55 +32,55 @@ def partial_plot_test():
     # one class target
     cols = ["petal_len"]
     targets = ["Iris-setosa"]
-    pdp_petal_len_se = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=False, 
+    pdp_petal_len_se = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=False, 
                                           plot=True, server=True)
     print(pdp_petal_len_se)
 
-    pdp_petal_len_se_std = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=True, 
+    pdp_petal_len_se_std = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=True, 
                                               plot=True, server=True)
     print(pdp_petal_len_se_std)
 
     # two clasess target
     targets = ["Iris-setosa", "Iris-virginica"]
-    pdp_petal_len_se_vi = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=False, 
+    pdp_petal_len_se_vi = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=False, 
                                              plot=True, server=True)
     print(pdp_petal_len_se_vi)
 
-    pdp_petal_len_se_vi_std = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=True, 
+    pdp_petal_len_se_vi_std = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=True, 
                                                  plot=True, server=True)
     print(pdp_petal_len_se_vi_std)
 
     # three classes target
     targets = ["Iris-setosa", "Iris-virginica", "Iris-versicolor"]
-    pdp_petal_len_se_vi_ve_std = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=True, 
+    pdp_petal_len_se_vi_ve_std = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=True, 
                                                     plot=True, server=True)
     print(pdp_petal_len_se_vi_ve_std)
 
     # two columns and three classes target
     cols = ["sepal_len", "petal_len"]
-    pdp_petal_len_sepal_len_se_vi_ve_std = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=True, 
+    pdp_petal_len_sepal_len_se_vi_ve_std = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=True, 
                                                               plot=True, server=True)
     print(pdp_petal_len_sepal_len_se_vi_ve_std)
 
     # three columns and three classes target  
     cols = ["sepal_len","petal_len", "sepal_wid"]
-    pdp_petal_len_sepal_len_sepal_wid_se_vi_ve = model.partial_plot(data=iris, cols=cols, targets=targets, 
+    pdp_petal_len_sepal_len_sepal_wid_se_vi_ve = model.partial_plot(frame=iris, cols=cols, targets=targets, 
                                                                     plot_stddev=False, plot=True, server=True)
     print(pdp_petal_len_sepal_len_sepal_wid_se_vi_ve)
 
-    pdp_petal_len_sepal_len_sepal_wid_se_vi_ve_std = model.partial_plot(data=iris, cols=cols, targets=targets, 
+    pdp_petal_len_sepal_len_sepal_wid_se_vi_ve_std = model.partial_plot(frame=iris, cols=cols, targets=targets, 
                                                                         plot_stddev=True, plot=True, server=True)
     print(pdp_petal_len_sepal_len_sepal_wid_se_vi_ve_std)
 
     # categorical column - nonsense column, just for testing
     cols = ["random_cat"]
     targets = ["Iris-setosa"]
-    pdp_petal_len_cat = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=False, plot=True, 
+    pdp_petal_len_cat = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=False, plot=True, 
                                            server=True)
     print(pdp_petal_len_cat)
 
     targets = ["Iris-setosa", "Iris-versicolor"]
-    pdp_petal_len_cat_std = model.partial_plot(data=iris, cols=cols, targets=targets, plot_stddev=True, plot=True, 
+    pdp_petal_len_cat_std = model.partial_plot(frame=iris, cols=cols, targets=targets, plot_stddev=True, plot=True, 
                                                server=True)
     print(pdp_petal_len_cat_std)
 

diff --git a/h2o-py/tests/testdir_misc/pyunit_pubdev_5706_usersplits_pdp.py b/h2o-py/tests/testdir_misc/pyunit_pubdev_5706_usersplits_pdp.py
@@ -34,10 +34,10 @@ def partial_plot_test_with_user_splits():
     # pdp without weight or NA
     with pyunit_utils.TemporaryDirectory() as tmpdir:
         file, filename = tempfile.mkstemp(suffix=".png", dir=tmpdir)
-        pdpOrig = gbm_model.partial_plot(data=data,cols=['AGE', 'RACE', 'DPROS'],server=True, plot=True, save_to_file=filename)
+        pdpOrig = gbm_model.partial_plot(frame=data,cols=['AGE', 'RACE', 'DPROS'],server=True, plot=True, save_to_file=filename)
         assert os.path.getsize(filename) > 0
 
-    pdpUserSplit = gbm_model.partial_plot(data=data,cols=['AGE', 'RACE', 'DPROS'],server=True, plot=True,
+    pdpUserSplit = gbm_model.partial_plot(frame=data,cols=['AGE', 'RACE', 'DPROS'],server=True, plot=True,
                                           user_splits=user_splits)
 
     # compare results

diff --git a/h2o-py/tests/testdir_misc/pyunit_pubdev_5761_pdp_NA.py b/h2o-py/tests/testdir_misc/pyunit_pubdev_5761_pdp_NA.py
@@ -47,16 +47,16 @@ def partial_plot_test():
     gbm_model.train(x=x, y=y, training_frame=data)
 
     # pdp without weight or NA
-    pdpOrig = gbm_model.partial_plot(data=data,cols=['AGE', 'RACE'],server=True, plot=True)
+    pdpOrig = gbm_model.partial_plot(frame=data,cols=['AGE', 'RACE'],server=True, plot=True)
     # pdp with constant weight and NA
-    pdpcWNA = gbm_model.partial_plot(data=data, cols=['AGE', 'RACE'], server=True, plot=True,
+    pdpcWNA = gbm_model.partial_plot(frame=data, cols=['AGE', 'RACE'], server=True, plot=True,
                                      weight_column="constWeight", include_na=True)
 
     # compare results
     pyunit_utils.assert_H2OTwoDimTable_equal_upto(pdpOrig[0], pdpcWNA[0], pdpOrig[0].col_header, tolerance=1e-10)
     pyunit_utils.assert_H2OTwoDimTable_equal_upto(pdpOrig[1], pdpcWNA[1], pdpOrig[1].col_header, tolerance=1e-10)
     # pdp with changing weight NA
-    pdpvWNA = gbm_model.partial_plot(data=data, cols=['AGE', 'RACE'], server=True, plot=True,
+    pdpvWNA = gbm_model.partial_plot(frame=data, cols=['AGE', 'RACE'], server=True, plot=True,
                                      weight_column="variWeight", include_na=True)
     ageList = pyunit_utils.extract_col_value_H2OTwoDimTable(pdpvWNA[0], "age")
     raceList = pyunit_utils.extract_col_value_H2OTwoDimTable(pdpvWNA[1], "race")

diff --git a/h2o-py/tests/testdir_misc/pyunit_pubdev_5921_na_prints_large.py b/h2o-py/tests/testdir_misc/pyunit_pubdev_5921_na_prints_large.py
@@ -24,11 +24,11 @@ def partial_plot_test():
     gbm_model.train(x=x, y=y, training_frame=data)
 
     # pdp with weight and no NA
-    pdpw = gbm_model.partial_plot(data=test, cols=["Input_miss", "Distance"], server=True, plot=False,
+    pdpw = gbm_model.partial_plot(frame=test, cols=["Input_miss", "Distance"], server=True, plot=False,
                                   weight_column=WC)
 
     # pdp with weight and NA
-    pdpwNA = gbm_model.partial_plot(data=test, cols=["Input_miss", "Distance"], server=True, plot=False,
+    pdpwNA = gbm_model.partial_plot(frame=test, cols=["Input_miss", "Distance"], server=True, plot=False,
                                     weight_column=WC, include_na = True)
     input_miss_list = pyunit_utils.extract_col_value_H2OTwoDimTable(pdpwNA[0], "input_miss")
     assert math.isnan(input_miss_list[-1]), "Expected last element to be nan but is not."
@@ -47,4 +47,4 @@ def partial_plot_test():
 if __name__ == "__main__":
   pyunit_utils.standalone_test(partial_plot_test)
 else:
-  partial_plot_test()
+  partial_plot_test()
diff --git a/h2o-py/tests/testdir_misc/pyunit_pubdev_6438_2D_pdp.py b/h2o-py/tests/testdir_misc/pyunit_pubdev_6438_2D_pdp.py
@@ -32,12 +32,12 @@ def partial_plot_test_with_user_splits():
                           67.63157894736842, 69.52631578947368, 71.42105263157895, 73.3157894736842,
                           75.21052631578948, 77.10526315789474]
     user_splits['RACE'] = ["Black", "White"]
-    pdpUserSplit2D = gbm_model.partial_plot(data=data,server=True, plot=True, user_splits=user_splits, 
+    pdpUserSplit2D = gbm_model.partial_plot(frame=data,server=True, plot=True, user_splits=user_splits, 
                                             col_pairs_2dpdp=[['AGE', 'PSA'], ['AGE', 'RACE']], save_to_file=filename)  
-    pdpUserSplit1D2D = gbm_model.partial_plot(data=data, cols=['AGE', 'RACE', 'DCAPS'], server=True, plot=True, 
+    pdpUserSplit1D2D = gbm_model.partial_plot(frame=data, cols=['AGE', 'RACE', 'DCAPS'], server=True, plot=True, 
                                               user_splits=user_splits, 
                                               col_pairs_2dpdp=[['AGE', 'PSA'], ['AGE', 'RACE']], save_to_file=filename)
-    pdpUserSplit1D = gbm_model.partial_plot(data=data,cols=['AGE', 'RACE', 'DCAPS'], server=True, plot=True, 
+    pdpUserSplit1D = gbm_model.partial_plot(frame=data,cols=['AGE', 'RACE', 'DCAPS'], server=True, plot=True, 
                                             user_splits=user_splits, save_to_file=filename)
     if os.path.isfile(filename):
         os.remove(filename)

diff --git a/h2o-py/tests/testdir_misc/pyunit_pubdev_6775_2D_pdp_xgboost.py b/h2o-py/tests/testdir_misc/pyunit_pubdev_6775_2D_pdp_xgboost.py
@@ -20,9 +20,9 @@ def partial_plot_test_with_no_user_splits_no_1DPDP():
     gbm_model.train(x=x, y=y, training_frame=data)
 
     # pdp without weight or NA
-    pdp2dOnly = gbm_model.partial_plot(data=data, server=True, plot=False, 
+    pdp2dOnly = gbm_model.partial_plot(frame=data, server=True, plot=False, 
         col_pairs_2dpdp=[['AGE', 'PSA'],['AGE', 'RACE']])
-    pdp1D2D = gbm_model.partial_plot(data=data, cols=['AGE', 'RACE', 'DCAPS'], server=True, plot=False,
+    pdp1D2D = gbm_model.partial_plot(frame=data, cols=['AGE', 'RACE', 'DCAPS'], server=True, plot=False,
                                               col_pairs_2dpdp=[['AGE', 'PSA'], ['AGE', 'RACE']])
     # compare results 2D pdp 
     pyunit_utils.assert_H2OTwoDimTable_equal_upto(pdp2dOnly[0], pdp1D2D[3],