diff --git a/h2o-algos/src/main/java/hex/ensemble/Metalearner.java b/h2o-algos/src/main/java/hex/ensemble/Metalearner.java index 35b24d3ece42..3973349b1f74 100644 --- a/h2o-algos/src/main/java/hex/ensemble/Metalearner.java +++ b/h2o-algos/src/main/java/hex/ensemble/Metalearner.java @@ -118,6 +118,7 @@ protected void setCommonParams(P parms) { parms._offset_column = _model._parms._offset_column; parms._main_model_time_budget_factor = _model._parms._main_model_time_budget_factor; parms._custom_metric_func = _model._parms._custom_metric_func; + parms._gainslift_bins = _model._parms._gainslift_bins; } protected void setCrossValidationParams(P parms) { diff --git a/h2o-algos/src/main/java/hex/gam/MetricBuilderGAM.java b/h2o-algos/src/main/java/hex/gam/MetricBuilderGAM.java index 9d0374f44713..138873af3fc6 100644 --- a/h2o-algos/src/main/java/hex/gam/MetricBuilderGAM.java +++ b/h2o-algos/src/main/java/hex/gam/MetricBuilderGAM.java @@ -156,6 +156,7 @@ public ModelMetrics makeModelMetrics(Model m, Frame f, Frame adaptedFrame, Frame Vec weights = f.vec(gamM._parms._weights_column); if (resp != null && fractionalbinomial != _glmf._family) { gl = new GainsLift(preds.lastVec(), resp, weights); + gl._groups = m._parms._gainslift_bins; gl.exec(gamM._output._job); } } diff --git a/h2o-algos/src/main/java/hex/glm/GLMMetricBuilder.java b/h2o-algos/src/main/java/hex/glm/GLMMetricBuilder.java index e6418a0ba5ea..32e9b0855eb7 100644 --- a/h2o-algos/src/main/java/hex/glm/GLMMetricBuilder.java +++ b/h2o-algos/src/main/java/hex/glm/GLMMetricBuilder.java @@ -246,6 +246,7 @@ protected void computeAIC(GLMModel gm) { Vec weights = f.vec(m._parms._weights_column); if (resp != null && Family.fractionalbinomial != _glmf._family) { // don't calculate for frac binomial gl = new GainsLift(preds.lastVec(), resp, weights); + gl._groups = m._parms._gainslift_bins; gl.exec(m._output._job); } } diff --git a/h2o-algos/src/main/java/hex/schemas/DeepLearningV3.java 
b/h2o-algos/src/main/java/hex/schemas/DeepLearningV3.java index 3b27c6671d77..998b8b432776 100755 --- a/h2o-algos/src/main/java/hex/schemas/DeepLearningV3.java +++ b/h2o-algos/src/main/java/hex/schemas/DeepLearningV3.java @@ -103,6 +103,7 @@ public static final class DeepLearningParametersV3 extends ModelParametersSchema "export_checkpoints_dir", "auc_type", "custom_metric_func", + "gainslift_bins", }; diff --git a/h2o-algos/src/main/java/hex/schemas/GAMV3.java b/h2o-algos/src/main/java/hex/schemas/GAMV3.java index eb15ebd27d84..12e9bc6b5799 100644 --- a/h2o-algos/src/main/java/hex/schemas/GAMV3.java +++ b/h2o-algos/src/main/java/hex/schemas/GAMV3.java @@ -83,7 +83,8 @@ public static final class GAMParametersV3 extends ModelParametersSchemaV3 { diff --git a/h2o-docs/src/product/data-science/algo-params/gainslift_bins.rst b/h2o-docs/src/product/data-science/algo-params/gainslift_bins.rst index 304be8abd4eb..38300c4b1203 100644 --- a/h2o-docs/src/product/data-science/algo-params/gainslift_bins.rst +++ b/h2o-docs/src/product/data-science/algo-params/gainslift_bins.rst @@ -1,7 +1,7 @@ ``gainslift_bins`` ------------------ -- Available in: GBM, DRF, Naïve-Bayes, XGBoost +- Available in: Deeplearning, DRF, GAM, GBM, GLM, Naïve-Bayes, Stacked Ensemble, XGBoost - Hyperparameter: no Description diff --git a/h2o-py/h2o/estimators/deeplearning.py b/h2o-py/h2o/estimators/deeplearning.py index c2121652992d..8c71881eea64 100644 --- a/h2o-py/h2o/estimators/deeplearning.py +++ b/h2o-py/h2o/estimators/deeplearning.py @@ -129,6 +129,7 @@ def __init__(self, export_checkpoints_dir=None, # type: Optional[str] auc_type="auto", # type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] custom_metric_func=None, # type: Optional[str] + gainslift_bins=-1, # type: int ): """ :param model_id: Destination id for this model; auto-generated if not specified. 
@@ -437,6 +438,10 @@ def __init__(self, :param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName` Defaults to ``None``. :type custom_metric_func: str, optional + :param gainslift_bins: Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic + binning. + Defaults to ``-1``. + :type gainslift_bins: int """ super(H2ODeepLearningEstimator, self).__init__() self._parms = {} @@ -530,6 +535,7 @@ def __init__(self, self.export_checkpoints_dir = export_checkpoints_dir self.auc_type = auc_type self.custom_metric_func = custom_metric_func + self.gainslift_bins = gainslift_bins @property def training_frame(self): @@ -3239,6 +3245,20 @@ def custom_metric_func(self, custom_metric_func): assert_is_type(custom_metric_func, None, str) self._parms["custom_metric_func"] = custom_metric_func + @property + def gainslift_bins(self): + """ + Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. + + Type: ``int``, defaults to ``-1``. + """ + return self._parms.get("gainslift_bins") + + @gainslift_bins.setter + def gainslift_bins(self, gainslift_bins): + assert_is_type(gainslift_bins, None, int) + self._parms["gainslift_bins"] = gainslift_bins + class H2OAutoEncoderEstimator(H2ODeepLearningEstimator): diff --git a/h2o-py/h2o/estimators/gam.py b/h2o-py/h2o/estimators/gam.py index a358045968c0..4f08534c465b 100644 --- a/h2o-py/h2o/estimators/gam.py +++ b/h2o-py/h2o/estimators/gam.py @@ -106,6 +106,7 @@ def __init__(self, keep_gam_cols=False, # type: bool store_knot_locations=False, # type: bool auc_type="auto", # type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] + gainslift_bins=-1, # type: int ): """ :param model_id: Destination id for this model; auto-generated if not specified. @@ -373,6 +374,10 @@ def __init__(self, :param auc_type: Set default multinomial AUC type. Defaults to ``"auto"``. 
:type auc_type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] + :param gainslift_bins: Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic + binning. + Defaults to ``-1``. + :type gainslift_bins: int """ super(H2OGeneralizedAdditiveEstimator, self).__init__() self._parms = {} @@ -445,6 +450,7 @@ def __init__(self, self.keep_gam_cols = keep_gam_cols self.store_knot_locations = store_knot_locations self.auc_type = auc_type + self.gainslift_bins = gainslift_bins @property def training_frame(self): @@ -1444,6 +1450,20 @@ def auc_type(self, auc_type): assert_is_type(auc_type, None, Enum("auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo")) self._parms["auc_type"] = auc_type + @property + def gainslift_bins(self): + """ + Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. + + Type: ``int``, defaults to ``-1``. + """ + return self._parms.get("gainslift_bins") + + @gainslift_bins.setter + def gainslift_bins(self, gainslift_bins): + assert_is_type(gainslift_bins, None, int) + self._parms["gainslift_bins"] = gainslift_bins + Lambda = deprecated_property('Lambda', lambda_) def _additional_used_columns(self, parms): diff --git a/h2o-py/h2o/estimators/glm.py b/h2o-py/h2o/estimators/glm.py index 92615270c958..fd4062c78b81 100644 --- a/h2o-py/h2o/estimators/glm.py +++ b/h2o-py/h2o/estimators/glm.py @@ -115,6 +115,7 @@ def __init__(self, fix_tweedie_variance_power=True, # type: bool dispersion_learning_rate=0.5, # type: float influence=None, # type: Optional[Literal["dfbetas"]] + gainslift_bins=-1, # type: int ): """ :param model_id: Destination id for this model; auto-generated if not specified. @@ -411,6 +412,10 @@ def __init__(self, excluded in the dataset. Defaults to ``None``. :type influence: Literal["dfbetas"], optional + :param gainslift_bins: Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic + binning. 
+ Defaults to ``-1``. + :type gainslift_bins: int """ super(H2OGeneralizedLinearEstimator, self).__init__() self._parms = {} @@ -491,6 +496,7 @@ def __init__(self, self.fix_tweedie_variance_power = fix_tweedie_variance_power self.dispersion_learning_rate = dispersion_learning_rate self.influence = influence + self.gainslift_bins = gainslift_bins @property def training_frame(self): @@ -2379,6 +2385,20 @@ def influence(self, influence): assert_is_type(influence, None, Enum("dfbetas")) self._parms["influence"] = influence + @property + def gainslift_bins(self): + """ + Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. + + Type: ``int``, defaults to ``-1``. + """ + return self._parms.get("gainslift_bins") + + @gainslift_bins.setter + def gainslift_bins(self, gainslift_bins): + assert_is_type(gainslift_bins, None, int) + self._parms["gainslift_bins"] = gainslift_bins + Lambda = deprecated_property('Lambda', lambda_) def get_regression_influence_diagnostics(self): diff --git a/h2o-py/h2o/estimators/stackedensemble.py b/h2o-py/h2o/estimators/stackedensemble.py index 6213b26e4125..6a2117c007b2 100644 --- a/h2o-py/h2o/estimators/stackedensemble.py +++ b/h2o-py/h2o/estimators/stackedensemble.py @@ -88,6 +88,7 @@ def __init__(self, keep_levelone_frame=False, # type: bool export_checkpoints_dir=None, # type: Optional[str] auc_type="auto", # type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] + gainslift_bins=-1, # type: int ): """ :param model_id: Destination id for this model; auto-generated if not specified. @@ -173,6 +174,10 @@ def __init__(self, :param auc_type: Set default multinomial AUC type. Defaults to ``"auto"``. :type auc_type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] + :param gainslift_bins: Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic + binning. + Defaults to ``-1``. 
+ :type gainslift_bins: int """ super(H2OStackedEnsembleEstimator, self).__init__() self._parms = {} @@ -197,6 +202,7 @@ def __init__(self, self.keep_levelone_frame = keep_levelone_frame self.export_checkpoints_dir = export_checkpoints_dir self.auc_type = auc_type + self.gainslift_bins = gainslift_bins self._parms["_rest_version"] = 99 @property @@ -905,6 +911,20 @@ def auc_type(self, auc_type): assert_is_type(auc_type, None, Enum("auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo")) self._parms["auc_type"] = auc_type + @property + def gainslift_bins(self): + """ + Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. + + Type: ``int``, defaults to ``-1``. + """ + return self._parms.get("gainslift_bins") + + @gainslift_bins.setter + def gainslift_bins(self, gainslift_bins): + assert_is_type(gainslift_bins, None, int) + self._parms["gainslift_bins"] = gainslift_bins + def metalearner(self): """Print the metalearner of an H2OStackedEnsembleEstimator. 
diff --git a/h2o-py/tests/testdir_misc/pyunit_gainslift_bins.py b/h2o-py/tests/testdir_misc/pyunit_gainslift_bins.py new file mode 100644 index 000000000000..02d3a57a0670 --- /dev/null +++ b/h2o-py/tests/testdir_misc/pyunit_gainslift_bins.py @@ -0,0 +1,138 @@ +import sys + +sys.path.insert(1, "../../") +import h2o +import pandas as pd +from tests import pyunit_utils +from h2o.estimators import * + +eps = 1e-10 + + +def fast_estimator(estimator, **kwargs): + additional_args = dict(seed=123456) + if estimator in (H2ORandomForestEstimator, H2OGradientBoostingEstimator, H2OXGBoostEstimator): + additional_args["ntrees"] = 5 + if estimator == H2ODeepLearningEstimator: + additional_args["hidden"] = [5] + if estimator == H2OGeneralizedAdditiveEstimator: + additional_args["gam_columns"] = ["age"] + + return estimator(**additional_args, **kwargs) + + +def ks_score(mod, data, y): + from scipy.stats import ks_2samp + + df = pd.DataFrame() + df["label"] = data[y].as_data_frame().iloc[:, 0] + df["probs"] = mod.predict(data)["p1"].as_data_frame().iloc[:, 0] + + label_0 = df[df["label"] == 0] + label_1 = df[df["label"] == 1] + + ks = ks_2samp(label_0["probs"], label_1["probs"]) + + return ks.statistic + + +def get_ks(model, data): + """ + This is needed for getting the KS metric for the data. + + Using model.kolmogorov_smirnov() would work for most models for training data but not for + DRF which reports OOB stats... 
+    """
+    perf = model.model_performance(data)
+    return max(perf.gains_lift()["kolmogorov_smirnov"])
+
+
+def assert_eq(a, b):
+    """Fail the calling test unless ``a`` and ``b`` differ by less than ``eps``."""
+    diff = a - b  # computed once; also reported in the failure message
+    assert abs(diff) < eps, "Expected: {}, Actual: {}, diff: {}".format(b, a, diff)
+
+
+def assert_not_eq(a, b):
+    """Fail the calling test unless ``a`` and ``b`` differ by more than ``eps``."""
+    diff = a - b  # computed once; also reported in the failure message
+    assert abs(diff) > eps, "Expected: {}, Actual: {}, diff: {}".format(b, a, diff)
+
+
+def test_helper(Estimator):  # trains Estimator twice (default bins, then gainslift_bins=nrow) and compares KS with scipy
+    df = h2o.import_file(pyunit_utils.locate("smalldata/gbm_test/titanic.csv"))
+    y = "survived"
+    df[y] = df[y].asfactor()
+
+    mod_default = fast_estimator(Estimator)
+    mod_default.train(y=y, training_frame=df)
+
+    assert_not_eq(get_ks(mod_default, df), ks_score(mod_default, df, y))  # default histogram is not precise enough
+
+    mod_glbins = fast_estimator(Estimator, gainslift_bins=df.nrow)
+    mod_glbins.train(y=y, training_frame=df)
+
+    assert_eq(get_ks(mod_glbins, df), ks_score(mod_glbins, df, y))  # should result in precise statistics
+
+
+def test_deeplearning():
+    test_helper(H2ODeepLearningEstimator)
+
+
+def test_drf():
+    test_helper(H2ORandomForestEstimator)
+
+
+def test_gam():
+    test_helper(H2OGeneralizedAdditiveEstimator)
+
+
+def test_gbm():
+    test_helper(H2OGradientBoostingEstimator)
+
+
+def test_glm():
+    test_helper(H2OGeneralizedLinearEstimator)
+
+
+def test_xgboost():
+    test_helper(H2OXGBoostEstimator)
+
+
+def test_stacked_ensemble():
+    df = h2o.import_file(pyunit_utils.locate("smalldata/gbm_test/titanic.csv"))
+    y = "survived"
+    df[y] = df[y].asfactor()
+
+    base_kwargs = dict(nfolds=3, keep_cross_validation_predictions=True)
+
+    base_models = [
+        fast_estimator(H2ORandomForestEstimator, **base_kwargs),
+        fast_estimator(H2OGradientBoostingEstimator, **base_kwargs),
+        fast_estimator(H2ODeepLearningEstimator, **base_kwargs),
+        fast_estimator(H2OGeneralizedLinearEstimator, **base_kwargs),
+    ]
+
+    for est in base_models:
+        est.train(y=y, training_frame=df)
+
+    mod_default = fast_estimator(H2OStackedEnsembleEstimator, base_models=base_models)
+ mod_default.train(y=y, training_frame=df) + + assert_not_eq(get_ks(mod_default, df), ks_score(mod_default, df, y)) # default histogram is not precise enough + + mod_glbins = fast_estimator(H2OStackedEnsembleEstimator, base_models=base_models, gainslift_bins=df.nrow) + mod_glbins.train(y=y, training_frame=df) + + assert_eq(get_ks(mod_glbins, df), ks_score(mod_glbins, df, y)) # should result in precise statistics + + +pyunit_utils.run_tests([ + test_deeplearning, + test_drf, + test_gam, + test_gbm, + test_glm, + test_xgboost, + test_stacked_ensemble, +]) diff --git a/h2o-r/h2o-package/R/deeplearning.R b/h2o-r/h2o-package/R/deeplearning.R index b958d9a3f028..1b6ed6c3aad8 100644 --- a/h2o-r/h2o-package/R/deeplearning.R +++ b/h2o-r/h2o-package/R/deeplearning.R @@ -141,6 +141,7 @@ #' @param auc_type Set default multinomial AUC type. Must be one of: "AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", #' "WEIGHTED_OVO". Defaults to AUTO. #' @param custom_metric_func Reference to custom evaluation function, format: `language:keyName=funcName` +#' @param gainslift_bins Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. Defaults to -1. #' @param verbose \code{Logical}. Print scoring history to the console (Metrics per epoch). Defaults to FALSE. 
#' @seealso \code{\link{predict.H2OModel}} for prediction #' @examples @@ -243,6 +244,7 @@ h2o.deeplearning <- function(x, export_checkpoints_dir = NULL, auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"), custom_metric_func = NULL, + gainslift_bins = -1, verbose = FALSE) { # Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object @@ -446,6 +448,8 @@ h2o.deeplearning <- function(x, parms$auc_type <- auc_type if (!missing(custom_metric_func)) parms$custom_metric_func <- custom_metric_func + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins # Error check and build model model <- .h2o.modelJob('deeplearning', parms, h2oRestApiVersion=3, verbose=verbose) @@ -539,6 +543,7 @@ h2o.deeplearning <- function(x, export_checkpoints_dir = NULL, auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"), custom_metric_func = NULL, + gainslift_bins = -1, segment_columns = NULL, segment_models_id = NULL, parallelism = 1) @@ -746,6 +751,8 @@ h2o.deeplearning <- function(x, parms$auc_type <- auc_type if (!missing(custom_metric_func)) parms$custom_metric_func <- custom_metric_func + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins # Build segment-models specific parameters segment_parms <- list() diff --git a/h2o-r/h2o-package/R/gam.R b/h2o-r/h2o-package/R/gam.R index b4fd3611a608..2e620b24e7a0 100644 --- a/h2o-r/h2o-package/R/gam.R +++ b/h2o-r/h2o-package/R/gam.R @@ -140,6 +140,7 @@ #' knots_for_gam. Default to false. Defaults to FALSE. #' @param auc_type Set default multinomial AUC type. Must be one of: "AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", #' "WEIGHTED_OVO". Defaults to AUTO. +#' @param gainslift_bins Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. Defaults to -1. 
#' @examples #' \dontrun{ #' h2o.init() @@ -220,7 +221,8 @@ h2o.gam <- function(x, scale = NULL, keep_gam_cols = FALSE, store_knot_locations = FALSE, - auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO")) + auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"), + gainslift_bins = -1) { # Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object training_frame <- .validate.H2OFrame(training_frame, required=TRUE) @@ -382,6 +384,8 @@ h2o.gam <- function(x, parms$store_knot_locations <- store_knot_locations if (!missing(auc_type)) parms$auc_type <- auc_type + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins if( !missing(interactions) ) { # interactions are column names => as-is @@ -477,6 +481,7 @@ h2o.gam <- function(x, keep_gam_cols = FALSE, store_knot_locations = FALSE, auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"), + gainslift_bins = -1, segment_columns = NULL, segment_models_id = NULL, parallelism = 1) @@ -643,6 +648,8 @@ h2o.gam <- function(x, parms$store_knot_locations <- store_knot_locations if (!missing(auc_type)) parms$auc_type <- auc_type + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins if( !missing(interactions) ) { # interactions are column names => as-is diff --git a/h2o-r/h2o-package/R/glm.R b/h2o-r/h2o-package/R/glm.R index 7f8091a8e61a..002dbf0aebd7 100644 --- a/h2o-r/h2o-package/R/glm.R +++ b/h2o-r/h2o-package/R/glm.R @@ -154,6 +154,7 @@ #' Defaults to 0.5. #' @param influence If set to dfbetas will calculate the difference in beta when a datarow is included and excluded in the #' dataset. Must be one of: "dfbetas". +#' @param gainslift_bins Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. Defaults to -1. #' @return A subclass of \code{\linkS4class{H2OModel}} is returned. 
The specific subclass depends on the machine #' learning task at hand (if it's binomial classification, then an \code{\linkS4class{H2OBinomialModel}} is #' returned, if it's regression then a \code{\linkS4class{H2ORegressionModel}} is returned). The default print- @@ -274,7 +275,8 @@ h2o.glm <- function(x, generate_variable_inflation_factors = FALSE, fix_tweedie_variance_power = TRUE, dispersion_learning_rate = 0.5, - influence = c("dfbetas")) + influence = c("dfbetas"), + gainslift_bins = -1) { # Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object training_frame <- .validate.H2OFrame(training_frame, required=TRUE) @@ -455,6 +457,8 @@ h2o.glm <- function(x, parms$dispersion_learning_rate <- dispersion_learning_rate if (!missing(influence)) parms$influence <- influence + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins if( !missing(interactions) ) { # interactions are column names => as-is @@ -558,6 +562,7 @@ h2o.glm <- function(x, fix_tweedie_variance_power = TRUE, dispersion_learning_rate = 0.5, influence = c("dfbetas"), + gainslift_bins = -1, segment_columns = NULL, segment_models_id = NULL, parallelism = 1) @@ -743,6 +748,8 @@ h2o.glm <- function(x, parms$dispersion_learning_rate <- dispersion_learning_rate if (!missing(influence)) parms$influence <- influence + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins if( !missing(interactions) ) { # interactions are column names => as-is diff --git a/h2o-r/h2o-package/R/stackedensemble.R b/h2o-r/h2o-package/R/stackedensemble.R index 30774938a8e8..85dabd16db04 100644 --- a/h2o-r/h2o-package/R/stackedensemble.R +++ b/h2o-r/h2o-package/R/stackedensemble.R @@ -52,6 +52,7 @@ #' @param export_checkpoints_dir Automatically export generated models to this directory. #' @param auc_type Set default multinomial AUC type. Must be one of: "AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", #' "WEIGHTED_OVO". Defaults to AUTO. 
+#' @param gainslift_bins Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. Defaults to -1. #' @examples #' \dontrun{ #' library(h2o) @@ -125,7 +126,8 @@ h2o.stackedEnsemble <- function(x, score_training_samples = 10000, keep_levelone_frame = FALSE, export_checkpoints_dir = NULL, - auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO")) + auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"), + gainslift_bins = -1) { # Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object training_frame <- .validate.H2OFrame(training_frame, required=is.null(blending_frame)) @@ -201,6 +203,8 @@ h2o.stackedEnsemble <- function(x, parms$export_checkpoints_dir <- export_checkpoints_dir if (!missing(auc_type)) parms$auc_type <- auc_type + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins if (!missing(metalearner_params)) parms$metalearner_params <- as.character(toJSON(metalearner_params, pretty = TRUE)) @@ -235,6 +239,7 @@ h2o.stackedEnsemble <- function(x, keep_levelone_frame = FALSE, export_checkpoints_dir = NULL, auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"), + gainslift_bins = -1, segment_columns = NULL, segment_models_id = NULL, parallelism = 1) @@ -315,6 +320,8 @@ h2o.stackedEnsemble <- function(x, parms$export_checkpoints_dir <- export_checkpoints_dir if (!missing(auc_type)) parms$auc_type <- auc_type + if (!missing(gainslift_bins)) + parms$gainslift_bins <- gainslift_bins if (!missing(metalearner_params)) parms$metalearner_params <- as.character(toJSON(metalearner_params, pretty = TRUE))