Skip to content

Commit

Permalink
GH-15780 - add weak_learner_parameters to API
Browse files Browse the repository at this point in the history
  • Loading branch information
valenad1 committed Nov 21, 2023
1 parent 9712ad8 commit 73003e8
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 4 deletions.
4 changes: 4 additions & 0 deletions h2o-algos/src/main/java/hex/schemas/AdaBoostV3.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public static final class AdaBoostParametersV3 extends ModelParametersSchemaV3<A
"nlearners",
"weak_learner",
"learn_rate",
"weak_learner_params",
"seed",
};

Expand All @@ -35,6 +36,9 @@ public static final class AdaBoostParametersV3 extends ModelParametersSchemaV3<A
@API(help="Learning rate (from 0.0 to 1.0)", gridable = true)
public double learn_rate;

@API(help = "Customized parameters for the weak_learner algorithm.", gridable=true)
public String weak_learner_params;

@API(help = "Seed for pseudo random number generator (if applicable)", gridable = true)
public long seed;
}
Expand Down
4 changes: 3 additions & 1 deletion h2o-algos/src/test/java/hex/adaboost/AdaBoostTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import hex.deeplearning.DeepLearningModel;
import hex.genmodel.algos.tree.SharedTreeSubgraph;
import hex.glm.GLMModel;
import hex.tree.SharedTreeModel;
import hex.tree.drf.DRFModel;
import hex.tree.gbm.GBMModel;
import org.junit.Before;
Expand Down Expand Up @@ -629,7 +630,7 @@ public void testTrainWithCustomWeakLearnersGBM() {
p._nlearners = nlearners;
p._weak_learner = AdaBoostModel.Algorithm.GBM;
p._response_column = response;
p._weak_learner_params = "{ntrees:3}";
p._weak_learner_params = "{ntrees:3, 'histogram_type': 'UniformAdaptive'}";

AdaBoost adaBoost = new AdaBoost(p);
AdaBoostModel adaBoostModel = adaBoost.trainModel().get();
Expand All @@ -640,6 +641,7 @@ public void testTrainWithCustomWeakLearnersGBM() {
System.out.println("GBM model = " + i);
GBMModel gbmModel = DKV.getGet(adaBoostModel._output.models[i]);
assertEquals(3, gbmModel._output._ntrees);
assertEquals(SharedTreeModel.SharedTreeParameters.HistogramType.UniformAdaptive, gbmModel._parms._histogram_type);
}
Frame score = adaBoostModel.score(train);
Scope.track(score);
Expand Down
13 changes: 11 additions & 2 deletions h2o-bindings/bin/custom/R/gen_adaboost.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
def update_param(name, param):
    """Codegen hook: override metadata for selected parameters of the R binding.

    For ``weak_learner_params`` the generated default value is forced to
    ``None``; returning ``None`` for any other name tells the generator to
    leave that parameter untouched.
    """
    if name != 'weak_learner_params':
        return None  # param untouched
    param['default_value'] = None
    return param

extensions = dict(
skip_default_set_params_for=['training_frame', 'ignored_columns', 'response_column',
'max_confusion_matrix_size', 'distribution', 'offset_column'],
'max_confusion_matrix_size', 'distribution', 'offset_column', 'weak_learner_params'],
set_required_params="""
parms$training_frame <- training_frame
args <- .verify_dataxy(training_frame, x, y)
parms$ignored_columns <- args$x_ignore
parms$response_column <- args$y
""",
set_params="""
if (!missing(weak_learner_params))
parms$weak_learner_params <- as.character(toJSON(weak_learner_params, pretty = TRUE, auto_unbox = TRUE))
"""
)


doc = dict(
preamble="""
Build an AdaBoost model
Expand Down
37 changes: 36 additions & 1 deletion h2o-bindings/bin/custom/python/gen_adaboost.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,43 @@
options = dict(
def update_param(name, param):
    """Codegen hook: adjust schema metadata for selected parameters of the Python binding.

    For ``weak_learner_params`` the REST schema type is overridden to
    ``KeyValue`` and the default value forced to ``None``; any other name
    yields ``None``, which keeps the parameter as generated.
    """
    if name != 'weak_learner_params':
        return None  # param untouched
    param.update(type='KeyValue', default_value=None)
    return param

# Codegen extension points for the generated Python estimator (adaboost.py):
# __imports__ is prepended verbatim to the generated module, so every name
# used by the generated property bodies (json, assert_is_type, ...) must be
# imported here.
extensions = dict(
    __imports__="""
import ast
import json
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric
""",
)

doc = dict(
__class__="""
Builds an AdaBoost model
"""
)

overrides = dict(
weak_learner_params=dict(
getter="""
if self._parms.get("{sname}") != None:
return json.loads(self._parms.get("{sname}"))
else:
self._parms["{sname}"] = None
""",
setter="""
assert_is_type({pname}, None, {ptype})
if {pname} is not None and {pname} != "":
for k in {pname}:
weak_learner_params[k] = weak_learner_params[k]
self._parms["{sname}"] = str(json.dumps({pname}))
else:
self._parms["{sname}"] = None
"""
)
)
33 changes: 33 additions & 0 deletions h2o-py/h2o/estimators/adaboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details)
#

import ast
import json
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
Expand All @@ -31,6 +37,7 @@ def __init__(self,
nlearners=50, # type: int
weak_learner="auto", # type: Literal["auto", "drf", "glm", "gbm", "deep_learning"]
learn_rate=0.5, # type: float
weak_learner_params=None, # type: Optional[dict]
seed=-1, # type: int
):
"""
Expand Down Expand Up @@ -68,6 +75,9 @@ def __init__(self,
:param learn_rate: Learning rate (from 0.0 to 1.0)
Defaults to ``0.5``.
:type learn_rate: float
:param weak_learner_params: Customized parameters for the weak_learner algorithm.
Defaults to ``None``.
:type weak_learner_params: dict, optional
:param seed: Seed for pseudo random number generator (if applicable)
Defaults to ``-1``.
:type seed: int
Expand All @@ -83,6 +93,7 @@ def __init__(self,
self.nlearners = nlearners
self.weak_learner = weak_learner
self.learn_rate = learn_rate
self.weak_learner_params = weak_learner_params
self.seed = seed

@property
Expand Down Expand Up @@ -203,6 +214,28 @@ def learn_rate(self, learn_rate):
assert_is_type(learn_rate, None, numeric)
self._parms["learn_rate"] = learn_rate

@property
def weak_learner_params(self):
    """
    Customized parameters for the weak_learner algorithm.

    The value is stored internally as a JSON string (the wire format the
    backend expects) and decoded back to a ``dict`` for the caller.

    Type: ``dict``.
    """
    if self._parms.get("weak_learner_params") is not None:
        return json.loads(self._parms.get("weak_learner_params"))
    # Unset: just report None — do not mutate self._parms from a getter.
    return None

@weak_learner_params.setter
def weak_learner_params(self, weak_learner_params):
    assert_is_type(weak_learner_params, None, dict)
    if weak_learner_params is not None:
        # Serialize once so the dict survives the trip to the backend as
        # a plain string parameter (json.dumps already returns str).
        self._parms["weak_learner_params"] = json.dumps(weak_learner_params)
    else:
        self._parms["weak_learner_params"] = None

@property
def seed(self):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import sys, os
sys.path.insert(1, os.path.join("..","..",".."))
import h2o
from tests import pyunit_utils
from h2o.estimators import H2OAdaBoostEstimator


def adaboost():
    """Smoke test for weak_learner_params.

    Verifies only that the parameters are accepted by the Python API and
    passed through to the backend (training succeeds and a model JSON is
    produced) for every supported weak learner; model quality is not checked.
    """
    print("AdaBoost Weak Learner Params Smoke Test - test only that parameters are correctly passed to backend")

    train = h2o.import_file(pyunit_utils.locate("smalldata/prostate/prostate.csv"))
    train["CAPSULE"] = train["CAPSULE"].asfactor()

    common_adaboost_def = {"nlearners": 10, "seed": 0xBEEF, "learn_rate": 0.6}
    common_adaboost_train = {"training_frame": train, "y": "CAPSULE"}

    # One (weak_learner, weak_learner_params) case per supported algorithm.
    cases = [
        ("DRF", {'ntrees': 10, 'histogram_type': "UniformAdaptive"}),
        ("GBM", {'ntrees': 10, 'histogram_type': "UniformAdaptive", "learn_rate": 0.1}),
        ("GLM", {'max_iterations': 10}),
        ("DEEP_LEARNING", {'nepochs': 10, 'hidden': [2, 2, 4]}),
    ]
    for weak_learner, params in cases:
        adaboost_model = H2OAdaBoostEstimator(
            weak_learner=weak_learner,
            weak_learner_params=params,
            **common_adaboost_def
        )
        # The getter must round-trip the params back as a dict.
        assert isinstance(adaboost_model.weak_learner_params, dict)
        adaboost_model.train(**common_adaboost_train)
        assert adaboost_model._model_json is not None


if __name__ == "__main__":
    pyunit_utils.standalone_test(adaboost)
else:
    adaboost()
9 changes: 9 additions & 0 deletions h2o-r/h2o-package/R/adaboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#' @param weak_learner Choose a weak learner type. Defaults to AUTO, which means DRF. Must be one of: "AUTO", "DRF", "GLM", "GBM",
#' "DEEP_LEARNING". Defaults to AUTO.
#' @param learn_rate Learning rate (from 0.0 to 1.0) Defaults to 0.5.
#' @param weak_learner_params Customized parameters for the weak_learner algorithm.
#' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
#' Defaults to -1 (time-based random number).
#' @return Creates a \linkS4class{H2OModel} object of the right type.
Expand Down Expand Up @@ -60,6 +61,7 @@ h2o.adaBoost <- function(x,
nlearners = 50,
weak_learner = c("AUTO", "DRF", "GLM", "GBM", "DEEP_LEARNING"),
learn_rate = 0.5,
weak_learner_params = NULL,
seed = -1)
{
# Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
Expand Down Expand Up @@ -99,6 +101,9 @@ h2o.adaBoost <- function(x,
if (!missing(seed))
parms$seed <- seed

if (!missing(weak_learner_params))
parms$weak_learner_params <- as.character(toJSON(weak_learner_params, pretty = TRUE, auto_unbox = TRUE))

# Error check and build model
model <- .h2o.modelJob('adaboost', parms, h2oRestApiVersion=3, verbose=FALSE)
return(model)
Expand All @@ -112,6 +117,7 @@ h2o.adaBoost <- function(x,
nlearners = 50,
weak_learner = c("AUTO", "DRF", "GLM", "GBM", "DEEP_LEARNING"),
learn_rate = 0.5,
weak_learner_params = NULL,
seed = -1,
segment_columns = NULL,
segment_models_id = NULL,
Expand Down Expand Up @@ -156,6 +162,9 @@ h2o.adaBoost <- function(x,
if (!missing(seed))
parms$seed <- seed

if (!missing(weak_learner_params))
parms$weak_learner_params <- as.character(toJSON(weak_learner_params, pretty = TRUE, auto_unbox = TRUE))

# Build segment-models specific parameters
segment_parms <- list()
if (!missing(segment_columns))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source("../../../scripts/h2o-r-test-setup.R")


test.adaBoost.smoke <- function() {
    # Smoke test: weak_learner_params must be accepted and forwarded through
    # the R API for every supported weak learner; model quality is not checked.
    f <- "https://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv"
    data <- h2o.importFile(f)

    # Set predictors and response; response must be categorical for classification
    data["CAPSULE"] <- as.factor(data["CAPSULE"])
    predictors <- c("AGE","RACE","DPROS","DCAPS","PSA","VOL","GLEASON")
    response <- "CAPSULE"

    # Train one AdaBoost model with the given weak learner and its custom
    # params, and assert that a model object came back.
    check_weak_learner <- function(weak_learner, weak_learner_params) {
        h2o_adaboost <- h2o.adaBoost(nlearners = 5, x = predictors, y = response,
                                     training_frame = data, seed = 1234,
                                     weak_learner = weak_learner,
                                     weak_learner_params = weak_learner_params)
        expect_equal(is.null(h2o_adaboost), FALSE)
    }

    check_weak_learner("DRF", list(ntrees=3, max_depth=2, histogram_type="UniformAdaptive"))
    check_weak_learner("GBM", list(ntrees=3, max_depth=2, histogram_type="UniformAdaptive"))
    check_weak_learner("GLM", list(max_iterations=3))
    check_weak_learner("DEEP_LEARNING", list(nepochs=3, hidden=list(2,1,2)))
}

doTest("adaBoost: Smoke Test For Weak Learner Params - only that is pass through the API", test.adaBoost.smoke)

0 comments on commit 73003e8

Please sign in to comment.