GH-15780 set weak learner parameter API #15912

Merged · 2 commits · Nov 22, 2023
4 changes: 4 additions & 0 deletions h2o-algos/src/main/java/hex/schemas/AdaBoostV3.java
@@ -23,6 +23,7 @@ public static final class AdaBoostParametersV3 extends ModelParametersSchemaV3<A
"nlearners",
"weak_learner",
"learn_rate",
"weak_learner_params",
"seed",
};

@@ -35,6 +36,9 @@ public static final class AdaBoostParametersV3 extends ModelParametersSchemaV3<A
@API(help="Learning rate (from 0.0 to 1.0)", gridable = true)
public double learn_rate;

@API(help = "Customized parameters for the weak_learner algorithm.", gridable=true)
public String weak_learner_params;

@API(help = "Seed for pseudo random number generator (if applicable)", gridable = true)
public long seed;
}
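The schema carries weak_learner_params as a single JSON string, so every client just serializes a plain dictionary before the request. A minimal illustration of the wire format (values are hypothetical, not from the PR):

    import json

    # The dict form a user writes in a client...
    params = {"ntrees": 3, "histogram_type": "UniformAdaptive"}
    # ...is serialized into the single string field the schema exposes.
    print(json.dumps(params))  # {"ntrees": 3, "histogram_type": "UniformAdaptive"}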
4 changes: 3 additions & 1 deletion h2o-algos/src/test/java/hex/adaboost/AdaBoostTest.java
@@ -4,6 +4,7 @@
import hex.deeplearning.DeepLearningModel;
import hex.genmodel.algos.tree.SharedTreeSubgraph;
import hex.glm.GLMModel;
import hex.tree.SharedTreeModel;
import hex.tree.drf.DRFModel;
import hex.tree.gbm.GBMModel;
import org.junit.Before;
@@ -629,7 +630,7 @@ public void testTrainWithCustomWeakLearnersGBM() {
p._nlearners = nlearners;
p._weak_learner = AdaBoostModel.Algorithm.GBM;
p._response_column = response;
p._weak_learner_params = "{ntrees:3}";
p._weak_learner_params = "{ntrees:3, 'histogram_type': 'UniformAdaptive'}";

AdaBoost adaBoost = new AdaBoost(p);
AdaBoostModel adaBoostModel = adaBoost.trainModel().get();
@@ -640,6 +641,7 @@ public void testTrainWithCustomWeakLearnersGBM() {
System.out.println("GBM model = " + i);
GBMModel gbmModel = DKV.getGet(adaBoostModel._output.models[i]);
assertEquals(3, gbmModel._output._ntrees);
assertEquals(SharedTreeModel.SharedTreeParameters.HistogramType.UniformAdaptive, gbmModel._parms._histogram_type);
}
Frame score = adaBoostModel.score(train);
Scope.track(score);
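Note that the Java test feeds the backend "{ntrees:3, 'histogram_type': 'UniformAdaptive'}", which is not strict JSON (unquoted key, single-quoted strings), so the server-side parser is presumably lenient about the dialect. A quick way to see the difference; the client bindings below sidestep the issue by always emitting strict JSON:

    import json

    s = "{ntrees:3, 'histogram_type': 'UniformAdaptive'}"
    try:
        json.loads(s)  # a strict parser rejects unquoted keys and single quotes
    except json.JSONDecodeError as e:
        print("not strict JSON:", e)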
16 changes: 14 additions & 2 deletions h2o-bindings/bin/custom/R/gen_adaboost.py
@@ -1,21 +1,33 @@
def update_param(name, param):
if name == 'weak_learner_params':
param['default_value'] = None
return param
return None # param untouched

extensions = dict(
skip_default_set_params_for=['training_frame', 'ignored_columns', 'response_column',
'max_confusion_matrix_size', 'distribution', 'offset_column'],
'max_confusion_matrix_size', 'distribution', 'offset_column', 'weak_learner_params'],
set_required_params="""
parms$training_frame <- training_frame
args <- .verify_dataxy(training_frame, x, y)
parms$ignored_columns <- args$x_ignore
parms$response_column <- args$y
""",
set_params="""
if (!missing(weak_learner_params))
parms$weak_learner_params <- as.character(toJSON(weak_learner_params, pretty = TRUE, auto_unbox = TRUE))
"""
)


doc = dict(
preamble="""
Build an AdaBoost model

Builds an AdaBoost model on an H2OFrame.
""",
params=dict(
weak_learner_params="Customized parameters for the weak_learner algorithm. E.g. list(ntrees=3, max_depth=2, histogram_type='UniformAdaptive')",
),
returns="""
Creates a \linkS4class{H2OModel} object of the right type.
""",
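A note on the R side: toJSON here is presumably jsonlite's, and auto_unbox = TRUE renders length-one vectors as JSON scalars, so list(ntrees = 3) serializes to the same shape a Python dict would. A Python sketch of the expected output (assuming jsonlite semantics; not code from the PR):

    import json

    # R: toJSON(list(ntrees = 3, max_depth = 2), pretty = TRUE, auto_unbox = TRUE)
    # gives a pretty-printed object with scalars unboxed; a compact Python analogue:
    print(json.dumps({"ntrees": 3, "max_depth": 2}, separators=(",", ":")))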
53 changes: 52 additions & 1 deletion h2o-bindings/bin/custom/python/gen_adaboost.py
@@ -1,8 +1,59 @@
options = dict(
def update_param(name, param):
if name == 'weak_learner_params':
param['type'] = 'KeyValue'
param['default_value'] = None
return param
return None # param untouched

extensions = dict(
__imports__="""
import ast
import json
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric
""",
)

doc = dict(
__class__="""
Builds an AdaBoost model
"""
)

overrides = dict(
weak_learner_params=dict(
getter="""
if self._parms.get("{sname}") is not None:
    return json.loads(self._parms.get("{sname}"))
else:
    self._parms["{sname}"] = None
""",
setter="""
assert_is_type({pname}, None, {ptype})
if {pname} is not None and {pname} != "":
    self._parms["{sname}"] = json.dumps({pname})
else:
    self._parms["{sname}"] = None
"""
)
)

examples = dict(
weak_learner_params="""
>>> prostate_hex = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> prostate_hex["CAPSULE"] = prostate_hex["CAPSULE"].asfactor()
>>> response = "CAPSULE"
>>> seed = 42
>>> adaboost_model = H2OAdaBoostEstimator(seed=seed,
... weak_learner="DRF",
... weak_learner_params={'ntrees':1,'max_depth':3})
>>> adaboost_model.train(y=response,
... ignored_columns=["ID"],
... training_frame=prostate_hex)
>>> print(adaboost_model)
""",
)
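The generated getter/setter pair round-trips the user's dict through that JSON string. A self-contained sketch of the intended behavior (independent of H2O; the class and attribute names are illustrative only):

    import json

    class WeakLearnerParamsDemo:
        # Mimics the generated accessors: store the dict as JSON text,
        # hand it back as a dict on read.
        def __init__(self):
            self._parms = {}

        @property
        def weak_learner_params(self):
            raw = self._parms.get("weak_learner_params")
            return json.loads(raw) if raw is not None else None

        @weak_learner_params.setter
        def weak_learner_params(self, value):
            if value is not None and value != "":
                self._parms["weak_learner_params"] = json.dumps(value)
            else:
                self._parms["weak_learner_params"] = None

    demo = WeakLearnerParamsDemo()
    demo.weak_learner_params = {"ntrees": 1, "max_depth": 3}
    assert demo.weak_learner_params == {"ntrees": 1, "max_depth": 3}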
47 changes: 47 additions & 0 deletions h2o-py/h2o/estimators/adaboost.py
@@ -5,6 +5,12 @@
# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details)
#

import ast
import json
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
@@ -31,6 +37,7 @@ def __init__(self,
nlearners=50, # type: int
weak_learner="auto", # type: Literal["auto", "drf", "glm", "gbm", "deep_learning"]
learn_rate=0.5, # type: float
weak_learner_params=None, # type: Optional[dict]
seed=-1, # type: int
):
"""
@@ -68,6 +75,9 @@ def __init__(self,
:param learn_rate: Learning rate (from 0.0 to 1.0)
Defaults to ``0.5``.
:type learn_rate: float
:param weak_learner_params: Customized parameters for the weak_learner algorithm.
Defaults to ``None``.
:type weak_learner_params: dict, optional
:param seed: Seed for pseudo random number generator (if applicable)
Defaults to ``-1``.
:type seed: int
@@ -83,6 +93,7 @@ def __init__(self,
self.nlearners = nlearners
self.weak_learner = weak_learner
self.learn_rate = learn_rate
self.weak_learner_params = weak_learner_params
self.seed = seed

@property
@@ -203,6 +214,42 @@ def learn_rate(self, learn_rate):
assert_is_type(learn_rate, None, numeric)
self._parms["learn_rate"] = learn_rate

@property
def weak_learner_params(self):
"""
Customized parameters for the weak_learner algorithm.

Type: ``dict``.

:examples:

>>> prostate_hex = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> prostate_hex["CAPSULE"] = prostate_hex["CAPSULE"].asfactor()
>>> response = "CAPSULE"
>>> seed = 42
>>> adaboost_model = H2OAdaBoostEstimator(seed=seed,
... weak_learner="DRF",
... weak_learner_params={'ntrees':1,'max_depth':3})
>>> adaboost_model.train(y=response,
... ignored_columns=["ID"],
... training_frame=prostate_hex)
>>> print(adaboost_model)
"""
if self._parms.get("weak_learner_params") is not None:
    return json.loads(self._parms.get("weak_learner_params"))
else:
    self._parms["weak_learner_params"] = None

@weak_learner_params.setter
def weak_learner_params(self, weak_learner_params):
assert_is_type(weak_learner_params, None, dict)
if weak_learner_params is not None and weak_learner_params != "":
    self._parms["weak_learner_params"] = json.dumps(weak_learner_params)
else:
    self._parms["weak_learner_params"] = None

@property
def seed(self):
"""
@@ -0,0 +1,69 @@
import sys, os
sys.path.insert(1, os.path.join("..","..",".."))
import h2o
from tests import pyunit_utils
from h2o.estimators import H2OAdaBoostEstimator


def adaboost():
print("AdaBoost Weak Learner Params Smoke Test - tests only that parameters are correctly passed to the backend")

train = h2o.import_file(pyunit_utils.locate("smalldata/prostate/prostate.csv"))
train["CAPSULE"] = train["CAPSULE"].asfactor()

common_adaboost_def = {"nlearners": 10, "seed": 0xBEEF, "learn_rate": 0.6}
common_adaboost_train = {"training_frame": train, "y": "CAPSULE"}

adaboost_model = H2OAdaBoostEstimator(
weak_learner="DRF",
weak_learner_params={
'ntrees': 10,
'histogram_type': "UniformAdaptive"
},
**common_adaboost_def
)
assert isinstance(adaboost_model.weak_learner_params, dict)
adaboost_model.train(**common_adaboost_train)
assert adaboost_model._model_json is not None

adaboost_model = H2OAdaBoostEstimator(
weak_learner="GBM",
weak_learner_params={
'ntrees': 10,
'histogram_type': "UniformAdaptive",
"learn_rate": 0.1
},
**common_adaboost_def
)
assert isinstance(adaboost_model.weak_learner_params, dict)
adaboost_model.train(**common_adaboost_train)
assert adaboost_model._model_json is not None

adaboost_model = H2OAdaBoostEstimator(
weak_learner="GLM",
weak_learner_params={
'max_iterations': 10
},
**common_adaboost_def
)
assert isinstance(adaboost_model.weak_learner_params, dict)
adaboost_model.train(**common_adaboost_train)
assert adaboost_model._model_json is not None

adaboost_model = H2OAdaBoostEstimator(
weak_learner="DEEP_LEARNING",
weak_learner_params={
'nepochs': 10,
'hidden': [2, 2, 4]
},
**common_adaboost_def
)
assert isinstance(adaboost_model.weak_learner_params, dict)
adaboost_model.train(**common_adaboost_train)
assert adaboost_model._model_json is not None


if __name__ == "__main__":
pyunit_utils.standalone_test(adaboost)
else:
adaboost()
9 changes: 9 additions & 0 deletions h2o-r/h2o-package/R/adaboost.R
@@ -28,6 +28,7 @@
#' @param weak_learner Choose a weak learner type. Defaults to AUTO, which means DRF. Must be one of: "AUTO", "DRF", "GLM", "GBM",
#' "DEEP_LEARNING". Defaults to AUTO.
#' @param learn_rate Learning rate (from 0.0 to 1.0) Defaults to 0.5.
#' @param weak_learner_params Customized parameters for the weak_learner algorithm. E.g. list(ntrees=3, max_depth=2, histogram_type='UniformAdaptive')
#' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
#' Defaults to -1 (time-based random number).
#' @return Creates a \linkS4class{H2OModel} object of the right type.
@@ -60,6 +61,7 @@ h2o.adaBoost <- function(x,
nlearners = 50,
weak_learner = c("AUTO", "DRF", "GLM", "GBM", "DEEP_LEARNING"),
learn_rate = 0.5,
weak_learner_params = NULL,
seed = -1)
{
# Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
@@ -99,6 +101,9 @@ h2o.adaBoost <- function(x,
if (!missing(seed))
parms$seed <- seed

if (!missing(weak_learner_params))
parms$weak_learner_params <- as.character(toJSON(weak_learner_params, pretty = TRUE, auto_unbox = TRUE))

# Error check and build model
model <- .h2o.modelJob('adaboost', parms, h2oRestApiVersion=3, verbose=FALSE)
return(model)
@@ -112,6 +117,7 @@ h2o.adaBoost <- function(x,
nlearners = 50,
weak_learner = c("AUTO", "DRF", "GLM", "GBM", "DEEP_LEARNING"),
learn_rate = 0.5,
weak_learner_params = NULL,
seed = -1,
segment_columns = NULL,
segment_models_id = NULL,
@@ -156,6 +162,9 @@ h2o.adaBoost <- function(x,
if (!missing(seed))
parms$seed <- seed

if (!missing(weak_learner_params))
parms$weak_learner_params <- as.character(toJSON(weak_learner_params, pretty = TRUE, auto_unbox = TRUE))

# Build segment-models specific parameters
segment_parms <- list()
if (!missing(segment_columns))
@@ -0,0 +1,24 @@
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source("../../../scripts/h2o-r-test-setup.R")


test.adaBoost.smoke <- function() {
f <- "https://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv"
data <- h2o.importFile(f)

# Set predictors and response; set response as a factor
data["CAPSULE"] <- as.factor(data["CAPSULE"])
predictors <- c("AGE","RACE","DPROS","DCAPS","PSA","VOL","GLEASON")
response <- "CAPSULE"

h2o_adaboost <- h2o.adaBoost(nlearners = 5, x = predictors, y = response, training_frame = data, seed = 1234, weak_learner = "DRF", weak_learner_params = list(ntrees=3, max_depth=2, histogram_type="UniformAdaptive"))
expect_equal(is.null(h2o_adaboost), FALSE)
h2o_adaboost <- h2o.adaBoost(nlearners = 5, x = predictors, y = response, training_frame = data, seed = 1234, weak_learner = "GBM", weak_learner_params = list(ntrees=3, max_depth=2, histogram_type="UniformAdaptive"))
expect_equal(is.null(h2o_adaboost), FALSE)
h2o_adaboost <- h2o.adaBoost(nlearners = 5, x = predictors, y = response, training_frame = data, seed = 1234, weak_learner = "GLM", weak_learner_params = list(max_iterations=3))
expect_equal(is.null(h2o_adaboost), FALSE)
h2o_adaboost <- h2o.adaBoost(nlearners = 5, x = predictors, y = response, training_frame = data, seed = 1234, weak_learner = "DEEP_LEARNING", weak_learner_params = list(nepochs=3, hidden=list(2,1,2)))
expect_equal(is.null(h2o_adaboost), FALSE)
}

doTest("adaBoost: Smoke Test For Weak Learner Params - checks only that params are passed through the API", test.adaBoost.smoke)