GH-15779 add DeepLearning to weak_learners #15900

Merged: 1 commit, Nov 16, 2023
15 changes: 15 additions & 0 deletions h2o-algos/src/main/java/hex/adaboost/AdaBoost.java
@@ -3,6 +3,8 @@
 import hex.Model;
 import hex.ModelBuilder;
 import hex.ModelCategory;
+import hex.deeplearning.DeepLearning;
+import hex.deeplearning.DeepLearningModel;
 import hex.glm.GLM;
 import hex.glm.GLMModel;
 import hex.tree.drf.DRF;
@@ -169,6 +171,8 @@ private ModelBuilder chooseWeakLearner(Frame frame) {
         return getGLMWeakLearner(frame);
       case GBM:
         return getGBMWeakLearner(frame);
+      case DEEP_LEARNING:
+        return getDeepLearningWeakLearner(frame);
       default:
       case DRF:
         return getDRFWeakLearner(frame);
@@ -212,6 +216,17 @@ private GBM getGBMWeakLearner(Frame frame) {
     return new GBM(parms);
   }
 
+  private DeepLearning getDeepLearningWeakLearner(Frame frame) {
+    DeepLearningModel.DeepLearningParameters parms = new DeepLearningModel.DeepLearningParameters();
+    parms._train = frame._key;
+    parms._response_column = _parms._response_column;
+    parms._weights_column = _weightsName;
+    parms._seed = _parms._seed;
+    parms._epochs = 10;
+    parms._hidden = new int[]{2};
+    return new DeepLearning(parms);
+  }
+
   public TwoDimTable createModelSummaryTable() {
     List<String> colHeaders = new ArrayList<>();
     List<String> colTypes = new ArrayList<>();
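A note on the defaults above: the Deep Learning weak learner is configured as a very small network (one hidden layer of two neurons, trained for 10 epochs), which keeps each base model cheap to train and weak in the AdaBoost sense.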
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/adaboost/AdaBoostModel.java
@@ -10,7 +10,7 @@
 public class AdaBoostModel extends Model<AdaBoostModel, AdaBoostModel.AdaBoostParameters, AdaBoostModel.AdaBoostOutput> {
   private static final Logger LOG = Logger.getLogger(AdaBoostModel.class);
 
-  public enum Algorithm {DRF, GLM, GBM, AUTO}
+  public enum Algorithm {DRF, GLM, GBM, DEEP_LEARNING, AUTO}
 
   public AdaBoostModel(Key<AdaBoostModel> selfKey, AdaBoostParameters parms,
                        AdaBoostOutput output) {
@@ -2361,6 +2361,13 @@ public boolean isDistributionHuber() {
     }
   }
 
+  @Override
+  public double score(double[] data) {
+    double[] pred = score0(data, new double[_output.nclasses() + 1], 0);
+    score0PostProcessSupervised(pred, data);
+    return pred[0];
+  }
+
 
 }
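The file header for this hunk is not shown; the use of score0PostProcessSupervised and _output.nclasses() suggests it belongs to the Deep Learning model class. The new score(double[]) override returns the predicted label (pred[0]) for a single row, presumably the entry point the AdaBoost loop uses to score each Deep Learning weak learner when reweighting training rows.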

2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/schemas/AdaBoostV3.java
@@ -29,7 +29,7 @@ public static final class AdaBoostParametersV3 extends ModelParametersSchemaV3<A
     @API(help = "Number of AdaBoost weak learners.", gridable = true)
     public int nlearners;
 
-    @API(help = "Choose a weak learner type. Defaults to AUTO, which means DRF.", gridable = true, values = {"AUTO", "DRF", "GLM", "GBM"})
+    @API(help = "Choose a weak learner type. Defaults to AUTO, which means DRF.", gridable = true, values = {"AUTO", "DRF", "GLM", "GBM", "DEEP_LEARNING"})
     public AdaBoostModel.Algorithm weak_learner;
 
     @API(help="Learning rate (from 0.0 to 1.0)", gridable = true)
50 changes: 50 additions & 0 deletions h2o-algos/src/test/java/hex/adaboost/AdaBoostTest.java
@@ -102,6 +102,30 @@ public void testBasicTrainGLM() {
     }
   }
 
+  @Test
+  public void testBasicTrainDeepLearning() {
+    try {
+      Scope.enter();
+      Frame train = parseTestFile("smalldata/prostate/prostate.csv");
+      Scope.track(train);
+      String response = "CAPSULE";
+      train.toCategoricalCol(response);
+      AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
+      p._nlearners = 20;
+      p._train = train._key;
+      p._seed = 0xDECAF;
+      p._weak_learner = AdaBoostModel.Algorithm.DEEP_LEARNING;
+      p._response_column = response;
+
+      AdaBoost adaBoost = new AdaBoost(p);
+      AdaBoostModel adaBoostModel = adaBoost.trainModel().get();
+      Scope.track_generic(adaBoostModel);
+      assertNotNull(adaBoostModel);
+    } finally {
+      Scope.exit();
+    }
+  }
+
   @Test
   public void testBasicTrainLarge() {
     try {
@@ -488,4 +512,30 @@ public void testBasicTrainAndScoreGBM() {
       Scope.exit();
     }
   }
+
+  @Test
+  public void testBasicTrainAndScoreDeepLearning() {
+    try {
+      Scope.enter();
+      Frame train = Scope.track(parseTestFile("smalldata/prostate/prostate.csv"));
+      String response = "CAPSULE";
+      train.toCategoricalCol(response);
+      AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
+      p._train = train._key;
+      p._seed = 0xDECAF;
+      p._nlearners = 50;
+      p._weak_learner = AdaBoostModel.Algorithm.DEEP_LEARNING;
+      p._response_column = response;
+
+      AdaBoost adaBoost = new AdaBoost(p);
+      AdaBoostModel adaBoostModel = adaBoost.trainModel().get();
+      Scope.track_generic(adaBoostModel);
+      assertNotNull(adaBoostModel);
+
+      Frame score = adaBoostModel.score(train);
+      Scope.track(score);
+    } finally {
+      Scope.exit();
+    }
+  }
 }
8 changes: 4 additions & 4 deletions h2o-py/h2o/estimators/adaboost.py
@@ -29,7 +29,7 @@ def __init__(self,
                  categorical_encoding="auto",  # type: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"]
                  weights_column=None,  # type: Optional[str]
                  nlearners=50,  # type: int
-                 weak_learner="auto",  # type: Literal["auto", "drf", "glm", "gbm"]
+                 weak_learner="auto",  # type: Literal["auto", "drf", "glm", "gbm", "deep_learning"]
                  learn_rate=0.5,  # type: float
                  seed=-1,  # type: int
                  ):
@@ -64,7 +64,7 @@ def __init__(self,
         :type nlearners: int
         :param weak_learner: Choose a weak learner type. Defaults to AUTO, which means DRF.
             Defaults to ``"auto"``.
-        :type weak_learner: Literal["auto", "drf", "glm", "gbm"]
+        :type weak_learner: Literal["auto", "drf", "glm", "gbm", "deep_learning"]
         :param learn_rate: Learning rate (from 0.0 to 1.0)
             Defaults to ``0.5``.
         :type learn_rate: float
@@ -180,13 +180,13 @@ def weak_learner(self):
         """
         Choose a weak learner type. Defaults to AUTO, which means DRF.
 
-        Type: ``Literal["auto", "drf", "glm", "gbm"]``, defaults to ``"auto"``.
+        Type: ``Literal["auto", "drf", "glm", "gbm", "deep_learning"]``, defaults to ``"auto"``.
         """
         return self._parms.get("weak_learner")
 
     @weak_learner.setter
     def weak_learner(self, weak_learner):
-        assert_is_type(weak_learner, None, Enum("auto", "drf", "glm", "gbm"))
+        assert_is_type(weak_learner, None, Enum("auto", "drf", "glm", "gbm", "deep_learning"))
         self._parms["weak_learner"] = weak_learner
 
     @property
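For illustration, here is a minimal Python sketch (not part of the PR) of training with the new option. It assumes the estimator is exported as H2OAdaBoostEstimator, a running local H2O instance, and the same prostate data set the Java tests load (path relative to an h2o-3 checkout):

```python
import h2o
from h2o.estimators import H2OAdaBoostEstimator

h2o.init()

# Same file the Java tests use; the response must be categorical
# for AdaBoost binary classification.
train = h2o.import_file("smalldata/prostate/prostate.csv")
train["CAPSULE"] = train["CAPSULE"].asfactor()

# Boost 50 tiny Deep Learning networks instead of the default DRF learner.
model = H2OAdaBoostEstimator(
    nlearners=50,
    weak_learner="deep_learning",
    seed=0xDECAF,
)
model.train(y="CAPSULE", training_frame=train)

preds = model.predict(train)
```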
8 changes: 4 additions & 4 deletions h2o-r/h2o-package/R/adaboost.R
@@ -25,8 +25,8 @@
 #' you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get
 #' an accurate prediction, remove all rows with weight == 0.
 #' @param nlearners Number of AdaBoost weak learners. Defaults to 50.
-#' @param weak_learner Choose a weak learner type. Defaults to AUTO, which means DRF. Must be one of: "AUTO", "DRF", "GLM", "GBM".
-#'        Defaults to AUTO.
+#' @param weak_learner Choose a weak learner type. Defaults to AUTO, which means DRF. Must be one of: "AUTO", "DRF", "GLM", "GBM",
+#'        "DEEP_LEARNING". Defaults to AUTO.
 #' @param learn_rate Learning rate (from 0.0 to 1.0) Defaults to 0.5.
 #' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
 #'        Defaults to -1 (time-based random number).
@@ -58,7 +58,7 @@ h2o.adaBoost <- function(x,
                          categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                          weights_column = NULL,
                          nlearners = 50,
-                         weak_learner = c("AUTO", "DRF", "GLM", "GBM"),
+                         weak_learner = c("AUTO", "DRF", "GLM", "GBM", "DEEP_LEARNING"),
                          learn_rate = 0.5,
                          seed = -1)
 {
@@ -110,7 +110,7 @@
                                   categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                                   weights_column = NULL,
                                   nlearners = 50,
-                                  weak_learner = c("AUTO", "DRF", "GLM", "GBM"),
+                                  weak_learner = c("AUTO", "DRF", "GLM", "GBM", "DEEP_LEARNING"),
                                   learn_rate = 0.5,
                                   seed = -1,
                                   segment_columns = NULL,