diff --git a/h2o-py/h2o/model/metrics/binomial.py b/h2o-py/h2o/model/metrics/binomial.py
index 3c7a03665433..23130234a84b 100644
--- a/h2o-py/h2o/model/metrics/binomial.py
+++ b/h2o-py/h2o/model/metrics/binomial.py
@@ -952,3 +952,27 @@ def gains_lift_plot(self, type="both", server=False, save_plot_path=None, plot=T
         else:
             return decorate_plot_result(res=gl)
 
+    def thresholds_and_metric_scores(self):
+        """Retrieve the thresholds and metric scores table.
+
+        :examples:
+
+        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
+        >>> local_data = [[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],
+        ...               [1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],
+        ...               [0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],
+        ...               [0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],[0, 'b']]
+        >>> h2o_data = h2o.H2OFrame(local_data)
+        >>> h2o_data.set_names(['response', 'predictor'])
+        >>> h2o_data["response"] = h2o_data["response"].asfactor()
+        >>> gbm = H2OGradientBoostingEstimator(ntrees=1,
+        ...                                    distribution="bernoulli")
+        >>> gbm.train(x=list(range(1, h2o_data.ncol)),
+        ...           y="response",
+        ...           training_frame=h2o_data)
+        >>> perf = gbm.model_performance()
+        >>> perf.thresholds_and_metric_scores()
+        """
+        if 'thresholds_and_metric_scores' in self._metric_json:
+            return self._metric_json['thresholds_and_metric_scores']
+        return None
diff --git a/h2o-py/h2o/model/models/binomial.py b/h2o-py/h2o/model/models/binomial.py
index 6bb2a130ff5c..a2fa65894c7d 100644
--- a/h2o-py/h2o/model/models/binomial.py
+++ b/h2o-py/h2o/model/models/binomial.py
@@ -10,6 +10,43 @@
 
 class H2OBinomialModel(ModelBase):
 
+    def thresholds_and_metric_scores(self, train=False, valid=False, xval=False):
+        """
+        Get all thresholds and metric scores in a table.
+
+        If all are ``False`` (default), then return the training metric table.
+        If more than one option is set to ``True``, then return a dictionary of tables where
+        the keys are "train", "valid", and "xval".
+
+        :param bool train: If ``True``, return the thresholds and metric scores table for the training data.
+        :param bool valid: If ``True``, return the thresholds and metric scores table for the validation data.
+        :param bool xval: If ``True``, return the thresholds and metric scores table for each of the cross-validated splits.
+
+        :returns: The thresholds and metric scores tables for the specified key(s).
+
+        :examples:
+
+        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
+        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
+        >>> r = cars[0].runif()
+        >>> train = cars[r > .2]
+        >>> valid = cars[r <= .2]
+        >>> response_col = "economy_20mpg"
+        >>> distribution = "bernoulli"
+        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
+        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
+        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
+        ...                                    distribution=distribution,
+        ...                                    fold_assignment="Random")
+        >>> gbm.train(y=response_col,
+        ...           x=predictors,
+        ...           validation_frame=valid,
+        ...           training_frame=train)
+        >>> gbm.thresholds_and_metric_scores()  # <- Default: return training metric table
+        >>> gbm.thresholds_and_metric_scores(train=True, valid=True, xval=True)
+        """
+        return self._delegate_to_metrics('thresholds_and_metric_scores', train=train, valid=valid, xval=xval)
+
     def F1(self, thresholds=None, train=False, valid=False, xval=False):
         """
         Get the F1 value for a set of thresholds.
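Reviewer note: the two Python entry points above are meant to agree — the model-level method delegates to the metrics object via `_delegate_to_metrics`, so calling it on the model or on a performance object should yield the same training table. A minimal sketch of that round trip (assuming a reachable H2O cluster and access to the public test data; the frame and predictor names are illustrative, taken from the docstring above):

    import h2o
    from h2o.estimators.gbm import H2OGradientBoostingEstimator

    h2o.init()

    # Binary response: economy_20mpg must be a factor for distribution="bernoulli".
    cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
    cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()

    gbm = H2OGradientBoostingEstimator(nfolds=3, distribution="bernoulli")
    gbm.train(x=["displacement", "power", "weight", "acceleration", "year"],
              y="economy_20mpg", training_frame=cars)

    # Model-level call and metrics-level call should return the same table.
    tbl_from_model = gbm.thresholds_and_metric_scores()
    tbl_from_perf = gbm.model_performance(train=True).thresholds_and_metric_scores()

    # The result is an H2OTwoDimTable; as_data_frame() gives a pandas view.
    print(tbl_from_model.as_data_frame().head())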
diff --git a/h2o-py/tests/testdir_algos/gbm/pyunit_gbm_cars_thresholds.py b/h2o-py/tests/testdir_algos/gbm/pyunit_gbm_cars_thresholds.py
new file mode 100644
index 000000000000..aa859c4608fe
--- /dev/null
+++ b/h2o-py/tests/testdir_algos/gbm/pyunit_gbm_cars_thresholds.py
@@ -0,0 +1,26 @@
+from builtins import range
+import sys
+sys.path.insert(1, "../../../")
+import h2o
+from tests import pyunit_utils
+from h2o.estimators.gbm import H2OGradientBoostingEstimator
+
+
+def thresholds_gbm():
+    prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
+    prostate[1] = prostate[1].asfactor()
+    prostate.summary()
+
+    prostate_gbm = H2OGradientBoostingEstimator(nfolds=5, distribution="bernoulli")
+    prostate_gbm.train(x=list(range(2, 9)), y=1, training_frame=prostate)
+    prostate_gbm.show()
+
+    ths_model = prostate_gbm.thresholds_and_metric_scores()
+    ths_perf = prostate_gbm.model_performance(train=True).thresholds_and_metric_scores()
+    pyunit_utils.assert_H2OTwoDimTable_equal_upto(ths_model, ths_perf, ths_model.col_header)
+
+
+if __name__ == "__main__":
+    pyunit_utils.standalone_test(thresholds_gbm)
+else:
+    thresholds_gbm()
diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R
index f5b005f05191..a757d93a2ccf 100755
--- a/h2o-r/h2o-package/R/models.R
+++ b/h2o-r/h2o-package/R/models.R
@@ -1667,14 +1667,18 @@ h2o.auuc_table <- function(object, train=FALSE, valid=FALSE) {
 #' Retrieve the thresholds and metric scores table
 #'
-#' Retrieves the thresholds and metric scores table from an \linkS4class{H2OBinomialUpliftMetrics}.
-#' The table contains indices, thresholds, all cumulative uplift values and cumulative number of observations.
-#' If "train" and "valid" parameters are FALSE (default), then the training table is returned. If more
-#' than one parameter is set to TRUE, then a named vector of tables is returned, where the names are "train", "valid".
+#' Retrieves the thresholds and metric scores table from a \linkS4class{H2OBinomialUpliftMetrics}
+#' or a \linkS4class{H2OBinomialMetrics}.
+#'
+#' The table contains indices, thresholds, all cumulative uplift values and cumulative number of observations for
+#' uplift binomial models, or thresholds and maximal metric values for binomial models.
+#' If the "train", "valid", and "xval" parameters are FALSE (default), then the training table is returned. If more
+#' than one parameter is set to TRUE, then a named vector of tables is returned, where the names are "train", "valid", "xval".
 #'
-#' @param object An \linkS4class{H2OBinomialUpliftMetrics}
+#' @param object A \linkS4class{H2OBinomialUpliftMetrics} or a \linkS4class{H2OBinomialMetrics}
 #' @param train Retrieve the training thresholds and metric scores table
 #' @param valid Retrieve the validation thresholds and metric scores table
+#' @param xval Retrieve the cross-validation thresholds and metric scores table (only for \linkS4class{H2OBinomialMetrics})
 #' @examples
 #' \dontrun{
 #' library(h2o)
 #' h2o.init()
@@ -1691,11 +1695,11 @@ h2o.auuc_table <- function(object, train=FALSE, valid=FALSE) {
 #' h2o.thresholds_and_metric_scores(perf)
 #' }
 #' @export
-h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE) {
+h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE, xval=FALSE) {
   if( is(object, "H2OModelMetrics") ) return( object@metrics$thresholds_and_metric_score)
   if( is(object, "H2OModel") ) {
     model.parts <- .model.parts(object)
-    if ( !train && !valid ) {
+    if ( !train && !valid && !xval ) {
       metric <- model.parts$tm@metrics$thresholds_and_metric_score
       if ( !is.null(metric) ) return(metric)
     }
@@ -1712,6 +1716,13 @@ h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE) {
       v_names <- c(v_names,"valid")
     }
   }
+  if ( xval ) {
+    if( is.null(model.parts$xm) ) return(invisible(.warn.no.cross.validation()))
+    else {
+      v <- c(v,model.parts$xm@metrics$thresholds_and_metric_score)
+      v_names <- c(v_names,"xval")
+    }
+  }
   if ( !is.null(v) ) {
     names(v) <- v_names
     if ( length(v)==1 ) { return( v[[1]] ) } else { return( v ) }
diff --git a/h2o-r/tests/testdir_algos/gbm/runit_GBM_bernoulli.R b/h2o-r/tests/testdir_algos/gbm/runit_GBM_bernoulli.R
index 99f5b18f9c6d..077cc1723049 100644
--- a/h2o-r/tests/testdir_algos/gbm/runit_GBM_bernoulli.R
+++ b/h2o-r/tests/testdir_algos/gbm/runit_GBM_bernoulli.R
@@ -44,7 +44,8 @@ test.GBM.bernoulli <- function() {
   Log.info("R Confusion Matrix:")
   print(RCM)
   Log.info("H2O Confusion Matrix:")
-  print(h2o.confusionMatrix(h2o.performance(prostate.h2o)))
+  perf <- h2o.performance(prostate.h2o)
+  print(h2o.confusionMatrix(perf))
 
   R.auc <- gbm.roc.area(prostate.data$CAPSULE,R.preds)
   Log.info(paste("R AUC:", R.auc, "\tH2O AUC:", h2o.auc(h2o.performance(prostate.h2o))))
@@ -55,6 +56,10 @@ test.GBM.bernoulli <- function() {
   print(prostate.h2o@model$init_f)
   expect_equal(prostate.h2o@model$init_f, f0, tolerance=1e-4)  ## check the intercept term
 
+  # GH-15889
+  ths_model <- h2o.thresholds_and_metric_scores(prostate.h2o)
+  ths_perf <- h2o.thresholds_and_metric_scores(perf)
+  expect_equal(ths_model, ths_perf)
 }
 
 doTest("GBM Test: prostate.csv with Bernoulli distribution", test.GBM.bernoulli)
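Reviewer note: the R changes mirror the Python API — with the new `xval` flag, setting more than one flag returns a named vector of tables keyed "train"/"valid"/"xval", just as the Python method returns a dictionary with those keys. A short sketch of the multi-key path on the Python side (assuming a running cluster; the data path follows the unit test above, the split and predictor list are illustrative, and the xval table requires training with nfolds > 1):

    import h2o
    from h2o.estimators.gbm import H2OGradientBoostingEstimator

    h2o.init()

    prostate = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()
    train, valid = prostate.split_frame(ratios=[0.8], seed=42)

    gbm = H2OGradientBoostingEstimator(nfolds=3, distribution="bernoulli",
                                       fold_assignment="Random")
    gbm.train(x=["AGE", "RACE", "PSA", "GLEASON"], y="CAPSULE",
              training_frame=train, validation_frame=valid)

    # More than one flag set -> dict keyed by "train", "valid", "xval",
    # analogous to the named vector returned by h2o.thresholds_and_metric_scores() in R.
    tables = gbm.thresholds_and_metric_scores(train=True, valid=True, xval=True)
    print(tables["train"])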