From d33e3b49d34a55632d0b3a1dc33ef5bedf54f6a4 Mon Sep 17 00:00:00 2001 From: Veronika Maurerova Date: Mon, 4 Dec 2023 15:04:47 +0100 Subject: [PATCH] Unify R and Python examples --- h2o-py/h2o/frame.py | 10 ++--- .../pyunit_h2oH2OFrame_match.py | 24 ++++++------ h2o-r/h2o-package/R/frame.R | 10 ++--- .../tests/testdir_munging/slice/runit_match.R | 38 +++++++++---------- 4 files changed, 41 insertions(+), 41 deletions(-) diff --git a/h2o-py/h2o/frame.py b/h2o-py/h2o/frame.py index 0f580cab982b..d1a98aa8e22d 100644 --- a/h2o-py/h2o/frame.py +++ b/h2o-py/h2o/frame.py @@ -4668,12 +4668,12 @@ def match(self, table, nomatch=float("nan"), start_index=1): :examples: - >>> iris = h2o.import_file("h2o://iris") - >>> match_col = iris["C5"].match(['Iris-setosa', 'Iris-versicolor']) + >>> data = h2o.import_file("h2o://iris") + >>> match_col = data["C5"].match(['Iris-setosa', 'Iris-versicolor']) >>> match_col.names = ['match'] - >>> iris_match = iris.cbind(match_col) - >>> iris_split = iris_match.split_frame(ratios=[0.05], seed=1)[0] - >>> iris_split + >>> iris_match = data.cbind(match_col) + >>> sample = iris_match.split_frame(ratios=[0.05], seed=1)[0] + >>> sample """ return H2OFrame._expr(expr=ExprNode("match", self, table, nomatch, start_index)) diff --git a/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame_match.py b/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame_match.py index 4b4df23ad071..f2b4a897798d 100644 --- a/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame_match.py +++ b/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame_match.py @@ -12,12 +12,12 @@ def h2o_H2OFrame_match(): Python API test: h2o.frame.H2OFrame.match(table, nomatch=0) """ - iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv")) + data = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv")) nomatch = 0 # string value - match_frame = iris["C5"].match(['Iris-setosa'], nomatch=nomatch) + match_frame = data["C5"].match(['Iris-setosa'], nomatch=nomatch) assert_is_type(match_frame, H2OFrame) # check return type assert match_frame.sum()[0, 0] == 50.0, "h2o.H2OFrame.match() command is not working." # check return result assert match_frame[0, 0] == 1, "match value should be 1" @@ -25,7 +25,7 @@ def h2o_H2OFrame_match(): assert match_frame[100, 0] == nomatch, "match value should be 0" # string values - match_frame = iris["C5"].match(['Iris-setosa', 'Iris-versicolor'], nomatch=nomatch) + match_frame = data["C5"].match(['Iris-setosa', 'Iris-versicolor'], nomatch=nomatch) assert_is_type(match_frame, H2OFrame, nomatch=0) # check return type assert round(match_frame.sum()[0, 0]) == 150, "h2o.H2OFrame.match() command is not working." # check return result assert match_frame[0, 0] == 1, "match value should be 1" @@ -34,7 +34,7 @@ def h2o_H2OFrame_match(): # use default nomatch match_values = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'] - match_frame = iris["C5"].match(match_values) + match_frame = data["C5"].match(match_values) assert_is_type(match_frame, H2OFrame) # check return type assert round(match_frame.sum()[0, 0]) == 300, "h2o.H2OFrame.match() command is not working." assert match_frame[0, 0] == 1, "match value should be 1" @@ -44,7 +44,7 @@ def h2o_H2OFrame_match(): # set nomatch value to -1 nomatch = -1 match_values = ['Iris-setosa', 'Iris-versicolor'] - match_frame = iris["C5"].match(match_values, nomatch=nomatch) + match_frame = data["C5"].match(match_values, nomatch=nomatch) assert round(match_frame.sum()[0, 0]) == 100, "h2o.H2OFrame.match() command is not working." assert match_frame[0, 0] == 1, "match value should be 1" assert match_frame[50, 0] == 2, "match value should be 2" @@ -53,7 +53,7 @@ def h2o_H2OFrame_match(): # start index feature = 0 match_values = ['Iris-setosa', 'Iris-versicolor'] start_index = 0 - match_frame = iris["C5"].match(match_values, start_index=start_index) + match_frame = data["C5"].match(match_values, start_index=start_index) assert match_frame.sum()[0, 0] == 50, "h2o.H2OFrame.match() command is not working." assert match_frame[0, 0] == start_index, "match value should be 0" assert match_frame[50, 0] == start_index+1, "match value should be 1" @@ -61,7 +61,7 @@ def h2o_H2OFrame_match(): # numeric values match_values = [5.1] - match_frame = iris["C1"].match(match_values) + match_frame = data["C1"].match(match_values) assert match_frame.sum()[0, 0] == 9, "h2o.H2OFrame.match() command is not working." assert match_frame[0, 0] == 1, "match value should be 1" assert match_frame[17, 0] == 1, "match value should be 1" @@ -70,7 +70,7 @@ def h2o_H2OFrame_match(): # duplicate in match values nomatch = 0 - match_frame = iris["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa'], nomatch=nomatch) + match_frame = data["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa'], nomatch=nomatch) assert_is_type(match_frame, H2OFrame, nomatch=0) # check return type assert round(match_frame.sum()[0, 0]) == 150, "h2o.H2OFrame.match() command is not working." # check return result assert match_frame[0, 0] == 1, "match value should be 1" @@ -78,11 +78,11 @@ def h2o_H2OFrame_match(): assert match_frame[100, 0] == nomatch, "match value should be 0" # test example for doc - match_col = iris["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa']) + match_col = data["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa']) match_col.names = ['match'] - iris_match = iris.cbind(match_col) - splited = iris_match.split_frame(ratios=[0.05], seed=1)[0] - print(splited) + iris_match = data.cbind(match_col) + sample = iris_match.split_frame(ratios=[0.05], seed=1)[0] + print(sample) pyunit_utils.standalone_test(h2o_H2OFrame_match) diff --git a/h2o-r/h2o-package/R/frame.R b/h2o-r/h2o-package/R/frame.R index 0ac4f4ad1f42..4500e24272a9 100644 --- a/h2o-r/h2o-package/R/frame.R +++ b/h2o-r/h2o-package/R/frame.R @@ -863,11 +863,11 @@ cut.H2OFrame <- h2o.cut #' @examples #' \dontrun{ #' h2o.init() -#' frame <- as.h2o(iris) -#' match_col <- h2o.match(frame$Species, c("setosa", "versicolor", "setosa")) -#' iris_match <- h2o.cbind(frame, match_col) -#' iris_split <- h2o.splitFrame(iris_match, ratios=0.05, seed=1) -#' iris_split[1] +#' data <- as.h2o(iris) +#' match_col <- h2o.match(data$Species, c("setosa", "versicolor", "setosa")) +#' iris_match <- h2o.cbind(data, match_col) +#' sample <- h2o.splitFrame(iris_match, ratios=0.05, seed=1)[1] +#' sample #' } #' @export h2o.match <- function(x, table, nomatch=NA_integer_, start_index=1) { diff --git a/h2o-r/tests/testdir_munging/slice/runit_match.R b/h2o-r/tests/testdir_munging/slice/runit_match.R index 9f9c8e3ba308..3b8f76dd8b08 100644 --- a/h2o-r/tests/testdir_munging/slice/runit_match.R +++ b/h2o-r/tests/testdir_munging/slice/runit_match.R @@ -4,32 +4,32 @@ source("../../../scripts/h2o-r-test-setup.R") test.match <- function() { - frame <- as.h2o(iris) + data <- as.h2o(iris) # compare h2o and base %in% - h2o_in <- 'setosa' %in% frame$Species - base_in <- base::`%in%`("setosa", as.vector(frame$Species)) + h2o_in <- 'setosa' %in% data$Species + base_in <- base::`%in%`("setosa", as.vector(data$Species)) expect_equal(h2o_in, base_in) - sub_h2o_in <- frame$Species %in% c("setosa", "versicolor") - hh_in <- frame[sub_h2o_in,] + sub_h2o_in <- data$Species %in% c("setosa", "versicolor") + hh_in <- data[sub_h2o_in,] expect_equal(dim(hh_in), c(100, 5)) - sub_base_in <- base::`%in%`(as.vector(frame$Species), c("setosa", "versicolor")) - hh_in_base <- frame[as.h2o(sub_base_in),] + sub_base_in <- base::`%in%`(as.vector(data$Species), c("setosa", "versicolor")) + hh_in_base <- data[as.h2o(sub_base_in),] expect_equal(dim(hh_in_base), c(100, 5)) expect_equal(hh_in, hh_in_base) # compare h2o and base match # string values, default setting - sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor")) + sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor")) sub_h2o_match <- as.vector(sub_h2o_match) expect_equal(sub_h2o_match[1], 1) expect_equal(sub_h2o_match[51], 2) expect_equal(sub_h2o_match[101], NA_integer_) - sub_base_match <- base::match(as.vector(frame$Species), c("setosa", "versicolor")) + sub_base_match <- base::match(as.vector(data$Species), c("setosa", "versicolor")) expect_equal(sub_base_match[1], 1) expect_equal(sub_base_match[51], 2) expect_equal(sub_base_match[101], NA_integer_) @@ -37,25 +37,25 @@ test.match <- function() { expect_equal(sub_h2o_match, sub_base_match) # string values, nomatch=0 - sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor"), nomatch=0) + sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor"), nomatch=0) sub_h2o_match <- as.vector(sub_h2o_match) expect_equal(sub_h2o_match[1], 1) expect_equal(sub_h2o_match[51], 2) expect_equal(sub_h2o_match[101], 0) # string values, start_index=0 - sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor"), start_index=0) + sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor"), start_index=0) sub_h2o_match <- as.vector(sub_h2o_match) expect_equal(sub_h2o_match[1], 0) expect_equal(sub_h2o_match[51], 1) expect_equal(sub_h2o_match[101], NA_integer_) - sub_h2o_in <- frame$Sepal.Length %in% c(5.1) - hh_in <- frame[sub_h2o_in,] + sub_h2o_in <- data$Sepal.Length %in% c(5.1) + hh_in <- data[sub_h2o_in,] expect_equal(dim(hh_in), c(9,5)) # numeric value, default setting - sub_h2o_match <- h2o.match(frame$Sepal.Length, c(5.1)) + sub_h2o_match <- h2o.match(data$Sepal.Length, c(5.1)) sub_h2o_match <- as.vector(sub_h2o_match) expect_equal(sub_h2o_match[1], 1) expect_equal(sub_h2o_match[18], 1) @@ -63,17 +63,17 @@ test.match <- function() { expect_equal(sub_h2o_match[2], NA_integer_) # string values, duplicates in match values - sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor", "setosa")) + sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor", "setosa")) sub_h2o_match <- as.vector(sub_h2o_match) expect_equal(sub_h2o_match[1], 1) expect_equal(sub_h2o_match[51], 2) expect_equal(sub_h2o_match[101], NA_integer_) # test doc example - match_col <- h2o.match(frame$Species, c("setosa", "versicolor", "setosa")) - iris_match <- h2o.cbind(frame, match_col) - splited <- h2o.splitFrame(iris_match, ratios=0.05, seed=1) - print(splited[1]) + match_col <- h2o.match(data$Species, c("setosa", "versicolor", "setosa")) + iris_match <- h2o.cbind(data, match_col) + sample <- h2o.splitFrame(iris_match, ratios=0.05, seed=1)[1] + print(sample) } doTest("test match", test.match)