Skip to content

Commit

Permalink
Unify R and Python examples
Browse files (browse the repository at this point in the history)
  • Loading branch information
maurever committed Dec 4, 2023
1 parent e18157f commit d33e3b4
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 41 deletions.
10 changes: 5 additions & 5 deletions h2o-py/h2o/frame.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4668,12 +4668,12 @@ def match(self, table, nomatch=float("nan"), start_index=1):
:examples:
>>> iris = h2o.import_file("h2o://iris")
>>> match_col = iris["C5"].match(['Iris-setosa', 'Iris-versicolor'])
>>> data = h2o.import_file("h2o://iris")
>>> match_col = data["C5"].match(['Iris-setosa', 'Iris-versicolor'])
>>> match_col.names = ['match']
>>> iris_match = iris.cbind(match_col)
>>> iris_split = iris_match.split_frame(ratios=[0.05], seed=1)[0]
>>> iris_split
>>> iris_match = data.cbind(match_col)
>>> sample = iris_match.split_frame(ratios=[0.05], seed=1)[0]
>>> sample
"""
return H2OFrame._expr(expr=ExprNode("match", self, table, nomatch, start_index))

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,20 +12,20 @@ def h2o_H2OFrame_match():
Python API test: h2o.frame.H2OFrame.match(table, nomatch=0)
"""
iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))
data = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

nomatch = 0

# string value
match_frame = iris["C5"].match(['Iris-setosa'], nomatch=nomatch)
match_frame = data["C5"].match(['Iris-setosa'], nomatch=nomatch)
assert_is_type(match_frame, H2OFrame) # check return type
assert match_frame.sum()[0, 0] == 50.0, "h2o.H2OFrame.match() command is not working." # check return result
assert match_frame[0, 0] == 1, "match value should be 1"
assert match_frame[50, 0] == nomatch, "match value should be 0"
assert match_frame[100, 0] == nomatch, "match value should be 0"

# string values
match_frame = iris["C5"].match(['Iris-setosa', 'Iris-versicolor'], nomatch=nomatch)
match_frame = data["C5"].match(['Iris-setosa', 'Iris-versicolor'], nomatch=nomatch)
assert_is_type(match_frame, H2OFrame, nomatch=0) # check return type
assert round(match_frame.sum()[0, 0]) == 150, "h2o.H2OFrame.match() command is not working." # check return result
assert match_frame[0, 0] == 1, "match value should be 1"
Expand All @@ -34,7 +34,7 @@ def h2o_H2OFrame_match():

# use default nomatch
match_values = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
match_frame = iris["C5"].match(match_values)
match_frame = data["C5"].match(match_values)
assert_is_type(match_frame, H2OFrame) # check return type
assert round(match_frame.sum()[0, 0]) == 300, "h2o.H2OFrame.match() command is not working."
assert match_frame[0, 0] == 1, "match value should be 1"
Expand All @@ -44,7 +44,7 @@ def h2o_H2OFrame_match():
# set nomatch value to -1
nomatch = -1
match_values = ['Iris-setosa', 'Iris-versicolor']
match_frame = iris["C5"].match(match_values, nomatch=nomatch)
match_frame = data["C5"].match(match_values, nomatch=nomatch)
assert round(match_frame.sum()[0, 0]) == 100, "h2o.H2OFrame.match() command is not working."
assert match_frame[0, 0] == 1, "match value should be 1"
assert match_frame[50, 0] == 2, "match value should be 2"
Expand All @@ -53,15 +53,15 @@ def h2o_H2OFrame_match():
# start index feature = 0
match_values = ['Iris-setosa', 'Iris-versicolor']
start_index = 0
match_frame = iris["C5"].match(match_values, start_index=start_index)
match_frame = data["C5"].match(match_values, start_index=start_index)
assert match_frame.sum()[0, 0] == 50, "h2o.H2OFrame.match() command is not working."
assert match_frame[0, 0] == start_index, "match value should be 0"
assert match_frame[50, 0] == start_index+1, "match value should be 1"
assert math.isnan(match_frame[100, 0]), "match value should be nan"

# numeric values
match_values = [5.1]
match_frame = iris["C1"].match(match_values)
match_frame = data["C1"].match(match_values)
assert match_frame.sum()[0, 0] == 9, "h2o.H2OFrame.match() command is not working."
assert match_frame[0, 0] == 1, "match value should be 1"
assert match_frame[17, 0] == 1, "match value should be 1"
Expand All @@ -70,19 +70,19 @@ def h2o_H2OFrame_match():

# duplicate in match values
nomatch = 0
match_frame = iris["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa'], nomatch=nomatch)
match_frame = data["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa'], nomatch=nomatch)
assert_is_type(match_frame, H2OFrame, nomatch=0) # check return type
assert round(match_frame.sum()[0, 0]) == 150, "h2o.H2OFrame.match() command is not working." # check return result
assert match_frame[0, 0] == 1, "match value should be 1"
assert match_frame[50, 0] == 2, "match value should be 2"
assert match_frame[100, 0] == nomatch, "match value should be 0"

# test example for doc
match_col = iris["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa'])
match_col = data["C5"].match(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa'])
match_col.names = ['match']
iris_match = iris.cbind(match_col)
splited = iris_match.split_frame(ratios=[0.05], seed=1)[0]
print(splited)
iris_match = data.cbind(match_col)
sample = iris_match.split_frame(ratios=[0.05], seed=1)[0]
print(sample)


pyunit_utils.standalone_test(h2o_H2OFrame_match)
10 changes: 5 additions & 5 deletions h2o-r/h2o-package/R/frame.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -863,11 +863,11 @@ cut.H2OFrame <- h2o.cut
#' @examples
#' \dontrun{
#' h2o.init()
#' frame <- as.h2o(iris)
#' match_col <- h2o.match(frame$Species, c("setosa", "versicolor", "setosa"))
#' iris_match <- h2o.cbind(frame, match_col)
#' iris_split <- h2o.splitFrame(iris_match, ratios=0.05, seed=1)
#' iris_split[1]
#' data <- as.h2o(iris)
#' match_col <- h2o.match(data$Species, c("setosa", "versicolor", "setosa"))
#' iris_match <- h2o.cbind(data, match_col)
#' sample <- h2o.splitFrame(iris_match, ratios=0.05, seed=1)[1]
#' sample
#' }
#' @export
h2o.match <- function(x, table, nomatch=NA_integer_, start_index=1) {
Expand Down
38 changes: 19 additions & 19 deletions h2o-r/tests/testdir_munging/slice/runit_match.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,76 +4,76 @@ source("../../../scripts/h2o-r-test-setup.R")

test.match <- function() {

frame <- as.h2o(iris)
data <- as.h2o(iris)

# compare h2o and base %in%
h2o_in <- 'setosa' %in% frame$Species
base_in <- base::`%in%`("setosa", as.vector(frame$Species))
h2o_in <- 'setosa' %in% data$Species
base_in <- base::`%in%`("setosa", as.vector(data$Species))
expect_equal(h2o_in, base_in)

sub_h2o_in <- frame$Species %in% c("setosa", "versicolor")
hh_in <- frame[sub_h2o_in,]
sub_h2o_in <- data$Species %in% c("setosa", "versicolor")
hh_in <- data[sub_h2o_in,]
expect_equal(dim(hh_in), c(100, 5))

sub_base_in <- base::`%in%`(as.vector(frame$Species), c("setosa", "versicolor"))
hh_in_base <- frame[as.h2o(sub_base_in),]
sub_base_in <- base::`%in%`(as.vector(data$Species), c("setosa", "versicolor"))
hh_in_base <- data[as.h2o(sub_base_in),]
expect_equal(dim(hh_in_base), c(100, 5))

expect_equal(hh_in, hh_in_base)

# compare h2o and base match
# string values, default setting
sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor"))
sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor"))
sub_h2o_match <- as.vector(sub_h2o_match)
expect_equal(sub_h2o_match[1], 1)
expect_equal(sub_h2o_match[51], 2)
expect_equal(sub_h2o_match[101], NA_integer_)

sub_base_match <- base::match(as.vector(frame$Species), c("setosa", "versicolor"))
sub_base_match <- base::match(as.vector(data$Species), c("setosa", "versicolor"))
expect_equal(sub_base_match[1], 1)
expect_equal(sub_base_match[51], 2)
expect_equal(sub_base_match[101], NA_integer_)

expect_equal(sub_h2o_match, sub_base_match)

# string values, nomatch=0
sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor"), nomatch=0)
sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor"), nomatch=0)
sub_h2o_match <- as.vector(sub_h2o_match)
expect_equal(sub_h2o_match[1], 1)
expect_equal(sub_h2o_match[51], 2)
expect_equal(sub_h2o_match[101], 0)

# string values, start_index=0
sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor"), start_index=0)
sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor"), start_index=0)
sub_h2o_match <- as.vector(sub_h2o_match)
expect_equal(sub_h2o_match[1], 0)
expect_equal(sub_h2o_match[51], 1)
expect_equal(sub_h2o_match[101], NA_integer_)

sub_h2o_in <- frame$Sepal.Length %in% c(5.1)
hh_in <- frame[sub_h2o_in,]
sub_h2o_in <- data$Sepal.Length %in% c(5.1)
hh_in <- data[sub_h2o_in,]
expect_equal(dim(hh_in), c(9,5))

# numeric value, default setting
sub_h2o_match <- h2o.match(frame$Sepal.Length, c(5.1))
sub_h2o_match <- h2o.match(data$Sepal.Length, c(5.1))
sub_h2o_match <- as.vector(sub_h2o_match)
expect_equal(sub_h2o_match[1], 1)
expect_equal(sub_h2o_match[18], 1)
expect_equal(sub_h2o_match[20], 1)
expect_equal(sub_h2o_match[2], NA_integer_)

# string values, duplicates in match values
sub_h2o_match <- h2o.match(frame$Species, c("setosa", "versicolor", "setosa"))
sub_h2o_match <- h2o.match(data$Species, c("setosa", "versicolor", "setosa"))
sub_h2o_match <- as.vector(sub_h2o_match)
expect_equal(sub_h2o_match[1], 1)
expect_equal(sub_h2o_match[51], 2)
expect_equal(sub_h2o_match[101], NA_integer_)

# test doc example
match_col <- h2o.match(frame$Species, c("setosa", "versicolor", "setosa"))
iris_match <- h2o.cbind(frame, match_col)
splited <- h2o.splitFrame(iris_match, ratios=0.05, seed=1)
print(splited[1])
match_col <- h2o.match(data$Species, c("setosa", "versicolor", "setosa"))
iris_match <- h2o.cbind(data, match_col)
sample <- h2o.splitFrame(iris_match, ratios=0.05, seed=1)[1]
print(sample)
}

doTest("test match", test.match)
Expand Down

0 comments on commit d33e3b4

Please sign in to comment.