From d19bd441036d3984af37efbdc3da1ba4895ba343 Mon Sep 17 00:00:00 2001 From: Veronika Maurerova Date: Fri, 10 Jan 2025 11:02:27 +0100 Subject: [PATCH] Fix junit tests --- h2o-algos/src/main/java/hex/knn/KNNModel.java | 1 - h2o-algos/src/test/java/hex/knn/KNNTest.java | 10 ++-- .../testdir_algos/knn/pyunit_knn_api_test.py | 1 + .../knn/pyunit_knn_compare_sklearn.py | 52 +++++++++++++++++++ 4 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 h2o-py/tests/testdir_algos/knn/pyunit_knn_compare_sklearn.py diff --git a/h2o-algos/src/main/java/hex/knn/KNNModel.java b/h2o-algos/src/main/java/hex/knn/KNNModel.java index f1b3cd0caf37..da5bfc2f8a73 100644 --- a/h2o-algos/src/main/java/hex/knn/KNNModel.java +++ b/h2o-algos/src/main/java/hex/knn/KNNModel.java @@ -21,7 +21,6 @@ public String javaName() { } public int _k = 3; - //public KNNDistance _distance; public DistanceType _distance; public boolean _compute_metrics; diff --git a/h2o-algos/src/test/java/hex/knn/KNNTest.java b/h2o-algos/src/test/java/hex/knn/KNNTest.java index 421f4f47a4ab..17a940c44163 100644 --- a/h2o-algos/src/test/java/hex/knn/KNNTest.java +++ b/h2o-algos/src/test/java/hex/knn/KNNTest.java @@ -35,7 +35,7 @@ public void testIris() { KNNModel.KNNParameters parms = new KNNModel.KNNParameters(); parms._train = fr._key; parms._k = 3; - parms._distance = new EuclideanDistance(); + parms._distance = DistanceType.EUCLIDEAN; parms._response_column = response; parms._id_column = idColumn; parms._auc_type = MultinomialAucType.MACRO_OVR; @@ -90,7 +90,7 @@ public void testSimpleFrameEuclidean() { KNNModel.KNNParameters parms = new KNNModel.KNNParameters(); parms._train = fr._key; parms._k = 2; - parms._distance = new EuclideanDistance(); + parms._distance = DistanceType.EUCLIDEAN; parms._response_column = response; parms._id_column = idColumn; parms._auc_type = MultinomialAucType.MACRO_OVR; @@ -165,7 +165,7 @@ public void testSimpleFrameManhattan() { KNNModel.KNNParameters parms = new KNNModel.KNNParameters(); parms._train = fr._key; parms._k = 2; - parms._distance = new ManhattanDistance(); + parms._distance = DistanceType.MANHATTAN; parms._response_column = response; parms._id_column = idColumn; parms._auc_type = MultinomialAucType.MACRO_OVR; @@ -240,7 +240,7 @@ public void testSimpleFrameCosine() { KNNModel.KNNParameters parms = new KNNModel.KNNParameters(); parms._train = fr._key; parms._k = 2; - parms._distance = new CosineDistance(); + parms._distance = DistanceType.COSINE; parms._response_column = response; parms._id_column = idColumn; parms._auc_type = MultinomialAucType.MACRO_OVR; @@ -332,7 +332,7 @@ public void testIdColumnIsNotDefined() { KNNModel.KNNParameters parms = new KNNModel.KNNParameters(); parms._train = fr._key; parms._k = 2; - parms._distance = new EuclideanDistance(); + parms._distance = DistanceType.EUCLIDEAN; parms._response_column = "class"; parms._id_column = null; diff --git a/h2o-py/tests/testdir_algos/knn/pyunit_knn_api_test.py b/h2o-py/tests/testdir_algos/knn/pyunit_knn_api_test.py index 6e8fbdd6742a..5880fc370d2e 100644 --- a/h2o-py/tests/testdir_algos/knn/pyunit_knn_api_test.py +++ b/h2o-py/tests/testdir_algos/knn/pyunit_knn_api_test.py @@ -17,6 +17,7 @@ def knn_api_smoke(): train_h2o[response_column] = train_h2o[response_column].asfactor() train_h2o[id_column] = h2o.H2OFrame(np.arange(0, train_h2o.shape[0])) + model = H2OKnnEstimator( k=3, id_column=id_column, diff --git a/h2o-py/tests/testdir_algos/knn/pyunit_knn_compare_sklearn.py b/h2o-py/tests/testdir_algos/knn/pyunit_knn_compare_sklearn.py new file mode 100644 index 000000000000..a94763dac8ee --- /dev/null +++ b/h2o-py/tests/testdir_algos/knn/pyunit_knn_compare_sklearn.py @@ -0,0 +1,52 @@ +import sys, os + +sys.path.insert(1, os.path.join("..", "..", "..")) +import h2o +from tests import pyunit_utils, assert_equals +from h2o.estimators.knn import H2OKnnEstimator +import numpy as np +from sklearn.neighbors import KNeighborsClassifier +from sklearn.neighbors import kneighbors_graph +import pandas as pd + + +def knn_sklearn_compare(): + seed = 12345 + id_column = "id" + response_column = "class" + x_names = ["sepal_len", "sepal_wid", "petal_len", "petal_wid"] + + train = pd.read_csv(pyunit_utils.locate("smalldata/iris/iris_wheader.csv")) + + knn = KNeighborsClassifier(n_neighbors=3) + knn.fit(train[x_names], train[response_column]) + print(knn) + knn_score = knn.score(train[x_names], train[response_column]) + print(knn_score) + + knn_graph = kneighbors_graph(train[x_names], 3, mode='connectivity', include_self=False, metric="euclidean") + print(knn_graph) + + train_h2o = h2o.H2OFrame(train) + train_h2o[response_column] = train_h2o[response_column].asfactor() + train_h2o[id_column] = h2o.H2OFrame(np.arange(0, train_h2o.shape[0])) + + h2o_knn = H2OKnnEstimator( + k=3, + id_column=id_column, + distance="euclidean", + seed=seed, + auc_type="macroovr" + ) + + h2o_knn.train(y=response_column, x=x_names, training_frame=train_h2o) + distances_key = h2o_knn._model_json["output"]["distances"] + print(distances_key) + distances_frame = h2o.get_frame(distances_key) + print(distances_frame) + + +if __name__ == "__main__": + pyunit_utils.standalone_test(knn_sklearn_compare) +else: + knn_sklearn_compare()