From 825f84f4849004fc74eae8bead512092691d0156 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Fri, 22 Dec 2023 13:58:48 +0530 Subject: [PATCH 01/15] rulefit-examples-docs1 --- h2o-bindings/bin/custom/python/gen_rulefit.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 55a01c4057db..a9b995a2538f 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -52,3 +52,36 @@ def predict_rules(self, frame, rule_ids): """ ), ) + +examples = dict( + + algorithm=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> train, test = df.split_frame(ratios=[0.8], seed=1) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1) +>>> rfit.train(training_frame=train, x=x, y=y) +>>> print(rfit.rule_importance()) +>>> rfit.predict(test) + +""", + max_categorical_levels=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> train, test = df.split_frame(ratios=[0.8], seed=1) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1) +>>> rfit.train(training_frame=train, x=x, y=y) +>>> print(rfit.rule_importance()) +>>> rfit.predict(test) +""", +) \ No newline at end of file From 3a1ad407e05765cdb6f7330a437d570475cf3bd8 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:04:23 +0530 Subject: [PATCH 02/15] rulefit-examples-docs2 --- h2o-bindings/bin/custom/python/gen_rulefit.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index a9b995a2538f..f605d0df772e 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -83,5 +83,19 @@ def predict_rules(self, frame, rule_ids): >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) +""", + max_num_rules=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> train, test = df.split_frame(ratios=[0.8], seed=1) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1) +>>> rfit.train(training_frame=train, x=x, y=y) +>>> print(rfit.rule_importance()) +>>> rfit.predict(test) """, ) \ No newline at end of file From f822edb91395621e02aedea05fdd69046c9aa722 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:05:15 +0530 Subject: [PATCH 03/15] rulefit-examples-docs3 --- h2o-bindings/bin/custom/python/gen_rulefit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index f605d0df772e..0d8817c7459f 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -97,5 +97,5 @@ def predict_rules(self, frame, rule_ids): >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) -""", +""" ) \ No newline at end of file From 6e8563c28341a8bcf327a6998c944c82f0d47e5d Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Wed, 3 Jan 2024 11:12:07 -0600 Subject: [PATCH 04/15] ht/gradle build --- h2o-py/h2o/estimators/rulefit.py | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index 529b371780ea..29f55fbe8637 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -206,6 +206,21 @@ def algorithm(self): The algorithm to use to generate rules. Type: ``Literal["auto", "drf", "gbm"]``, defaults to ``"auto"``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> train, test = df.split_frame(ratios=[0.8], seed=1) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1) + >>> rfit.train(training_frame=train, x=x, y=y) + >>> print(rfit.rule_importance()) + >>> rfit.predict(test) """ return self._parms.get("algorithm") @@ -249,6 +264,21 @@ def max_num_rules(self): by diminishing returns in model deviance. Type: ``int``, defaults to ``-1``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> train, test = df.split_frame(ratios=[0.8], seed=1) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1) + >>> rfit.train(training_frame=train, x=x, y=y) + >>> print(rfit.rule_importance()) + >>> rfit.predict(test) """ return self._parms.get("max_num_rules") @@ -370,6 +400,21 @@ def max_categorical_levels(self): for categorical_encoding == EnumLimited. Type: ``int``, defaults to ``10``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> train, test = df.split_frame(ratios=[0.8], seed=1) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1) + >>> rfit.train(training_frame=train, x=x, y=y) + >>> print(rfit.rule_importance()) + >>> rfit.predict(test) """ return self._parms.get("max_categorical_levels") From fc9d8a92d63e0e7c3d975aef80a731645561925a Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Fri, 5 Jan 2024 07:35:19 -0600 Subject: [PATCH 05/15] ht/rerun checks --- h2o-bindings/bin/custom/python/gen_rulefit.py | 4 ++-- h2o-py/h2o/estimators/rulefit.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 0d8817c7459f..acf5bf45aad5 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -18,7 +18,7 @@ def rule_importance(self): def predict_rules(self, frame, rule_ids): """ - Evaluates validity of the given rules on the given data. + Evaluates validity of the given rules on the given data. :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame @@ -98,4 +98,4 @@ def predict_rules(self, frame, rule_ids): >>> print(rfit.rule_importance()) >>> rfit.predict(test) """ -) \ No newline at end of file +) diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index 29f55fbe8637..6101c281f9bc 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -442,7 +442,7 @@ def rule_importance(self): def predict_rules(self, frame, rule_ids): """ - Evaluates validity of the given rules on the given data. + Evaluates validity of the given rules on the given data. :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame From 9ac61056297e7a81c7a3d0831843158eb067258f Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Fri, 5 Jan 2024 08:20:16 -0600 Subject: [PATCH 06/15] ht/fixed spacing --- h2o-bindings/bin/custom/python/gen_rulefit.py | 6 +++--- h2o-py/h2o/estimators/rulefit.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index acf5bf45aad5..200fd29a6f8f 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -64,7 +64,7 @@ def predict_rules(self, frame, rule_ids): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1) +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -79,7 +79,7 @@ def predict_rules(self, frame, rule_ids): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1) +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -93,7 +93,7 @@ def predict_rules(self, frame, rule_ids): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1) +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index 6101c281f9bc..c84a2357726f 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -217,7 +217,7 @@ def algorithm(self): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1) + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -275,7 +275,7 @@ def max_num_rules(self): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1) + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -411,7 +411,7 @@ def max_categorical_levels(self): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1) + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) From 74556cd80f14207a1e232a0b268be4cec900d185 Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Thu, 18 Jan 2024 11:07:21 -0600 Subject: [PATCH 07/15] ht/spacing update --- h2o-bindings/bin/custom/python/gen_rulefit.py | 15 ++++++++++++--- h2o-py/h2o/estimators/rulefit.py | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 200fd29a6f8f..02a995675479 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -64,7 +64,10 @@ def predict_rules(self, frame, rule_ids): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1) +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... algorithm="auto", +... seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -79,7 +82,10 @@ def predict_rules(self, frame, rule_ids): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1) +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... max_categorical_levels=10, +... seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -93,7 +99,10 @@ def predict_rules(self, frame, rule_ids): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1) +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... max_num_rules=-1, +... seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index c84a2357726f..8b0e447cf528 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -217,7 +217,10 @@ def algorithm(self): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1) + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... algorithm="auto", + ... seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -275,7 +278,10 @@ def max_num_rules(self): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1) + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... max_num_rules=-1, + ... seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) @@ -411,7 +417,10 @@ def max_categorical_levels(self): >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1) + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... max_categorical_levels=10, + ... seed=1) >>> rfit.train(training_frame=train, x=x, y=y) >>> print(rfit.rule_importance()) >>> rfit.predict(test) From a77bedde30f55c1d52ff0ccae62b1f89744d36cf Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Thu, 29 Feb 2024 07:12:20 -0600 Subject: [PATCH 08/15] ht/rerun checks --- h2o-bindings/bin/custom/python/gen_rulefit.py | 2 +- h2o-py/h2o/estimators/rulefit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 02a995675479..70621b25cc59 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -18,7 +18,7 @@ def rule_importance(self): def predict_rules(self, frame, rule_ids): """ - Evaluates validity of the given rules on the given data. + Evaluates validity of the given rules on the given data. :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index 8b0e447cf528..e5e9c98f2638 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -451,7 +451,7 @@ def rule_importance(self): def predict_rules(self, frame, rule_ids): """ - Evaluates validity of the given rules on the given data. + Evaluates validity of the given rules on the given data. :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame From 0c1f4c0441cd44db44581b7b8fb1614c01c4f39f Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Fri, 1 Mar 2024 13:48:10 -0600 Subject: [PATCH 09/15] ht/rerun checks --- h2o-bindings/bin/custom/python/gen_rulefit.py | 2 +- h2o-py/h2o/estimators/rulefit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 70621b25cc59..02a995675479 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -18,7 +18,7 @@ def rule_importance(self): def predict_rules(self, frame, rule_ids): """ - Evaluates validity of the given rules on the given data. + Evaluates validity of the given rules on the given data. :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index e5e9c98f2638..8b0e447cf528 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -451,7 +451,7 @@ def rule_importance(self): def predict_rules(self, frame, rule_ids): """ - Evaluates validity of the given rules on the given data. + Evaluates validity of the given rules on the given data. :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame From 708a3ec2a029826f6c32ff2bd5da8642d3dd1e1a Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Wed, 20 Mar 2024 11:59:21 +0530 Subject: [PATCH 10/15] review-suggestions --- h2o-bindings/bin/custom/python/gen_rulefit.py | 122 ++++++++++++++++-- 1 file changed, 110 insertions(+), 12 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 02a995675479..80499fc18105 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -61,16 +61,14 @@ def predict_rules(self, frame, rule_ids): >>> from h2o.estimators import H2ORuleFitEstimator >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) ->>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, -... algorithm="auto", +... algorithm="gbm", ... seed=1) ->>> rfit.train(training_frame=train, x=x, y=y) +>>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) ->>> rfit.predict(test) """, max_categorical_levels=""" @@ -79,16 +77,14 @@ def predict_rules(self, frame, rule_ids): >>> from h2o.estimators import H2ORuleFitEstimator >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) ->>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, -... max_categorical_levels=10, +... max_categorical_levels=11, ... seed=1) ->>> rfit.train(training_frame=train, x=x, y=y) +>>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) ->>> rfit.predict(test) """, max_num_rules=""" >>> import h2o @@ -96,15 +92,117 @@ def predict_rules(self, frame, rule_ids): >>> from h2o.estimators import H2ORuleFitEstimator >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) ->>> train, test = df.split_frame(ratios=[0.8], seed=1) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=-2, +... seed=1) +>>> rfit.train(training_frame=df, x=x, y=y) +>>> print(rfit.rule_importance()) +""", + min_rule_length=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... min_rule_length=4, +... seed=1) +>>> rfit.train(training_frame=df, x=x, y=y) +>>> print(rfit.rule_importance()) +""", + max_rule_length=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... min_rule_length=3, +... seed=1) +>>> rfit.train(training_frame=df, x=x, y=y) +>>> print(rfit.rule_importance()) +""", + model_type=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... model_type="rules", +... seed=1) +>>> rfit.train(training_frame=df, x=x, y=y) +>>> print(rfit.rule_importance()) +""", + distribution=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... distribution="bernoulli", +... seed=1) +>>> rfit.train(training_frame=df, x=x, y=y) +>>> print(rfit.rule_importance()) +""", + rule_generation_ntrees=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... rule_generation_ntrees=60, +... seed=1) +>>> rfit.train(training_frame=df, x=x, y=y) +>>> print(rfit.rule_importance()) +""", + rule_importance=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) +>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] +>>> y = "survived" +>>> rfit = H2ORuleFitEstimator(max_rule_length=10, +... max_num_rules=100, +... rule_generation_ntrees=60, +... seed=1) +>>> rfit.train(training_frame=df, x=x, y=y) +>>> print(rfit.rule_importance()) +""", + predict_rules=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2ORuleFitEstimator +>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" +>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, -... max_num_rules=-1, +... rule_generation_ntrees=60, ... seed=1) ->>> rfit.train(training_frame=train, x=x, y=y) +>>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) ->>> rfit.predict(test) """ ) From 324420f0cee1218fbd54699ac610e811ef586743 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Wed, 24 Jul 2024 16:33:19 +0530 Subject: [PATCH 11/15] Update gen_rulefit.py --- h2o-bindings/bin/custom/python/gen_rulefit.py | 64 ++++++++++--------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 80499fc18105..383208e41746 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -3,9 +3,24 @@ def class_extensions(): def rule_importance(self): """ - Retrieve rule importances for a Rulefit model +Retrieve rule importances for a Rulefit model :return: H2OTwoDimTable + + :examples: + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... algorithm="gbm", + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> print(rfit.rule_importance()) """ if self._model_json["algo"] != "rulefit": raise H2OValueError("This function is available for Rulefit models only") @@ -23,6 +38,23 @@ def predict_rules(self, frame, rule_ids): :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame :return: H2OFrame with a column per each input ruleId, representing a flag whether given rule is applied to the observation or not. + + :examples: + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... rule_generation_ntrees=60, + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> rules_to_predict = ['rule_1', 'rule_2'] # Replace with actual rule IDs + >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict) + >>> print(predictions) """ from h2o.frame import H2OFrame from h2o.utils.typechecks import assert_is_type @@ -174,35 +206,5 @@ def predict_rules(self, frame, rule_ids): ... seed=1) >>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) -""", - rule_importance=""" ->>> import h2o ->>> h2o.init() ->>> from h2o.estimators import H2ORuleFitEstimator ->>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" ->>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) ->>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] ->>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10, -... max_num_rules=100, -... rule_generation_ntrees=60, -... seed=1) ->>> rfit.train(training_frame=df, x=x, y=y) ->>> print(rfit.rule_importance()) -""", - predict_rules=""" ->>> import h2o ->>> h2o.init() ->>> from h2o.estimators import H2ORuleFitEstimator ->>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" ->>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) ->>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] ->>> y = "survived" ->>> rfit = H2ORuleFitEstimator(max_rule_length=10, -... max_num_rules=100, -... rule_generation_ntrees=60, -... seed=1) ->>> rfit.train(training_frame=df, x=x, y=y) ->>> print(rfit.rule_importance()) """ ) From fe72b9e8a571738f1156e7879a8b4d27d4f7d902 Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Thu, 15 Aug 2024 11:20:18 -0500 Subject: [PATCH 12/15] ht/built gradle --- h2o-py/h2o/estimators/rulefit.py | 133 +++++++++++++++++++++++++++---- 1 file changed, 119 insertions(+), 14 deletions(-) diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index 8b0e447cf528..ba6823c558ab 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -214,16 +214,14 @@ def algorithm(self): >>> from h2o.estimators import H2ORuleFitEstimator >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) - >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, - ... algorithm="auto", + ... algorithm="gbm", ... seed=1) - >>> rfit.train(training_frame=train, x=x, y=y) + >>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) - >>> rfit.predict(test) """ return self._parms.get("algorithm") @@ -238,6 +236,22 @@ def min_rule_length(self): Minimum length of rules. Defaults to 3. Type: ``int``, defaults to ``3``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... min_rule_length=4, + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> print(rfit.rule_importance()) """ return self._parms.get("min_rule_length") @@ -252,6 +266,22 @@ def max_rule_length(self): Maximum length of rules. Defaults to 3. Type: ``int``, defaults to ``3``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... min_rule_length=3, + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> print(rfit.rule_importance()) """ return self._parms.get("max_rule_length") @@ -275,16 +305,13 @@ def max_num_rules(self): >>> from h2o.estimators import H2ORuleFitEstimator >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) - >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, - ... max_num_rules=100, - ... max_num_rules=-1, + ... max_num_rules=-2, ... seed=1) - >>> rfit.train(training_frame=train, x=x, y=y) + >>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) - >>> rfit.predict(test) """ return self._parms.get("max_num_rules") @@ -299,6 +326,22 @@ def model_type(self): Specifies type of base learners in the ensemble. Type: ``Literal["rules_and_linear", "rules", "linear"]``, defaults to ``"rules_and_linear"``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... model_type="rules", + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> print(rfit.rule_importance()) """ return self._parms.get("model_type") @@ -334,6 +377,22 @@ def distribution(self): Type: ``Literal["auto", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber"]``, defaults to ``"auto"``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... distribution="bernoulli", + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> print(rfit.rule_importance()) """ return self._parms.get("distribution") @@ -348,6 +407,22 @@ def rule_generation_ntrees(self): Specifies the number of trees to build in the tree model. Defaults to 50. Type: ``int``, defaults to ``50``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... rule_generation_ntrees=60, + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> print(rfit.rule_importance()) """ return self._parms.get("rule_generation_ntrees") @@ -414,16 +489,14 @@ def max_categorical_levels(self): >>> from h2o.estimators import H2ORuleFitEstimator >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) - >>> train, test = df.split_frame(ratios=[0.8], seed=1) >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, - ... max_categorical_levels=10, + ... max_categorical_levels=11, ... seed=1) - >>> rfit.train(training_frame=train, x=x, y=y) + >>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) - >>> rfit.predict(test) """ return self._parms.get("max_categorical_levels") @@ -436,9 +509,24 @@ def max_categorical_levels(self, max_categorical_levels): def rule_importance(self): """ - Retrieve rule importances for a Rulefit model +Retrieve rule importances for a Rulefit model :return: H2OTwoDimTable + + :examples: + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... algorithm="gbm", + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> print(rfit.rule_importance()) """ if self._model_json["algo"] != "rulefit": raise H2OValueError("This function is available for Rulefit models only") @@ -456,6 +544,23 @@ def predict_rules(self, frame, rule_ids): :param frame: H2OFrame on which rule validity is to be evaluated :param rule_ids: string array of rule ids to be evaluated against the frame :return: H2OFrame with a column per each input ruleId, representing a flag whether given rule is applied to the observation or not. + + :examples: + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2ORuleFitEstimator + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" + >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) + >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] + >>> y = "survived" + >>> rfit = H2ORuleFitEstimator(max_rule_length=10, + ... max_num_rules=100, + ... rule_generation_ntrees=60, + ... seed=1) + >>> rfit.train(training_frame=df, x=x, y=y) + >>> rules_to_predict = ['rule_1', 'rule_2'] # Replace with actual rule IDs + >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict) + >>> print(predictions) """ from h2o.frame import H2OFrame from h2o.utils.typechecks import assert_is_type From 503cf3f3721105beaff6e8b5b92cecf83f517222 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Mon, 26 Aug 2024 11:29:07 +0530 Subject: [PATCH 13/15] Update gen_rulefit.py --- h2o-bindings/bin/custom/python/gen_rulefit.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 383208e41746..2baed562c83b 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -17,9 +17,9 @@ def rule_importance(self): >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, - ... algorithm="gbm", ... seed=1) >>> rfit.train(training_frame=df, x=x, y=y) + >>> rule_importance = rfit.rule_importance() >>> print(rfit.rule_importance()) """ if self._model_json["algo"] != "rulefit": @@ -49,12 +49,10 @@ def predict_rules(self, frame, rule_ids): >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, - ... rule_generation_ntrees=60, ... seed=1) >>> rfit.train(training_frame=df, x=x, y=y) - >>> rules_to_predict = ['rule_1', 'rule_2'] # Replace with actual rule IDs - >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict) - >>> print(predictions) + >>> predictions = rfit.predict(test) + >>> print("Predictions:\n", predictions) """ from h2o.frame import H2OFrame from h2o.utils.typechecks import assert_is_type @@ -127,7 +125,7 @@ def predict_rules(self, frame, rule_ids): >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, -... max_num_rules=-2, +... max_num_rules=3, ... seed=1) >>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) From 72904b505d09730e1e0a187b54f62361a0f04835 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:12:34 +0530 Subject: [PATCH 14/15] review-suggestioons --- h2o-bindings/bin/custom/python/gen_rulefit.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py index 2baed562c83b..22d2f92eb9f0 100644 --- a/h2o-bindings/bin/custom/python/gen_rulefit.py +++ b/h2o-bindings/bin/custom/python/gen_rulefit.py @@ -43,16 +43,19 @@ def predict_rules(self, frame, rule_ids): >>> import h2o >>> h2o.init() >>> from h2o.estimators import H2ORuleFitEstimator - >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" - >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) - >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] - >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10, - ... max_num_rules=100, - ... seed=1) - >>> rfit.train(training_frame=df, x=x, y=y) - >>> predictions = rfit.predict(test) - >>> print("Predictions:\n", predictions) + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv" + >>> df = h2o.import_file(path=f, col_types={'species': "enum"}) + >>> x = df.columns + >>> y = "species" + >>> x.remove(y) + >>> train, test = df.split_frame(ratios=[.8], seed=1234) + >>> rfit = H2ORuleFitEstimator(min_rule_length=4, + ... max_rule_length=5, + ... max_num_rules=3, + ... seed=1234, + ... model_type="rules") + >>> rfit.train(training_frame=train, x=x, y=y, validation_frame=test) + >>> print(rfit.predict_rules(train, ['M0T38N5_Iris-virginica'])) """ from h2o.frame import H2OFrame from h2o.utils.typechecks import assert_is_type From 4e216a8b896719f96d0a57929547a0e31c982c23 Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Tue, 1 Oct 2024 08:34:41 -0500 Subject: [PATCH 15/15] ht/gradle build --- h2o-py/h2o/estimators/rulefit.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py index ba6823c558ab..639e34129c53 100644 --- a/h2o-py/h2o/estimators/rulefit.py +++ b/h2o-py/h2o/estimators/rulefit.py @@ -308,7 +308,7 @@ def max_num_rules(self): >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, - ... max_num_rules=-2, + ... max_num_rules=3, ... seed=1) >>> rfit.train(training_frame=df, x=x, y=y) >>> print(rfit.rule_importance()) @@ -523,9 +523,9 @@ def rule_importance(self): >>> y = "survived" >>> rfit = H2ORuleFitEstimator(max_rule_length=10, ... max_num_rules=100, - ... algorithm="gbm", ... seed=1) >>> rfit.train(training_frame=df, x=x, y=y) + >>> rule_importance = rfit.rule_importance() >>> print(rfit.rule_importance()) """ if self._model_json["algo"] != "rulefit": @@ -549,18 +549,19 @@ def predict_rules(self, frame, rule_ids): >>> import h2o >>> h2o.init() >>> from h2o.estimators import H2ORuleFitEstimator - >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv" - >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"}) - >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"] - >>> y = "survived" - >>> rfit = H2ORuleFitEstimator(max_rule_length=10, - ... max_num_rules=100, - ... rule_generation_ntrees=60, - ... seed=1) - >>> rfit.train(training_frame=df, x=x, y=y) - >>> rules_to_predict = ['rule_1', 'rule_2'] # Replace with actual rule IDs - >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict) - >>> print(predictions) + >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv" + >>> df = h2o.import_file(path=f, col_types={'species': "enum"}) + >>> x = df.columns + >>> y = "species" + >>> x.remove(y) + >>> train, test = df.split_frame(ratios=[.8], seed=1234) + >>> rfit = H2ORuleFitEstimator(min_rule_length=4, + ... max_rule_length=5, + ... max_num_rules=3, + ... seed=1234, + ... model_type="rules") + >>> rfit.train(training_frame=train, x=x, y=y, validation_frame=test) + >>> print(rfit.predict_rules(train, ['M0T38N5_Iris-virginica'])) """ from h2o.frame import H2OFrame from h2o.utils.typechecks import assert_is_type