From 825f84f4849004fc74eae8bead512092691d0156 Mon Sep 17 00:00:00 2001
From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com>
Date: Fri, 22 Dec 2023 13:58:48 +0530
Subject: [PATCH 01/15] rulefit-examples-docs1

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 55a01c4057db..a9b995a2538f 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -52,3 +52,36 @@ def predict_rules(self, frame, rule_ids):
  """
     ),
 )
+
+examples = dict(
+
+    algorithm="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> train, test = df.split_frame(ratios=[0.8], seed=1)
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1)
+>>> rfit.train(training_frame=train, x=x, y=y)
+>>> print(rfit.rule_importance())
+>>> rfit.predict(test)
+
+""",
+    max_categorical_levels="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> train, test = df.split_frame(ratios=[0.8], seed=1)
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1)
+>>> rfit.train(training_frame=train, x=x, y=y)
+>>> print(rfit.rule_importance())
+>>> rfit.predict(test)
+""",
+)
\ No newline at end of file

From 3a1ad407e05765cdb6f7330a437d570475cf3bd8 Mon Sep 17 00:00:00 2001
From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com>
Date: Fri, 22 Dec 2023 14:04:23 +0530
Subject: [PATCH 02/15] rulefit-examples-docs2

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index a9b995a2538f..f605d0df772e 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -83,5 +83,19 @@ def predict_rules(self, frame, rule_ids):
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
+""",
+    max_num_rules="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> train, test = df.split_frame(ratios=[0.8], seed=1)
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1)
+>>> rfit.train(training_frame=train, x=x, y=y)
+>>> print(rfit.rule_importance())
+>>> rfit.predict(test)
 """,
 )
\ No newline at end of file

From f822edb91395621e02aedea05fdd69046c9aa722 Mon Sep 17 00:00:00 2001
From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com>
Date: Fri, 22 Dec 2023 14:05:15 +0530
Subject: [PATCH 03/15] rulefit-examples-docs3

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index f605d0df772e..0d8817c7459f 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -97,5 +97,5 @@ def predict_rules(self, frame, rule_ids):
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
-""",
+"""
 )
\ No newline at end of file

From 6e8563c28341a8bcf327a6998c944c82f0d47e5d Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Wed, 3 Jan 2024 11:12:07 -0600
Subject: [PATCH 04/15] ht/gradle build

---
 h2o-py/h2o/estimators/rulefit.py | 45 ++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index 529b371780ea..29f55fbe8637 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -206,6 +206,21 @@ def algorithm(self):
         The algorithm to use to generate rules.
 
         Type: ``Literal["auto", "drf", "gbm"]``, defaults to ``"auto"``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> train, test = df.split_frame(ratios=[0.8], seed=1)
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1)
+        >>> rfit.train(training_frame=train, x=x, y=y)
+        >>> print(rfit.rule_importance())
+        >>> rfit.predict(test)
         """
         return self._parms.get("algorithm")
 
@@ -249,6 +264,21 @@ def max_num_rules(self):
         by diminishing returns in model deviance.
 
         Type: ``int``, defaults to ``-1``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> train, test = df.split_frame(ratios=[0.8], seed=1)
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1)
+        >>> rfit.train(training_frame=train, x=x, y=y)
+        >>> print(rfit.rule_importance())
+        >>> rfit.predict(test)
         """
         return self._parms.get("max_num_rules")
 
@@ -370,6 +400,21 @@ def max_categorical_levels(self):
         for categorical_encoding == EnumLimited.
 
         Type: ``int``, defaults to ``10``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> train, test = df.split_frame(ratios=[0.8], seed=1)
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1)
+        >>> rfit.train(training_frame=train, x=x, y=y)
+        >>> print(rfit.rule_importance())
+        >>> rfit.predict(test)
         """
         return self._parms.get("max_categorical_levels")
 

From fc9d8a92d63e0e7c3d975aef80a731645561925a Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Fri, 5 Jan 2024 07:35:19 -0600
Subject: [PATCH 05/15] ht/rerun checks

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 4 ++--
 h2o-py/h2o/estimators/rulefit.py              | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 0d8817c7459f..acf5bf45aad5 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -18,7 +18,7 @@ def rule_importance(self):
 
     def predict_rules(self, frame, rule_ids):
         """
-        Evaluates validity of the given rules on the given data. 
+        Evaluates validity of the given rules on the given data.
 
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame
@@ -98,4 +98,4 @@ def predict_rules(self, frame, rule_ids):
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
 """
-)
\ No newline at end of file
+)
diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index 29f55fbe8637..6101c281f9bc 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -442,7 +442,7 @@ def rule_importance(self):
 
     def predict_rules(self, frame, rule_ids):
         """
-        Evaluates validity of the given rules on the given data. 
+        Evaluates validity of the given rules on the given data.
 
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame

From 9ac61056297e7a81c7a3d0831843158eb067258f Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Fri, 5 Jan 2024 08:20:16 -0600
Subject: [PATCH 06/15] ht/fixed spacing

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 6 +++---
 h2o-py/h2o/estimators/rulefit.py              | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index acf5bf45aad5..200fd29a6f8f 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -64,7 +64,7 @@ def predict_rules(self, frame, rule_ids):
 >>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1)
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1)
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
@@ -79,7 +79,7 @@ def predict_rules(self, frame, rule_ids):
 >>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1)
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1)
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
@@ -93,7 +93,7 @@ def predict_rules(self, frame, rule_ids):
 >>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1)
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1)
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index 6101c281f9bc..c84a2357726f 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -217,7 +217,7 @@ def algorithm(self):
         >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, algorithm="auto", seed=1)
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1)
         >>> rfit.train(training_frame=train, x=x, y=y)
         >>> print(rfit.rule_importance())
         >>> rfit.predict(test)
@@ -275,7 +275,7 @@ def max_num_rules(self):
         >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_num_rules=-1, seed=1)
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1)
         >>> rfit.train(training_frame=train, x=x, y=y)
         >>> print(rfit.rule_importance())
         >>> rfit.predict(test)
@@ -411,7 +411,7 @@ def max_categorical_levels(self):
         >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,max_num_rules=100, max_categorical_levels=10, seed=1)
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1)
         >>> rfit.train(training_frame=train, x=x, y=y)
         >>> print(rfit.rule_importance())
         >>> rfit.predict(test)

From 74556cd80f14207a1e232a0b268be4cec900d185 Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Thu, 18 Jan 2024 11:07:21 -0600
Subject: [PATCH 07/15] ht/spacing update

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 15 ++++++++++++---
 h2o-py/h2o/estimators/rulefit.py              | 15 ++++++++++++---
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 200fd29a6f8f..02a995675479 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -64,7 +64,10 @@ def predict_rules(self, frame, rule_ids):
 >>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1)
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            algorithm="auto",
+...                            seed=1)
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
@@ -79,7 +82,10 @@ def predict_rules(self, frame, rule_ids):
 >>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1)
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            max_categorical_levels=10,
+...                            seed=1)
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
@@ -93,7 +99,10 @@ def predict_rules(self, frame, rule_ids):
 >>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1)
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            max_num_rules=-1,
+...                            seed=1)
 >>> rfit.train(training_frame=train, x=x, y=y)
 >>> print(rfit.rule_importance())
 >>> rfit.predict(test)
diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index c84a2357726f..8b0e447cf528 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -217,7 +217,10 @@ def algorithm(self):
         >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, algorithm="auto", seed=1)
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            algorithm="auto",
+        ...                            seed=1)
         >>> rfit.train(training_frame=train, x=x, y=y)
         >>> print(rfit.rule_importance())
         >>> rfit.predict(test)
@@ -275,7 +278,10 @@ def max_num_rules(self):
         >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_num_rules=-1, seed=1)
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            max_num_rules=-1,
+        ...                            seed=1)
         >>> rfit.train(training_frame=train, x=x, y=y)
         >>> print(rfit.rule_importance())
         >>> rfit.predict(test)
@@ -411,7 +417,10 @@ def max_categorical_levels(self):
         >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10, max_num_rules=100, max_categorical_levels=10, seed=1)
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            max_categorical_levels=10,
+        ...                            seed=1)
         >>> rfit.train(training_frame=train, x=x, y=y)
         >>> print(rfit.rule_importance())
         >>> rfit.predict(test)

From a77bedde30f55c1d52ff0ccae62b1f89744d36cf Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Thu, 29 Feb 2024 07:12:20 -0600
Subject: [PATCH 08/15] ht/rerun checks

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 2 +-
 h2o-py/h2o/estimators/rulefit.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 02a995675479..70621b25cc59 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -18,7 +18,7 @@ def rule_importance(self):
 
     def predict_rules(self, frame, rule_ids):
         """
-        Evaluates validity of the given rules on the given data.
+        Evaluates validity of the given rules on the given data. 
 
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame
diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index 8b0e447cf528..e5e9c98f2638 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -451,7 +451,7 @@ def rule_importance(self):
 
     def predict_rules(self, frame, rule_ids):
         """
-        Evaluates validity of the given rules on the given data.
+        Evaluates validity of the given rules on the given data. 
 
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame

From 0c1f4c0441cd44db44581b7b8fb1614c01c4f39f Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Fri, 1 Mar 2024 13:48:10 -0600
Subject: [PATCH 09/15] ht/rerun checks

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 2 +-
 h2o-py/h2o/estimators/rulefit.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 70621b25cc59..02a995675479 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -18,7 +18,7 @@ def rule_importance(self):
 
     def predict_rules(self, frame, rule_ids):
         """
-        Evaluates validity of the given rules on the given data. 
+        Evaluates validity of the given rules on the given data.
 
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame
diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index e5e9c98f2638..8b0e447cf528 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -451,7 +451,7 @@ def rule_importance(self):
 
     def predict_rules(self, frame, rule_ids):
         """
-        Evaluates validity of the given rules on the given data. 
+        Evaluates validity of the given rules on the given data.
 
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame

From 708a3ec2a029826f6c32ff2bd5da8642d3dd1e1a Mon Sep 17 00:00:00 2001
From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com>
Date: Wed, 20 Mar 2024 11:59:21 +0530
Subject: [PATCH 10/15] review-suggestions

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 122 ++++++++++++++++--
 1 file changed, 110 insertions(+), 12 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 02a995675479..80499fc18105 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -61,16 +61,14 @@ def predict_rules(self, frame, rule_ids):
 >>> from h2o.estimators import H2ORuleFitEstimator
 >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
 >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
->>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
 >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
 ...                            max_num_rules=100,
-...                            algorithm="auto",
+...                            algorithm="gbm",
 ...                            seed=1)
->>> rfit.train(training_frame=train, x=x, y=y)
+>>> rfit.train(training_frame=df, x=x, y=y)
 >>> print(rfit.rule_importance())
->>> rfit.predict(test)
 
 """,
     max_categorical_levels="""
@@ -79,16 +77,14 @@ def predict_rules(self, frame, rule_ids):
 >>> from h2o.estimators import H2ORuleFitEstimator
 >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
 >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
->>> train, test = df.split_frame(ratios=[0.8], seed=1)
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
 >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
 ...                            max_num_rules=100,
-...                            max_categorical_levels=10,
+...                            max_categorical_levels=11,
 ...                            seed=1)
->>> rfit.train(training_frame=train, x=x, y=y)
+>>> rfit.train(training_frame=df, x=x, y=y)
 >>> print(rfit.rule_importance())
->>> rfit.predict(test)
 """,
     max_num_rules="""
 >>> import h2o
@@ -96,15 +92,117 @@ def predict_rules(self, frame, rule_ids):
 >>> from h2o.estimators import H2ORuleFitEstimator
 >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
 >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
->>> train, test = df.split_frame(ratios=[0.8], seed=1)
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=-2,
+...                            seed=1)
+>>> rfit.train(training_frame=df, x=x, y=y)
+>>> print(rfit.rule_importance())
+""",
+    min_rule_length="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            min_rule_length=4,
+...                            seed=1)
+>>> rfit.train(training_frame=df, x=x, y=y)
+>>> print(rfit.rule_importance())
+""",
+    max_rule_length="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            min_rule_length=3,
+...                            seed=1)
+>>> rfit.train(training_frame=df, x=x, y=y)
+>>> print(rfit.rule_importance())
+""",
+    model_type="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            model_type="rules",
+...                            seed=1)
+>>> rfit.train(training_frame=df, x=x, y=y)
+>>> print(rfit.rule_importance())
+""",
+    distribution="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            distribution="bernoulli",
+...                            seed=1)
+>>> rfit.train(training_frame=df, x=x, y=y)
+>>> print(rfit.rule_importance())
+""",
+    rule_generation_ntrees="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            rule_generation_ntrees=60,
+...                            seed=1)
+>>> rfit.train(training_frame=df, x=x, y=y)
+>>> print(rfit.rule_importance())
+""",
+    rule_importance="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+>>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+>>> y = "survived"
+>>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+...                            max_num_rules=100,
+...                            rule_generation_ntrees=60,
+...                            seed=1)
+>>> rfit.train(training_frame=df, x=x, y=y)
+>>> print(rfit.rule_importance())
+""",
+    predict_rules="""
+>>> import h2o
+>>> h2o.init()
+>>> from h2o.estimators import H2ORuleFitEstimator
+>>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+>>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
 >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
 ...                            max_num_rules=100,
-...                            max_num_rules=-1,
+...                            rule_generation_ntrees=60,
 ...                            seed=1)
->>> rfit.train(training_frame=train, x=x, y=y)
+>>> rfit.train(training_frame=df, x=x, y=y)
 >>> print(rfit.rule_importance())
->>> rfit.predict(test)
 """
 )

From 324420f0cee1218fbd54699ac610e811ef586743 Mon Sep 17 00:00:00 2001
From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com>
Date: Wed, 24 Jul 2024 16:33:19 +0530
Subject: [PATCH 11/15] Update gen_rulefit.py

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 64 ++++++++++---------
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 80499fc18105..383208e41746 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -3,9 +3,24 @@
 def class_extensions():
     def rule_importance(self):
         """
-        Retrieve rule importances for a Rulefit model
+Retrieve rule importances for a Rulefit model
 
         :return: H2OTwoDimTable
+        
+        :examples:
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            algorithm="gbm",
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> print(rfit.rule_importance())
         """
         if self._model_json["algo"] != "rulefit":
             raise H2OValueError("This function is available for Rulefit models only")
@@ -23,6 +38,23 @@ def predict_rules(self, frame, rule_ids):
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame
         :return: H2OFrame with a column per each input ruleId, representing a flag whether given rule is applied to the observation or not.
+        
+        :examples:
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            rule_generation_ntrees=60,
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> rules_to_predict = ['rule_1', 'rule_2']  # Replace with actual rule IDs
+        >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict)
+        >>> print(predictions)
         """
         from h2o.frame import H2OFrame
         from h2o.utils.typechecks import assert_is_type
@@ -174,35 +206,5 @@ def predict_rules(self, frame, rule_ids):
 ...                            seed=1)
 >>> rfit.train(training_frame=df, x=x, y=y)
 >>> print(rfit.rule_importance())
-""",
-    rule_importance="""
->>> import h2o
->>> h2o.init()
->>> from h2o.estimators import H2ORuleFitEstimator
->>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
->>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
->>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
->>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10,
-...                            max_num_rules=100,
-...                            rule_generation_ntrees=60,
-...                            seed=1)
->>> rfit.train(training_frame=df, x=x, y=y)
->>> print(rfit.rule_importance())
-""",
-    predict_rules="""
->>> import h2o
->>> h2o.init()
->>> from h2o.estimators import H2ORuleFitEstimator
->>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
->>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
->>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
->>> y = "survived"
->>> rfit = H2ORuleFitEstimator(max_rule_length=10,
-...                            max_num_rules=100,
-...                            rule_generation_ntrees=60,
-...                            seed=1)
->>> rfit.train(training_frame=df, x=x, y=y)
->>> print(rfit.rule_importance())
 """
 )

From fe72b9e8a571738f1156e7879a8b4d27d4f7d902 Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Thu, 15 Aug 2024 11:20:18 -0500
Subject: [PATCH 12/15] ht/built gradle

---
 h2o-py/h2o/estimators/rulefit.py | 133 +++++++++++++++++++++++++++----
 1 file changed, 119 insertions(+), 14 deletions(-)

diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index 8b0e447cf528..ba6823c558ab 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -214,16 +214,14 @@ def algorithm(self):
         >>> from h2o.estimators import H2ORuleFitEstimator
         >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
         >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
-        >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
         >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
         ...                            max_num_rules=100,
-        ...                            algorithm="auto",
+        ...                            algorithm="gbm",
         ...                            seed=1)
-        >>> rfit.train(training_frame=train, x=x, y=y)
+        >>> rfit.train(training_frame=df, x=x, y=y)
         >>> print(rfit.rule_importance())
-        >>> rfit.predict(test)
         """
         return self._parms.get("algorithm")
 
@@ -238,6 +236,22 @@ def min_rule_length(self):
         Minimum length of rules. Defaults to 3.
 
         Type: ``int``, defaults to ``3``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            min_rule_length=4,
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> print(rfit.rule_importance())
         """
         return self._parms.get("min_rule_length")
 
@@ -252,6 +266,22 @@ def max_rule_length(self):
         Maximum length of rules. Defaults to 3.
 
         Type: ``int``, defaults to ``3``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            min_rule_length=3,
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> print(rfit.rule_importance())
         """
         return self._parms.get("max_rule_length")
 
@@ -275,16 +305,13 @@ def max_num_rules(self):
         >>> from h2o.estimators import H2ORuleFitEstimator
         >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
         >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
-        >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
         >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
-        ...                            max_num_rules=100,
-        ...                            max_num_rules=-1,
+        ...                            max_num_rules=-2,
         ...                            seed=1)
-        >>> rfit.train(training_frame=train, x=x, y=y)
+        >>> rfit.train(training_frame=df, x=x, y=y)
         >>> print(rfit.rule_importance())
-        >>> rfit.predict(test)
         """
         return self._parms.get("max_num_rules")
 
@@ -299,6 +326,22 @@ def model_type(self):
         Specifies type of base learners in the ensemble.
 
         Type: ``Literal["rules_and_linear", "rules", "linear"]``, defaults to ``"rules_and_linear"``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            model_type="rules",
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> print(rfit.rule_importance())
         """
         return self._parms.get("model_type")
 
@@ -334,6 +377,22 @@ def distribution(self):
 
         Type: ``Literal["auto", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace",
         "quantile", "huber"]``, defaults to ``"auto"``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            distribution="bernoulli",
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> print(rfit.rule_importance())
         """
         return self._parms.get("distribution")
 
@@ -348,6 +407,22 @@ def rule_generation_ntrees(self):
         Specifies the number of trees to build in the tree model. Defaults to 50.
 
         Type: ``int``, defaults to ``50``.
+
+        :examples:
+
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            rule_generation_ntrees=60,
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> print(rfit.rule_importance())
         """
         return self._parms.get("rule_generation_ntrees")
 
@@ -414,16 +489,14 @@ def max_categorical_levels(self):
         >>> from h2o.estimators import H2ORuleFitEstimator
         >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
         >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
-        >>> train, test = df.split_frame(ratios=[0.8], seed=1)
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
         >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
         ...                            max_num_rules=100,
-        ...                            max_categorical_levels=10,
+        ...                            max_categorical_levels=11,
         ...                            seed=1)
-        >>> rfit.train(training_frame=train, x=x, y=y)
+        >>> rfit.train(training_frame=df, x=x, y=y)
         >>> print(rfit.rule_importance())
-        >>> rfit.predict(test)
         """
         return self._parms.get("max_categorical_levels")
 
@@ -436,9 +509,24 @@ def max_categorical_levels(self, max_categorical_levels):
 
     def rule_importance(self):
         """
-        Retrieve rule importances for a Rulefit model
+        Retrieve rule importances for a Rulefit model
 
         :return: H2OTwoDimTable
+
+        :examples:
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            algorithm="gbm",
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> print(rfit.rule_importance())
         """
         if self._model_json["algo"] != "rulefit":
             raise H2OValueError("This function is available for Rulefit models only")
@@ -456,6 +544,23 @@ def predict_rules(self, frame, rule_ids):
         :param frame: H2OFrame on which rule validity is to be evaluated
         :param rule_ids: string array of rule ids to be evaluated against the frame
         :return: H2OFrame with a column per each input ruleId, representing a flag whether given rule is applied to the observation or not.
+
+        :examples:
+        >>> import h2o
+        >>> h2o.init()
+        >>> from h2o.estimators import H2ORuleFitEstimator
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
+        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
+        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
+        >>> y = "survived"
+        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
+        ...                            max_num_rules=100,
+        ...                            rule_generation_ntrees=60,
+        ...                            seed=1)
+        >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> rules_to_predict = ['rule_1', 'rule_2']  # Replace with actual rule IDs
+        >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict)
+        >>> print(predictions)
         """
         from h2o.frame import H2OFrame
         from h2o.utils.typechecks import assert_is_type

From 503cf3f3721105beaff6e8b5b92cecf83f517222 Mon Sep 17 00:00:00 2001
From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com>
Date: Mon, 26 Aug 2024 11:29:07 +0530
Subject: [PATCH 13/15] Update gen_rulefit.py

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 383208e41746..2baed562c83b 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -17,9 +17,9 @@ def rule_importance(self):
         >>> y = "survived"
         >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
         ...                            max_num_rules=100,
-        ...                            algorithm="gbm",
         ...                            seed=1)
         >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> rule_importance = rfit.rule_importance()
         >>> print(rfit.rule_importance())
         """
         if self._model_json["algo"] != "rulefit":
@@ -49,12 +49,10 @@ def predict_rules(self, frame, rule_ids):
         >>> y = "survived"
         >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
         ...                            max_num_rules=100,
-        ...                            rule_generation_ntrees=60,
         ...                            seed=1)
         >>> rfit.train(training_frame=df, x=x, y=y)
-        >>> rules_to_predict = ['rule_1', 'rule_2']  # Replace with actual rule IDs
-        >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict)
-        >>> print(predictions)
+        >>> predictions = rfit.predict(test)
+        >>> print("Predictions:\n", predictions)
         """
         from h2o.frame import H2OFrame
         from h2o.utils.typechecks import assert_is_type
@@ -127,7 +125,7 @@ def predict_rules(self, frame, rule_ids):
 >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
 >>> y = "survived"
 >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
-...                            max_num_rules=-2,
+...                            max_num_rules=3,
 ...                            seed=1)
 >>> rfit.train(training_frame=df, x=x, y=y)
 >>> print(rfit.rule_importance())

From 72904b505d09730e1e0a187b54f62361a0f04835 Mon Sep 17 00:00:00 2001
From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com>
Date: Fri, 27 Sep 2024 11:12:34 +0530
Subject: [PATCH 14/15] review-suggestions

---
 h2o-bindings/bin/custom/python/gen_rulefit.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/h2o-bindings/bin/custom/python/gen_rulefit.py b/h2o-bindings/bin/custom/python/gen_rulefit.py
index 2baed562c83b..22d2f92eb9f0 100644
--- a/h2o-bindings/bin/custom/python/gen_rulefit.py
+++ b/h2o-bindings/bin/custom/python/gen_rulefit.py
@@ -43,16 +43,19 @@ def predict_rules(self, frame, rule_ids):
         >>> import h2o
         >>> h2o.init()
         >>> from h2o.estimators import H2ORuleFitEstimator
-        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
-        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
-        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
-        >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
-        ...                            max_num_rules=100,
-        ...                            seed=1)
-        >>> rfit.train(training_frame=df, x=x, y=y)
-        >>> predictions = rfit.predict(test)
-        >>> print("Predictions:\n", predictions)
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv"
+        >>> df = h2o.import_file(path=f, col_types={'species': "enum"})
+        >>> x = df.columns
+        >>> y = "species"
+        >>> x.remove(y)
+        >>> train, test = df.split_frame(ratios=[.8], seed=1234)
+        >>> rfit = H2ORuleFitEstimator(min_rule_length=4,
+        ...                            max_rule_length=5,
+        ...                            max_num_rules=3,
+        ...                            seed=1234,
+        ...                            model_type="rules")
+        >>> rfit.train(training_frame=train, x=x, y=y, validation_frame=test)
+        >>> print(rfit.predict_rules(train, ['M0T38N5_Iris-virginica']))
         """
         from h2o.frame import H2OFrame
         from h2o.utils.typechecks import assert_is_type

From 4e216a8b896719f96d0a57929547a0e31c982c23 Mon Sep 17 00:00:00 2001
From: Hannah Tillman <hannah.tillman@h2o.ai>
Date: Tue, 1 Oct 2024 08:34:41 -0500
Subject: [PATCH 15/15] ht/gradle build

---
 h2o-py/h2o/estimators/rulefit.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/h2o-py/h2o/estimators/rulefit.py b/h2o-py/h2o/estimators/rulefit.py
index ba6823c558ab..639e34129c53 100644
--- a/h2o-py/h2o/estimators/rulefit.py
+++ b/h2o-py/h2o/estimators/rulefit.py
@@ -308,7 +308,7 @@ def max_num_rules(self):
         >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
         >>> y = "survived"
         >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
-        ...                            max_num_rules=-2,
+        ...                            max_num_rules=3,
         ...                            seed=1)
         >>> rfit.train(training_frame=df, x=x, y=y)
         >>> print(rfit.rule_importance())
@@ -523,9 +523,9 @@ def rule_importance(self):
         >>> y = "survived"
         >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
         ...                            max_num_rules=100,
-        ...                            algorithm="gbm",
         ...                            seed=1)
         >>> rfit.train(training_frame=df, x=x, y=y)
+        >>> rule_importance = rfit.rule_importance()
         >>> print(rfit.rule_importance())
         """
         if self._model_json["algo"] != "rulefit":
@@ -549,18 +549,19 @@ def predict_rules(self, frame, rule_ids):
         >>> import h2o
         >>> h2o.init()
         >>> from h2o.estimators import H2ORuleFitEstimator
-        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv"
-        >>> df = h2o.import_file(path=f, col_types={'pclass': "enum", 'survived': "enum"})
-        >>> x = ["age", "sibsp", "parch", "fare", "sex", "pclass"]
-        >>> y = "survived"
-        >>> rfit = H2ORuleFitEstimator(max_rule_length=10,
-        ...                            max_num_rules=100,
-        ...                            rule_generation_ntrees=60,
-        ...                            seed=1)
-        >>> rfit.train(training_frame=df, x=x, y=y)
-        >>> rules_to_predict = ['rule_1', 'rule_2']  # Replace with actual rule IDs
-        >>> predictions = rfit.predict_rules(frame=df, rule_ids=rules_to_predict)
-        >>> print(predictions)
+        >>> f = "https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv"
+        >>> df = h2o.import_file(path=f, col_types={'species': "enum"})
+        >>> x = df.columns
+        >>> y = "species"
+        >>> x.remove(y)
+        >>> train, test = df.split_frame(ratios=[.8], seed=1234)
+        >>> rfit = H2ORuleFitEstimator(min_rule_length=4,
+        ...                            max_rule_length=5,
+        ...                            max_num_rules=3,
+        ...                            seed=1234,
+        ...                            model_type="rules")
+        >>> rfit.train(training_frame=train, x=x, y=y, validation_frame=test)
+        >>> print(rfit.predict_rules(train, ['M0T38N5_Iris-virginica']))
         """
         from h2o.frame import H2OFrame
         from h2o.utils.typechecks import assert_is_type