Added commentary + fixed bugs
EL HANI Marwane committed Nov 27, 2024
1 parent e159250 commit 432363d
Showing 2 changed files with 67 additions and 7 deletions.
2 changes: 2 additions & 0 deletions river/model_selection/__init__.py
@@ -19,6 +19,7 @@
from .bandit import BanditClassifier, BanditRegressor
from .greedy import GreedyRegressor
from .hoeffding_races import HoeffdingRaceClassifier
from .hoeffding_races import HoeffdingRaceRegressor
from .sh import SuccessiveHalvingClassifier, SuccessiveHalvingRegressor

__all__ = [
@@ -29,4 +30,5 @@
"SuccessiveHalvingClassifier",
"SuccessiveHalvingRegressor",
"HoeffdingRaceClassifier",
"HoeffdingRaceRegressor"
]
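With HoeffdingRaceRegressor now exported from river/model_selection/__init__.py, it should be reachable from the package namespace. A minimal sanity check, assuming a development install of this branch (editorial example, not part of the commit):

from river import model_selection

print(model_selection.HoeffdingRaceRegressor)
print("HoeffdingRaceRegressor" in model_selection.__all__)  # True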
72 changes: 65 additions & 7 deletions river/model_selection/hoeffding_races.py
@@ -7,6 +7,21 @@

class HoeffdingRaceClassifier(base.Classifier):
"""
HoeffdingRace-based model selection for classification.

Each model is associated with a running performance measure (here, its accuracy). When a model is
considered too inaccurate according to the Hoeffding bound, it is removed from the race.

Parameters
----------
models
The models to select from.
metric
The metric that is used to measure the performance of each model.
delta
Confidence parameter of the Hoeffding bound (smaller values give a wider, more conservative bound).

Example on binary classification:
>>> from river import model_selection
@@ -41,9 +56,13 @@ def __init__(
self.n = 0
self.model_metrics = {name: metric.clone() for name in models.keys()}
self.model_performance = {name: 0 for name in models.keys()}
self.remaining_models = set(models.keys())
self.remaining_models = list(models.keys())

def hoeffding_bound(self, n):
"""
Computes the hoeffding bound according to n, the number of iterations done.
"""
return math.sqrt((math.log(1 / self.delta)) / (2 * n))
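For background, the formula above is the one-sided Hoeffding bound: for n observations of a quantity bounded in [0, 1] (such as an accuracy), the probability that the empirical mean deviates from its expectation by more than epsilon is at most delta when

\epsilon = \sqrt{\frac{\ln(1/\delta)}{2n}}

This is editorial context on the standard inequality, not part of the committed code.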

def learn_one(self, x, y):
@@ -55,24 +74,58 @@ def learn_one(self, x, y):
self.models[name].learn_one(x, y)

# Update performance

self.model_metrics[name].update(y, y_pred)
self.model_performance[name] = self.model_metrics[name].get()

if self.model_performance[name] + self.hoeffding_bound(self.n) < best_perf:
self.remaining_models.remove(name)
if len(self.remaining_models) == 1:
break
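For intuition on the elimination rule above, here is a small worked example with illustrative numbers (delta, n and the two performance values are made up, not taken from the commit):

import math

delta, n = 0.05, 100
bound = math.sqrt(math.log(1 / delta) / (2 * n))  # ~= 0.122

best_perf = 0.85       # accuracy of the current leader (illustrative)
candidate_perf = 0.70  # accuracy of a trailing model (illustrative)

# Even with the Hoeffding bound added, the trailing model cannot plausibly
# catch up to the leader, so it gets eliminated from the race.
print(candidate_perf + bound < best_perf)  # True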

def predict_one(self, x):
# Predict with the last remaining model once the race has ended

if len(self.remaining_models) == 1:
return self.models[list(self.remaining_models)[0]].predict_one(x)
return None  # No prediction until a single model has been selected
return None

def single_model_remaining(self):
"""Indicate whether the race has ended, i.e. only a single model remains."""
return len(self.remaining_models) == 1
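Since the classifier's doctest is collapsed in this diff view, here is a minimal usage sketch mirroring the regressor example further down. It assumes the classifier takes the same models/metric/delta keyword arguments as the regressor; the dataset and model choices are illustrative, not part of the commit:

from river import datasets, linear_model, metrics, model_selection, tree

race = model_selection.HoeffdingRaceClassifier(
    models={
        "Log_Reg": linear_model.LogisticRegression(),
        "Hoeffding_Tree": tree.HoeffdingTreeClassifier(),
    },
    metric=metrics.Accuracy(),
    delta=0.05,
)

for x, y in datasets.Phishing():
    race.learn_one(x, y)
    if race.single_model_remaining():
        break

# Whichever model survives (or both, if the race does not end before the stream does).
print(race.remaining_models)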


class HoeffdingRaceRegressor(base.Regressor):
""" """
"""
HoeffdingRace-based model selection for regression.

Each model is associated with a running performance measure (here, its error according to the chosen metric). When a model is
considered too inaccurate according to the Hoeffding bound, it is removed from the race.

Parameters
----------
models
The models to select from.
metric
The metric that is used to measure the performance of each model.
delta
Confidence parameter of the Hoeffding bound (smaller values give a wider, more conservative bound).

Example on regression:
>>> from river import linear_model, neighbors, tree, metrics, datasets, model_selection
>>> hoeffding_race = model_selection.HoeffdingRaceRegressor(
... models={"KNN": neighbors.KNNRegressor(),
... "Lin_Reg": linear_model.LinearRegression()},
... metric=metrics.MAE(),
... delta=0.05)
>>> dataset = datasets.ChickWeights()
>>> for x, y in dataset:
... hoeffding_race.learn_one(x, y)
... if hoeffding_race.single_model_remaining():
... break
...
>>> print(hoeffding_race.remaining_models)
['Lin_Reg']
"""

def __init__(
self,
@@ -86,7 +139,7 @@ def __init__(
self.n = 0
self.model_metrics = {name: metric.clone() for name in models.keys()}
self.model_performance = {name: 0 for name in models.keys()}
self.remaining_models = set(models.keys())
self.remaining_models = list(models.keys())

def hoeffding_bound(self, n):
"""Compute the Hoeffding bound given n, the number of observations seen so far."""
return math.sqrt((math.log(1 / self.delta)) / (2 * n))
@@ -100,17 +153,22 @@ def learn_one(self, x, y):
self.models[name].learn_one(x, y)

# Update performance

self.model_metrics[name].update(y, y_pred)
self.model_performance[name] = self.model_metrics[name].get()

if self.model_performance[name] + self.hoeffding_bound(self.n) < best_perf:
self.remaining_models.remove(name)
if len(self.remaining_models) == 1:
break

def predict_one(self, x):
# Predict with the last remaining model once the race has ended
if len(self.remaining_models) == 1:
return self.models[list(self.remaining_models)[0]].predict_one(x)
return None  # No prediction until a single model has been selected
return None

def single_model_remaining(self):
"""
Method to be able to know if the "race" has ended.
"""
return len(self.remaining_models) == 1
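Because predict_one returns None while more than one model is still racing, calling code should guard against that case. A self-contained sketch of the pattern (model and dataset choices are illustrative, not part of the commit):

from river import datasets, linear_model, metrics, model_selection, neighbors

race = model_selection.HoeffdingRaceRegressor(
    models={"KNN": neighbors.KNNRegressor(), "Lin_Reg": linear_model.LinearRegression()},
    metric=metrics.MAE(),
    delta=0.05,
)

mae = metrics.MAE()
for x, y in datasets.ChickWeights():
    y_pred = race.predict_one(x)
    if y_pred is not None:      # only score once the race has ended
        mae.update(y, y_pred)
    race.learn_one(x, y)

print(mae)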
