Added commentary + fixed bugs
EL HANI Marwane committed Nov 27, 2024
1 parent e159250 commit 432363d
Showing 2 changed files with 67 additions and 7 deletions.
2 changes: 2 additions & 0 deletions river/model_selection/__init__.py
@@ -19,6 +19,7 @@
from .bandit import BanditClassifier, BanditRegressor
from .greedy import GreedyRegressor
from .hoeffding_races import HoeffdingRaceClassifier
from .hoeffding_races import HoeffdingRaceRegressor
from .sh import SuccessiveHalvingClassifier, SuccessiveHalvingRegressor

__all__ = [
@@ -29,4 +30,5 @@
"SuccessiveHalvingClassifier",
"SuccessiveHalvingRegressor",
"HoeffdingRaceClassifier",
"HoeffdingRaceRegressor"
]
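With HoeffdingRaceRegressor now exported from river/model_selection/__init__.py, it should be reachable from the package namespace. A minimal sanity check, assuming a development install of this branch (editorial example, not part of the commit):

from river import model_selection

print(model_selection.HoeffdingRaceRegressor)
print("HoeffdingRaceRegressor" in model_selection.__all__)  # True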
72 changes: 65 additions & 7 deletions river/model_selection/hoeffding_races.py
@@ -7,6 +7,21 @@

class HoeffdingRaceClassifier(base.Classifier):
"""
HoeffdingRace-based model selection for classification.

Each model is associated with a running performance measure (here, its accuracy). When a model is
considered too inaccurate according to the Hoeffding bound, it is removed from the race.

Parameters
----------
models
The models to select from.
metric
The metric that is used to measure the performance of each model.
delta
Confidence parameter of the Hoeffding bound (smaller values give a wider, more conservative bound).

Example on binary classification:
>>> from river import model_selection
@@ -41,9 +56,13 @@ def __init__(
self.n = 0
self.model_metrics = {name: metric.clone() for name in models.keys()}
self.model_performance = {name: 0 for name in models.keys()}
self.remaining_models = set(models.keys())
self.remaining_models = list(models.keys())

def hoeffding_bound(self, n):
"""
Computes the hoeffding bound according to n, the number of iterations done.
"""
return math.sqrt((math.log(1 / self.delta)) / (2 * n))
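For background, the formula above is the one-sided Hoeffding bound: for n observations of a quantity bounded in [0, 1] (such as an accuracy), the probability that the empirical mean deviates from its expectation by more than epsilon is at most delta when

\epsilon = \sqrt{\frac{\ln(1/\delta)}{2n}}

This is editorial context on the standard inequality, not part of the committed code.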

def learn_one(self, x, y):
@@ -55,24 +74,58 @@ def learn_one(self, x, y):
self.models[name].learn_one(x, y)

# Update performance

self.model_metrics[name].update(y, y_pred)
self.model_performance[name] = self.model_metrics[name].get()

if self.model_performance[name] + self.hoeffding_bound(self.n) < best_perf:
self.remaining_models.remove(name)
if len(self.remaining_models) == 1:
break
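For intuition on the elimination rule above, here is a small worked example with illustrative numbers (delta, n and the two performance values are made up, not taken from the commit):

import math

delta, n = 0.05, 100
bound = math.sqrt(math.log(1 / delta) / (2 * n))  # ~= 0.122

best_perf = 0.85       # accuracy of the current leader (illustrative)
candidate_perf = 0.70  # accuracy of a trailing model (illustrative)

# Even with the Hoeffding bound added, the trailing model cannot plausibly
# catch up to the leader, so it gets eliminated from the race.
print(candidate_perf + bound < best_perf)  # True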

def predict_one(self, x):
# Predict with the last remaining model once the race has ended

if len(self.remaining_models) == 1:
return self.models[list(self.remaining_models)[0]].predict_one(x)
return None  # No prediction until a single model has been selected
return None

def single_model_remaining(self):
"""Indicate whether the race has ended, i.e. only a single model remains."""
return len(self.remaining_models) == 1
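Since the classifier's doctest is collapsed in this diff view, here is a minimal usage sketch mirroring the regressor example further down. It assumes the classifier takes the same models/metric/delta keyword arguments as the regressor; the dataset and model choices are illustrative, not part of the commit:

from river import datasets, linear_model, metrics, model_selection, tree

race = model_selection.HoeffdingRaceClassifier(
    models={
        "Log_Reg": linear_model.LogisticRegression(),
        "Hoeffding_Tree": tree.HoeffdingTreeClassifier(),
    },
    metric=metrics.Accuracy(),
    delta=0.05,
)

for x, y in datasets.Phishing():
    race.learn_one(x, y)
    if race.single_model_remaining():
        break

# Whichever model survives (or both, if the race does not end before the stream does).
print(race.remaining_models)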


class HoeffdingRaceRegressor(base.Regressor):
""" """
"""
HoeffdingRace-based model selection for regression.

Each model is associated with a running performance measure (here, its error according to the chosen metric). When a model is
considered too inaccurate according to the Hoeffding bound, it is removed from the race.

Parameters
----------
models
The models to select from.
metric
The metric that is used to measure the performance of each model.
delta
Confidence parameter of the Hoeffding bound (smaller values give a wider, more conservative bound).

Example on regression:
>>> from river import linear_model, neighbors, tree, metrics, datasets, model_selection
>>> hoeffding_race = model_selection.HoeffdingRaceRegressor(
... models={"KNN": neighbors.KNNRegressor(),
... "Lin_Reg": linear_model.LinearRegression()},
... metric=metrics.MAE(),
... delta=0.05)
>>> dataset = datasets.ChickWeights()
>>> for x, y in dataset:
... hoeffding_race.learn_one(x, y)
... if hoeffding_race.single_model_remaining():
... break
...
>>> print(hoeffding_race.remaining_models)
['Lin_Reg']
"""

def __init__(
self,
@@ -86,7 +139,7 @@ def __init__(
self.n = 0
self.model_metrics = {name: metric.clone() for name in models.keys()}
self.model_performance = {name: 0 for name in models.keys()}
self.remaining_models = set(models.keys())
self.remaining_models = list(models.keys())

def hoeffding_bound(self, n):
"""Compute the Hoeffding bound given n, the number of observations seen so far."""
return math.sqrt((math.log(1 / self.delta)) / (2 * n))
@@ -100,17 +153,22 @@ def learn_one(self, x, y):
self.models[name].learn_one(x, y)

# Update performance

self.model_metrics[name].update(y, y_pred)
self.model_performance[name] = self.model_metrics[name].get()

if self.model_performance[name] + self.hoeffding_bound(self.n) < best_perf:
self.remaining_models.remove(name)
if len(self.remaining_models) == 1:
break

def predict_one(self, x):
# Predict with the last remaining model once the race has ended
if len(self.remaining_models) == 1:
return self.models[list(self.remaining_models)[0]].predict_one(x)
return None  # No prediction until a single model has been selected
return None

def single_model_remaining(self):
"""
Method to be able to know if the "race" has ended.
"""
return len(self.remaining_models) == 1
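Because predict_one returns None while more than one model is still racing, calling code should guard against that case. A self-contained sketch of the pattern (model and dataset choices are illustrative, not part of the commit):

from river import datasets, linear_model, metrics, model_selection, neighbors

race = model_selection.HoeffdingRaceRegressor(
    models={"KNN": neighbors.KNNRegressor(), "Lin_Reg": linear_model.LinearRegression()},
    metric=metrics.MAE(),
    delta=0.05,
)

mae = metrics.MAE()
for x, y in datasets.ChickWeights():
    y_pred = race.predict_one(x)
    if y_pred is not None:      # only score once the race has ended
        mae.update(y, y_pred)
    race.learn_one(x, y)

print(mae)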
