Type hints for 'return self' (#1601)

* Make all learn and update methods return None Changing the return types in the base classes means MyPy can detect when a method returns an unexpected value. * Propagate None returns to child classes * Remove wrong returns * Add a note in the changelog * Change the return type of SortedWindow.append * Fix new MyPy 11 errors
online-ml · Aug 31, 2024 · 25f9f89 · 25f9f89
1 parent f982476
commit 25f9f89
Show file tree

Hide file tree

Showing 29 changed files with 67 additions and 80 deletions.
diff --git a/docs/releases/unreleased.md b/docs/releases/unreleased.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 - The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the display, the behaviour is unchanged.
+- The methods `learn_one`, `learn_many`, `update`, `revert`, and `append` now return `None`.
 
 ## cluster
 

diff --git a/river/anomaly/base.py b/river/anomaly/base.py
@@ -15,7 +15,7 @@ def _supervised(self):
         return False
 
     @abc.abstractmethod
-    def learn_one(self, x: dict):
+    def learn_one(self, x: dict) -> None:
         """Update the model.
 
         Parameters
@@ -48,7 +48,7 @@ class SupervisedAnomalyDetector(base.Estimator):
     """A supervised anomaly detector."""
 
     @abc.abstractmethod
-    def learn_one(self, x: dict, y: base.typing.Target):
+    def learn_one(self, x: dict, y: base.typing.Target) -> None:
         """Update the model.
 
         Parameters
@@ -137,7 +137,7 @@ def score_one(self, *args, **kwargs):
         """
         return self.anomaly_detector.score_one(*args, **kwargs)
 
-    def learn_one(self, *args, **learn_kwargs):
+    def learn_one(self, *args, **learn_kwargs) -> None:
         """Update the anomaly filter and the underlying anomaly detector.
 
         Parameters

diff --git a/river/anomaly/pad.py b/river/anomaly/pad.py
@@ -80,7 +80,7 @@ class PredictiveAnomalyDetection(anomaly.base.SupervisedAnomalyDetector):
 
     >>> for t, (x, y) in enumerate(datasets.AirlinePassengers()):
     ...     score = PAD.score_one(None, y)
-    ...     PAD = PAD.learn_one(None, y)
+    ...     PAD.learn_one(None, y)
     ...     scores.append(score)
 
     >>> print(scores[-1])
@@ -131,7 +131,6 @@ def learn_one(self, x: dict | None, y: base.typing.Target | float):
                 self.predictive_model.learn_one(y=y, x=x)
         else:
             self.predictive_model.learn_one(x=x, y=y)
-        return self
 
     def score_one(self, x: dict, y: base.typing.Target):
         # Return the predicted value of x from the predictive model, first by checking whether

diff --git a/river/base/classifier.py b/river/base/classifier.py
@@ -15,7 +15,7 @@ class Classifier(estimator.Estimator):
     """A classifier."""
 
     @abc.abstractmethod
-    def learn_one(self, x: dict, y: base.typing.ClfTarget) -> Classifier:
+    def learn_one(self, x: dict, y: base.typing.ClfTarget) -> None:
         """Update the model with a set of features `x` and a label `y`.
 
         Parameters
@@ -81,7 +81,7 @@ class MiniBatchClassifier(Classifier):
     """A classifier that can operate on mini-batches."""
 
     @abc.abstractmethod
-    def learn_many(self, X: pd.DataFrame, y: pd.Series):
+    def learn_many(self, X: pd.DataFrame, y: pd.Series) -> None:
         """Update the model with a mini-batch of features `X` and boolean targets `y`.
 
         Parameters

diff --git a/river/base/clusterer.py b/river/base/clusterer.py
@@ -13,7 +13,7 @@ def _supervised(self):
         return False
 
     @abc.abstractmethod
-    def learn_one(self, x: dict) -> Clusterer:
+    def learn_one(self, x: dict) -> None:
         """Update the model with a set of features `x`.
 
         Parameters

diff --git a/river/base/drift_detector.py b/river/base/drift_detector.py
@@ -77,7 +77,7 @@ class BinaryDriftDetector(_BaseDriftDetector):
     """A drift detector for binary data."""
 
     @abc.abstractmethod
-    def update(self, x: bool) -> BinaryDriftDetector:
+    def update(self, x: bool) -> None:
         """Update the detector with a single boolean input.
 
         Parameters

diff --git a/river/base/multi_output.py b/river/base/multi_output.py
@@ -10,7 +10,7 @@ class MultiLabelClassifier(Estimator, abc.ABC):
     """Multi-label classifier."""
 
     @abc.abstractmethod
-    def learn_one(self, x: dict, y: dict[FeatureName, bool]):
+    def learn_one(self, x: dict, y: dict[FeatureName, bool]) -> None:
         """Update the model with a set of features `x` and the labels `y`.
 
         Parameters
@@ -68,7 +68,7 @@ class MultiTargetRegressor(Estimator, abc.ABC):
     """Multi-target regressor."""
 
     @abc.abstractmethod
-    def learn_one(self, x: dict, y: dict[FeatureName, RegTarget], **kwargs):
+    def learn_one(self, x: dict, y: dict[FeatureName, RegTarget], **kwargs) -> None:
         """Fits to a set of features `x` and a real-valued target `y`.
 
         Parameters

diff --git a/river/base/regressor.py b/river/base/regressor.py
@@ -15,7 +15,7 @@ class Regressor(estimator.Estimator):
     """A regressor."""
 
     @abc.abstractmethod
-    def learn_one(self, x: dict, y: base.typing.RegTarget):
+    def learn_one(self, x: dict, y: base.typing.RegTarget) -> None:
         """Fits to a set of features `x` and a real-valued target `y`.
 
         Parameters
@@ -47,7 +47,7 @@ class MiniBatchRegressor(Regressor):
     """A regressor that can operate on mini-batches."""
 
     @abc.abstractmethod
-    def learn_many(self, X: pd.DataFrame, y: pd.Series):
+    def learn_many(self, X: pd.DataFrame, y: pd.Series) -> None:
         """Update the model with a mini-batch of features `X` and real-valued targets `y`.
 
         Parameters

diff --git a/river/base/transformer.py b/river/base/transformer.py
@@ -57,7 +57,7 @@ class Transformer(base.Estimator, BaseTransformer):
     def _supervised(self):
         return False
 
-    def learn_one(self, x: dict):
+    def learn_one(self, x: dict) -> None:
         """Update with a set of features `x`.
 
         A lot of transformers don't actually have to do anything during the `learn_one` step
@@ -81,7 +81,7 @@ class SupervisedTransformer(base.Estimator, BaseTransformer):
     def _supervised(self):
         return True
 
-    def learn_one(self, x: dict, y: base.typing.Target):
+    def learn_one(self, x: dict, y: base.typing.Target) -> None:
         """Update with a set of features `x` and a target `y`.
 
         Parameters
@@ -113,7 +113,7 @@ def transform_many(self, X: pd.DataFrame) -> pd.DataFrame:
 
         """
 
-    def learn_many(self, X: pd.DataFrame):
+    def learn_many(self, X: pd.DataFrame) -> None:
         """Update with a mini-batch of features.
 
         A lot of transformers don't actually have to do anything during the `learn_many` step
@@ -138,7 +138,7 @@ def _supervised(self):
         return True
 
     @abc.abstractmethod
-    def learn_many(self, X: pd.DataFrame, y: pd.Series):
+    def learn_many(self, X: pd.DataFrame, y: pd.Series) -> None:
         """Update the model with a mini-batch of features `X` and targets `y`.
 
         Parameters

diff --git a/river/cluster/textclust.py b/river/cluster/textclust.py
@@ -213,7 +213,6 @@ def learn_one(self, x, t=None, w=None):
 
         ## increment observation counter
         self.n += 1
-        return clusterId
 
     ## predicts the cluster number. The type specifies whether this should happen on micro-cluster
     ## or macro-cluster level

diff --git a/river/compat/sklearn_to_river.py b/river/compat/sklearn_to_river.py
@@ -33,7 +33,7 @@ def convert_sklearn_to_river(estimator: sklearn_base.BaseEstimator, classes: lis
         (sklearn_base.RegressorMixin, SKL2RiverRegressor),
         (
             sklearn_base.ClassifierMixin,
-            functools.partial(SKL2RiverClassifier, classes=classes),
+            functools.partial(SKL2RiverClassifier, classes=classes),  # type:ignore[arg-type]
         ),
     ]
 

diff --git a/river/covariance/emp.py b/river/covariance/emp.py
@@ -379,5 +379,3 @@ def update_many(self, X: pd.DataFrame):
             row = self._w[fi] * inv_cov[i]
             for j, fj in enumerate(X):
                 self._inv_cov[min((fi, fj), (fj, fi))] = row[j]
-
-        return self
diff --git a/river/ensemble/ewa.py b/river/ensemble/ewa.py
@@ -122,7 +122,6 @@ def learn_predict_one(self, x, y):
 
     def learn_one(self, x, y):
         self.learn_predict_one(x, y)
-        return self
 
     def predict_one(self, x):
         return sum(model.predict_one(x) * weight for model, weight in zip(self, self.weights))
diff --git a/river/feature_extraction/vectorize.py b/river/feature_extraction/vectorize.py
@@ -203,7 +203,7 @@ def __init__(
         # Stop word removal
         if self.stop_words:
             self.processing_steps.append(
-                functools.partial(remove_stop_words, stop_words=stop_words)
+                functools.partial(remove_stop_words, stop_words=self.stop_words)
             )
 
         # n-grams

diff --git a/river/linear_model/base.py b/river/linear_model/base.py
@@ -122,8 +122,6 @@ def _fit(self, x, y, w, get_grad):
 
             self._update_weights(x)
 
-        return self
-
     def _update_weights(self, x):
         # L1 cumulative penalty helper
 
@@ -161,7 +159,7 @@ def _eval_gradient_one(self, x: dict, y: float, w: float) -> tuple[dict, float]:
 
         return (loss_gradient * utils.VectorDict(x), loss_gradient)
 
-    def learn_one(self, x, y, w=1.0):
+    def learn_one(self, x, y, w=1.0) -> None:
         with self._learn_mode(x):
             self._fit(x, y, w, get_grad=self._eval_gradient_one)
 
@@ -188,7 +186,7 @@ def _eval_gradient_many(
 
         return dict(zip(X.columns, gradient)), loss_gradient.mean()
 
-    def learn_many(self, X: pd.DataFrame, y: pd.Series, w: float | pd.Series = 1):
+    def learn_many(self, X: pd.DataFrame, y: pd.Series, w: float | pd.Series = 1) -> None:
         self._y_name = y.name
         with self._learn_mode(set(X)):
             self._fit(X, y, w, get_grad=self._eval_gradient_many)
diff --git a/river/metrics/base.py b/river/metrics/base.py
@@ -22,11 +22,11 @@ class Metric(base.Base, abc.ABC):
     """Mother class for all metrics."""
 
     @abc.abstractmethod
-    def update(self, y_true, y_pred) -> Metric:
+    def update(self, y_true, y_pred) -> None:
         """Update the metric."""
 
     @abc.abstractmethod
-    def revert(self, y_true, y_pred) -> Metric:
+    def revert(self, y_true, y_pred) -> None:
         """Revert the metric."""
 
     @abc.abstractmethod
@@ -89,14 +89,14 @@ def __init__(self, cm=None):
             cm = metrics.ConfusionMatrix()
         self.cm = cm
 
-    def update(self, y_true, y_pred, w=1.0):
+    def update(self, y_true, y_pred, w=1.0) -> None:
         self.cm.update(
             y_true,
             y_pred,
             w=w,
         )
 
-    def revert(self, y_true, y_pred, w=1.0):
+    def revert(self, y_true, y_pred, w=1.0) -> None:
         self.cm.revert(
             y_true,
             y_pred,
@@ -152,7 +152,7 @@ def update(
         y_true: bool,
         y_pred: bool | float | dict[bool, float],
         w=1.0,
-    ) -> BinaryMetric:
+    ) -> None:
         if self.requires_labels:
             y_pred = y_pred == self.pos_val
         return super().update(y_true == self.pos_val, y_pred, w)
@@ -162,7 +162,7 @@ def revert(
         y_true: bool,
         y_pred: bool | float | dict[bool, float],
         w=1.0,
-    ) -> BinaryMetric:
+    ) -> None:
         if self.requires_labels:
             y_pred = y_pred == self.pos_val
         return super().revert(y_true == self.pos_val, y_pred, w)
@@ -190,11 +190,11 @@ class RegressionMetric(Metric):
     _fmt = ",.6f"  # use commas to separate big numbers and show 6 decimals
 
     @abc.abstractmethod
-    def update(self, y_true: numbers.Number, y_pred: numbers.Number) -> RegressionMetric:
+    def update(self, y_true: numbers.Number, y_pred: numbers.Number) -> None:
         """Update the metric."""
 
     @abc.abstractmethod
-    def revert(self, y_true: numbers.Number, y_pred: numbers.Number) -> RegressionMetric:
+    def revert(self, y_true: numbers.Number, y_pred: numbers.Number) -> None:
         """Revert the metric."""
 
     @property
@@ -227,7 +227,7 @@ def __init__(self, metrics, str_sep="\n"):
         super().__init__(metrics)
         self.str_sep = str_sep
 
-    def update(self, y_true, y_pred, w=1.0):
+    def update(self, y_true, y_pred, w=1.0) -> None:
         # If the metrics are classification metrics, then we have to handle the case where some
         # of the metrics require labels, whilst others need to be fed probabilities
         if hasattr(self, "requires_labels") and not self.requires_labels:
@@ -241,7 +241,7 @@ def update(self, y_true, y_pred, w=1.0):
         for m in self:
             m.update(y_true, y_pred)
 
-    def revert(self, y_true, y_pred, w=1.0):
+    def revert(self, y_true, y_pred, w=1.0) -> None:
         # If the metrics are classification metrics, then we have to handle the case where some
         # of the metrics require labels, whilst others need to be fed probabilities
         if hasattr(self, "requires_labels") and not self.requires_labels:
@@ -334,10 +334,10 @@ def __init__(self):
     def _eval(self, y_true, y_pred):
         pass
 
-    def update(self, y_true, y_pred, w=1.0):
+    def update(self, y_true, y_pred, w=1.0) -> None:
         self._mean.update(x=self._eval(y_true, y_pred), w=w)
 
-    def revert(self, y_true, y_pred, w=1.0):
+    def revert(self, y_true, y_pred, w=1.0) -> None:
         self._mean.revert(x=self._eval(y_true, y_pred), w=w)
 
     def get(self):
@@ -353,11 +353,11 @@ class ClusteringMetric(base.Base, abc.ABC):
     _fmt = ",.6f"  # Use commas to separate big numbers and show 6 decimals
 
     @abc.abstractmethod
-    def update(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
+    def update(self, x, y_pred, centers, w=1.0) -> None:
         """Update the metric."""
 
     @abc.abstractmethod
-    def revert(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
+    def revert(self, x, y_pred, centers, w=1.0) -> None:
         """Revert the metric."""
 
     @abc.abstractmethod

diff --git a/river/metrics/multioutput/base.py b/river/metrics/multioutput/base.py
@@ -38,7 +38,7 @@ def update(
         y_pred: dict[str | int, base.typing.ClfTarget]
         | dict[str | int, dict[base.typing.ClfTarget, float]],
         w=1.0,
-    ):
+    ) -> None:
         """Update the metric."""
         self.cm.update(y_true, y_pred, w)
 
@@ -48,7 +48,7 @@ def revert(
         y_pred: dict[str | int, base.typing.ClfTarget]
         | dict[str | int, dict[base.typing.ClfTarget, float]],
         w=1.0,
-    ):
+    ) -> None:
         """Revert the metric."""
         self.cm.revert(y_true, y_pred, w)
 
@@ -72,15 +72,15 @@ def update(
         self,
         y_true: dict[str | int, float | int],
         y_pred: dict[str | int, float | int],
-    ):
+    ) -> None:
         """Update the metric."""
 
     @abc.abstractmethod
     def revert(
         self,
         y_true: dict[str | int, float | int],
         y_pred: dict[str | int, float | int],
-    ):
+    ) -> None:
         """Revert the metric."""
 
     def works_with(self, model) -> bool:

diff --git a/river/neighbors/base.py b/river/neighbors/base.py
@@ -37,7 +37,7 @@ def __init__(self, dist_func: DistanceFunc | FunctionWrapper):
         self.dist_func = dist_func
 
     @abc.abstractmethod
-    def append(self, item: typing.Any, **kwargs):
+    def append(self, item: typing.Any, **kwargs) -> None:
         pass
 
     @abc.abstractmethod

diff --git a/river/neighbors/knn_classifier.py b/river/neighbors/knn_classifier.py
@@ -143,8 +143,6 @@ def _run_class_cleanup(self):
                 self.clean_up_classes()
                 self._cleanup_counter = self.cleanup_every
 
-        return self
-
     def predict_proba_one(self, x, **kwargs):
         nearest = self._nn.search((x, None), n_neighbors=self.n_neighbors, **kwargs)