Skip to content

Commit

Permalink
Type hints for 'return self' (#1601)
Browse files Browse the repository at this point in the history
* Make all learn and update methods return None

Changing the return types in the base classes means MyPy can detect when
a method returns an unexpected value.

* Propagate None returns to child classes

* Remove wrong returns

* Add a note in the changelog

* Change the return type of SortedWindow.append

* Fix new MyPy 11 errors
  • Loading branch information
e10e3 authored Aug 31, 2024
1 parent f982476 commit 25f9f89
Show file tree
Hide file tree
Showing 29 changed files with 67 additions and 80 deletions.
1 change: 1 addition & 0 deletions docs/releases/unreleased.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Unreleased

- The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the display, the behaviour is unchanged.
- The methods `learn_one`, `learn_many`, `update`, `revert`, and `append` now return `None`.

## cluster

Expand Down
6 changes: 3 additions & 3 deletions river/anomaly/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def _supervised(self):
return False

@abc.abstractmethod
def learn_one(self, x: dict):
def learn_one(self, x: dict) -> None:
"""Update the model.
Parameters
Expand Down Expand Up @@ -48,7 +48,7 @@ class SupervisedAnomalyDetector(base.Estimator):
"""A supervised anomaly detector."""

@abc.abstractmethod
def learn_one(self, x: dict, y: base.typing.Target):
def learn_one(self, x: dict, y: base.typing.Target) -> None:
"""Update the model.
Parameters
Expand Down Expand Up @@ -137,7 +137,7 @@ def score_one(self, *args, **kwargs):
"""
return self.anomaly_detector.score_one(*args, **kwargs)

def learn_one(self, *args, **learn_kwargs):
def learn_one(self, *args, **learn_kwargs) -> None:
"""Update the anomaly filter and the underlying anomaly detector.
Parameters
Expand Down
3 changes: 1 addition & 2 deletions river/anomaly/pad.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class PredictiveAnomalyDetection(anomaly.base.SupervisedAnomalyDetector):
>>> for t, (x, y) in enumerate(datasets.AirlinePassengers()):
... score = PAD.score_one(None, y)
... PAD = PAD.learn_one(None, y)
... PAD.learn_one(None, y)
... scores.append(score)
>>> print(scores[-1])
Expand Down Expand Up @@ -131,7 +131,6 @@ def learn_one(self, x: dict | None, y: base.typing.Target | float):
self.predictive_model.learn_one(y=y, x=x)
else:
self.predictive_model.learn_one(x=x, y=y)
return self

def score_one(self, x: dict, y: base.typing.Target):
# Return the predicted value of x from the predictive model, first by checking whether
Expand Down
4 changes: 2 additions & 2 deletions river/base/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Classifier(estimator.Estimator):
"""A classifier."""

@abc.abstractmethod
def learn_one(self, x: dict, y: base.typing.ClfTarget) -> Classifier:
def learn_one(self, x: dict, y: base.typing.ClfTarget) -> None:
"""Update the model with a set of features `x` and a label `y`.
Parameters
Expand Down Expand Up @@ -81,7 +81,7 @@ class MiniBatchClassifier(Classifier):
"""A classifier that can operate on mini-batches."""

@abc.abstractmethod
def learn_many(self, X: pd.DataFrame, y: pd.Series):
def learn_many(self, X: pd.DataFrame, y: pd.Series) -> None:
"""Update the model with a mini-batch of features `X` and boolean targets `y`.
Parameters
Expand Down
2 changes: 1 addition & 1 deletion river/base/clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def _supervised(self):
return False

@abc.abstractmethod
def learn_one(self, x: dict) -> Clusterer:
def learn_one(self, x: dict) -> None:
"""Update the model with a set of features `x`.
Parameters
Expand Down
2 changes: 1 addition & 1 deletion river/base/drift_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class BinaryDriftDetector(_BaseDriftDetector):
"""A drift detector for binary data."""

@abc.abstractmethod
def update(self, x: bool) -> BinaryDriftDetector:
def update(self, x: bool) -> None:
"""Update the detector with a single boolean input.
Parameters
Expand Down
4 changes: 2 additions & 2 deletions river/base/multi_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class MultiLabelClassifier(Estimator, abc.ABC):
"""Multi-label classifier."""

@abc.abstractmethod
def learn_one(self, x: dict, y: dict[FeatureName, bool]):
def learn_one(self, x: dict, y: dict[FeatureName, bool]) -> None:
"""Update the model with a set of features `x` and the labels `y`.
Parameters
Expand Down Expand Up @@ -68,7 +68,7 @@ class MultiTargetRegressor(Estimator, abc.ABC):
"""Multi-target regressor."""

@abc.abstractmethod
def learn_one(self, x: dict, y: dict[FeatureName, RegTarget], **kwargs):
def learn_one(self, x: dict, y: dict[FeatureName, RegTarget], **kwargs) -> None:
"""Fits to a set of features `x` and a real-valued target `y`.
Parameters
Expand Down
4 changes: 2 additions & 2 deletions river/base/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Regressor(estimator.Estimator):
"""A regressor."""

@abc.abstractmethod
def learn_one(self, x: dict, y: base.typing.RegTarget):
def learn_one(self, x: dict, y: base.typing.RegTarget) -> None:
"""Fits to a set of features `x` and a real-valued target `y`.
Parameters
Expand Down Expand Up @@ -47,7 +47,7 @@ class MiniBatchRegressor(Regressor):
"""A regressor that can operate on mini-batches."""

@abc.abstractmethod
def learn_many(self, X: pd.DataFrame, y: pd.Series):
def learn_many(self, X: pd.DataFrame, y: pd.Series) -> None:
"""Update the model with a mini-batch of features `X` and real-valued targets `y`.
Parameters
Expand Down
8 changes: 4 additions & 4 deletions river/base/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class Transformer(base.Estimator, BaseTransformer):
def _supervised(self):
return False

def learn_one(self, x: dict):
def learn_one(self, x: dict) -> None:
"""Update with a set of features `x`.
A lot of transformers don't actually have to do anything during the `learn_one` step
Expand All @@ -81,7 +81,7 @@ class SupervisedTransformer(base.Estimator, BaseTransformer):
def _supervised(self):
return True

def learn_one(self, x: dict, y: base.typing.Target):
def learn_one(self, x: dict, y: base.typing.Target) -> None:
"""Update with a set of features `x` and a target `y`.
Parameters
Expand Down Expand Up @@ -113,7 +113,7 @@ def transform_many(self, X: pd.DataFrame) -> pd.DataFrame:
"""

def learn_many(self, X: pd.DataFrame):
def learn_many(self, X: pd.DataFrame) -> None:
"""Update with a mini-batch of features.
A lot of transformers don't actually have to do anything during the `learn_many` step
Expand All @@ -138,7 +138,7 @@ def _supervised(self):
return True

@abc.abstractmethod
def learn_many(self, X: pd.DataFrame, y: pd.Series):
def learn_many(self, X: pd.DataFrame, y: pd.Series) -> None:
"""Update the model with a mini-batch of features `X` and targets `y`.
Parameters
Expand Down
1 change: 0 additions & 1 deletion river/cluster/textclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ def learn_one(self, x, t=None, w=None):

## increment observation counter
self.n += 1
return clusterId

## predicts the cluster number. The type specifies whether this should happen on micro-cluster
## or macro-cluster level
Expand Down
2 changes: 1 addition & 1 deletion river/compat/sklearn_to_river.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def convert_sklearn_to_river(estimator: sklearn_base.BaseEstimator, classes: lis
(sklearn_base.RegressorMixin, SKL2RiverRegressor),
(
sklearn_base.ClassifierMixin,
functools.partial(SKL2RiverClassifier, classes=classes),
functools.partial(SKL2RiverClassifier, classes=classes), # type:ignore[arg-type]
),
]

Expand Down
2 changes: 0 additions & 2 deletions river/covariance/emp.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,5 +379,3 @@ def update_many(self, X: pd.DataFrame):
row = self._w[fi] * inv_cov[i]
for j, fj in enumerate(X):
self._inv_cov[min((fi, fj), (fj, fi))] = row[j]

return self
1 change: 0 additions & 1 deletion river/ensemble/ewa.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ def learn_predict_one(self, x, y):

def learn_one(self, x, y):
self.learn_predict_one(x, y)
return self

def predict_one(self, x):
return sum(model.predict_one(x) * weight for model, weight in zip(self, self.weights))
2 changes: 1 addition & 1 deletion river/feature_extraction/vectorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def __init__(
# Stop word removal
if self.stop_words:
self.processing_steps.append(
functools.partial(remove_stop_words, stop_words=stop_words)
functools.partial(remove_stop_words, stop_words=self.stop_words)
)

# n-grams
Expand Down
6 changes: 2 additions & 4 deletions river/linear_model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ def _fit(self, x, y, w, get_grad):

self._update_weights(x)

return self

def _update_weights(self, x):
# L1 cumulative penalty helper

Expand Down Expand Up @@ -161,7 +159,7 @@ def _eval_gradient_one(self, x: dict, y: float, w: float) -> tuple[dict, float]:

return (loss_gradient * utils.VectorDict(x), loss_gradient)

def learn_one(self, x, y, w=1.0):
def learn_one(self, x, y, w=1.0) -> None:
with self._learn_mode(x):
self._fit(x, y, w, get_grad=self._eval_gradient_one)

Expand All @@ -188,7 +186,7 @@ def _eval_gradient_many(

return dict(zip(X.columns, gradient)), loss_gradient.mean()

def learn_many(self, X: pd.DataFrame, y: pd.Series, w: float | pd.Series = 1):
def learn_many(self, X: pd.DataFrame, y: pd.Series, w: float | pd.Series = 1) -> None:
self._y_name = y.name
with self._learn_mode(set(X)):
self._fit(X, y, w, get_grad=self._eval_gradient_many)
28 changes: 14 additions & 14 deletions river/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ class Metric(base.Base, abc.ABC):
"""Mother class for all metrics."""

@abc.abstractmethod
def update(self, y_true, y_pred) -> Metric:
def update(self, y_true, y_pred) -> None:
"""Update the metric."""

@abc.abstractmethod
def revert(self, y_true, y_pred) -> Metric:
def revert(self, y_true, y_pred) -> None:
"""Revert the metric."""

@abc.abstractmethod
Expand Down Expand Up @@ -89,14 +89,14 @@ def __init__(self, cm=None):
cm = metrics.ConfusionMatrix()
self.cm = cm

def update(self, y_true, y_pred, w=1.0):
def update(self, y_true, y_pred, w=1.0) -> None:
self.cm.update(
y_true,
y_pred,
w=w,
)

def revert(self, y_true, y_pred, w=1.0):
def revert(self, y_true, y_pred, w=1.0) -> None:
self.cm.revert(
y_true,
y_pred,
Expand Down Expand Up @@ -152,7 +152,7 @@ def update(
y_true: bool,
y_pred: bool | float | dict[bool, float],
w=1.0,
) -> BinaryMetric:
) -> None:
if self.requires_labels:
y_pred = y_pred == self.pos_val
return super().update(y_true == self.pos_val, y_pred, w)
Expand All @@ -162,7 +162,7 @@ def revert(
y_true: bool,
y_pred: bool | float | dict[bool, float],
w=1.0,
) -> BinaryMetric:
) -> None:
if self.requires_labels:
y_pred = y_pred == self.pos_val
return super().revert(y_true == self.pos_val, y_pred, w)
Expand Down Expand Up @@ -190,11 +190,11 @@ class RegressionMetric(Metric):
_fmt = ",.6f" # use commas to separate big numbers and show 6 decimals

@abc.abstractmethod
def update(self, y_true: numbers.Number, y_pred: numbers.Number) -> RegressionMetric:
def update(self, y_true: numbers.Number, y_pred: numbers.Number) -> None:
"""Update the metric."""

@abc.abstractmethod
def revert(self, y_true: numbers.Number, y_pred: numbers.Number) -> RegressionMetric:
def revert(self, y_true: numbers.Number, y_pred: numbers.Number) -> None:
"""Revert the metric."""

@property
Expand Down Expand Up @@ -227,7 +227,7 @@ def __init__(self, metrics, str_sep="\n"):
super().__init__(metrics)
self.str_sep = str_sep

def update(self, y_true, y_pred, w=1.0):
def update(self, y_true, y_pred, w=1.0) -> None:
# If the metrics are classification metrics, then we have to handle the case where some
# of the metrics require labels, whilst others need to be fed probabilities
if hasattr(self, "requires_labels") and not self.requires_labels:
Expand All @@ -241,7 +241,7 @@ def update(self, y_true, y_pred, w=1.0):
for m in self:
m.update(y_true, y_pred)

def revert(self, y_true, y_pred, w=1.0):
def revert(self, y_true, y_pred, w=1.0) -> None:
# If the metrics are classification metrics, then we have to handle the case where some
# of the metrics require labels, whilst others need to be fed probabilities
if hasattr(self, "requires_labels") and not self.requires_labels:
Expand Down Expand Up @@ -334,10 +334,10 @@ def __init__(self):
def _eval(self, y_true, y_pred):
pass

def update(self, y_true, y_pred, w=1.0):
def update(self, y_true, y_pred, w=1.0) -> None:
self._mean.update(x=self._eval(y_true, y_pred), w=w)

def revert(self, y_true, y_pred, w=1.0):
def revert(self, y_true, y_pred, w=1.0) -> None:
self._mean.revert(x=self._eval(y_true, y_pred), w=w)

def get(self):
Expand All @@ -353,11 +353,11 @@ class ClusteringMetric(base.Base, abc.ABC):
_fmt = ",.6f" # Use commas to separate big numbers and show 6 decimals

@abc.abstractmethod
def update(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
def update(self, x, y_pred, centers, w=1.0) -> None:
"""Update the metric."""

@abc.abstractmethod
def revert(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
def revert(self, x, y_pred, centers, w=1.0) -> None:
"""Revert the metric."""

@abc.abstractmethod
Expand Down
8 changes: 4 additions & 4 deletions river/metrics/multioutput/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def update(
y_pred: dict[str | int, base.typing.ClfTarget]
| dict[str | int, dict[base.typing.ClfTarget, float]],
w=1.0,
):
) -> None:
"""Update the metric."""
self.cm.update(y_true, y_pred, w)

Expand All @@ -48,7 +48,7 @@ def revert(
y_pred: dict[str | int, base.typing.ClfTarget]
| dict[str | int, dict[base.typing.ClfTarget, float]],
w=1.0,
):
) -> None:
"""Revert the metric."""
self.cm.revert(y_true, y_pred, w)

Expand All @@ -72,15 +72,15 @@ def update(
self,
y_true: dict[str | int, float | int],
y_pred: dict[str | int, float | int],
):
) -> None:
"""Update the metric."""

@abc.abstractmethod
def revert(
self,
y_true: dict[str | int, float | int],
y_pred: dict[str | int, float | int],
):
) -> None:
"""Revert the metric."""

def works_with(self, model) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion river/neighbors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self, dist_func: DistanceFunc | FunctionWrapper):
self.dist_func = dist_func

@abc.abstractmethod
def append(self, item: typing.Any, **kwargs):
def append(self, item: typing.Any, **kwargs) -> None:
pass

@abc.abstractmethod
Expand Down
2 changes: 0 additions & 2 deletions river/neighbors/knn_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,6 @@ def _run_class_cleanup(self):
self.clean_up_classes()
self._cleanup_counter = self.cleanup_every

return self

def predict_proba_one(self, x, **kwargs):
nearest = self._nn.search((x, None), n_neighbors=self.n_neighbors, **kwargs)

Expand Down
Loading

0 comments on commit 25f9f89

Please sign in to comment.