Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NumPy 2.0 compatibility #1632

Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion .github/actions/install-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ runs:
uses: actions/cache@v4
with:
path: ~/.local # the path depends on the OS
key: poetry-2 # increment to reset cache
key: poetry-5 # increment to reset cache
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved

- name: Install poetry
uses: snok/install-poetry@v1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/code-quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
ubuntu:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved

- name: Build River
uses: ./.github/actions/install-env
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Build River
uses: ./.github/actions/install-env
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up rust
if: matrix.os != 'ubuntu-20.04' && matrix.os != 'ubuntu-22.04'
Expand Down Expand Up @@ -104,7 +104,7 @@ jobs:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Build River
uses: ./.github/actions/install-env
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ jobs:
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Build River
uses: ./.github/actions/install-env
with:
python-version: "3.12"

- name: Cache River datasets
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/river_data
key: ${{ runner.os }}

- name: Cache scikit-learn datasets
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/scikit_learn_data
key: ${{ runner.os }}
Expand Down
31 changes: 9 additions & 22 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,23 @@
import platform
from distutils.command.build_ext import build_ext
from distutils.errors import CCompilerError, DistutilsExecError, DistutilsPlatformError
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved

import numpy
import setuptools
from Cython.Build import cythonize
from setuptools.command.build_ext import build_ext
from setuptools.errors import CCompilerError
from setuptools_rust import Binding, RustExtension

try:
from numpy import __version__ as numpy_version
from numpy import get_include
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy"])
from numpy import __version__ as numpy_version
from numpy import get_include

try:
from Cython.Build import cythonize
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "Cython"])
from Cython.Build import cythonize # type: ignore


agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved
ext_modules = cythonize(
module_list=[
setuptools.Extension(
"*",
sources=["**/*.pyx"],
include_dirs=[get_include()],
sources=["river/**/*.pyx"],
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved
include_dirs=[numpy.get_include()],
libraries=[] if platform.system() == "Windows" else ["m"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
)
],
compiler_directives={
"language_level": 3,
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved
"binding": True,
"embedsignature": True,
},
Expand All @@ -47,13 +34,13 @@ class ExtBuilder(build_ext):
def run(self):
try:
build_ext.run(self)
except (DistutilsPlatformError, FileNotFoundError):
except (FileNotFoundError):
raise BuildFailed("File not found. Could not compile C extension.")

def build_extension(self, ext):
try:
build_ext.build_extension(self, ext)
except (CCompilerError, DistutilsExecError, DistutilsPlatformError, ValueError):
except (CCompilerError, ValueError):
raise BuildFailed("Could not compile C extension.")


Expand Down
4,059 changes: 2,142 additions & 1,917 deletions poetry.lock

Large diffs are not rendered by default.

32 changes: 19 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
[build-system]
requires = ["poetry-core>=1.0.0", "cython", "numpy", "setuptools", "wheel", "setuptools-rust"]
requires = [
"poetry-core>=1.0.0",
"cython>3",
"numpy>=2.0.0",
"setuptools>70.1.0",
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved
"setuptools-rust",
]
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved
build-backend = "poetry.core.masonry.api"

[tool.poetry]
Expand All @@ -20,7 +26,7 @@ include = [
"river/datasets/*.zip",
"river/stream/*.zip",
"Cargo.toml",
"rust_src/**/*"
"rust_src/**/*",
]

[tool.poetry.build]
Expand All @@ -29,34 +35,34 @@ script = "build.py"

[tool.poetry.dependencies]
python = "^3.9"
numpy = "^1.23.0"
scipy = "^1.12.1"
pandas = "^2.1"
numpy = ">=1.23.0"
scipy = "^1.13.1"
pandas = "^2.2.3"

[tool.poetry.group.dev.dependencies]
graphviz = "^0.20.1"
gymnasium = "^0.29.0"
matplotlib = "^3.0.2"
matplotlib = "^3.8.4"
mypy = "^1.11.1"
pre-commit = "^3.5.0"
pytest = "^7.4.2"
ruff = "^0.4.10"
scikit-learn = "^1.3.1"
scikit-learn = "^1.5.1"
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved
sqlalchemy = "^2.0.22"
sympy = "^1.10.1"
pytest-xdist = {extras = ["psutil"], version = "^3.3.1"}
sympy = "^1.12.1"
pytest-xdist = { extras = ["psutil"], version = "^3.3.1" }
ipykernel = "^6.26.0"
ipython = "^8.17.2"
rich = "^13.6.0"
jupyter = "^1.0.0"
mike = "^2.0.0"
polars = "^0.20.8"
polars = "^1.1.0"

[tool.poetry.group.compat]
optional = true

[tool.poetry.group.compat.dependencies]
scikit-learn = "^1.0.1"
scikit-learn = "^1.5.1"
sqlalchemy = "^2.0.0"

[tool.poetry.group.docs]
Expand Down Expand Up @@ -84,7 +90,7 @@ optional = true

[tool.poetry.group.benchmark.dependencies]
"dominate" = "2.8.0"
"scikit-learn" = "1.3.1"
"scikit-learn" = "1.5.1"
"tabulate" = "0.9.0"
"vowpalwabbit" = "9.9.0"
"watermark" = "2.4.3"
Expand Down Expand Up @@ -161,7 +167,7 @@ module = [
"requests.*",
"gymnasium.*",
"sympy.*",
"polars.*"
"polars.*",
]
ignore_missing_imports = true

Expand Down
4 changes: 2 additions & 2 deletions river/compose/test_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ def test_issue_1253():
>>> model = group1 + group1 * group2
>>> XT = model.transform_many(X)

>>> XT.memory_usage().sum() // 1000
>>> XT.memory_usage().sum().item() // 1000
85

>>> XT.sparse.to_dense().memory_usage().sum() // 1000
>>> XT.sparse.to_dense().memory_usage().sum().item() // 1000
4455

>>> X, y = datasets.make_regression(n_samples=6, n_features=2)
Expand Down
2 changes: 1 addition & 1 deletion river/datasets/synth/anomaly_sine.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@ def __iter__(self):
self._generate_data()

for xi, yi in itertools.zip_longest(self.X, self.y if hasattr(self.y, "__iter__") else []):
yield dict(zip(["sine", "cosine"], xi)), bool(yi)
yield dict(zip(["sine", "cosine"], xi.tolist())), bool(yi)
5 changes: 4 additions & 1 deletion river/datasets/synth/logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ def __iter__(self):
X, Y = self._make_logical(n_tiles=self.n_tiles, shuffle=self.shuffle)

for xi, yi in itertools.zip_longest(X, Y if hasattr(Y, "__iter__") else []):
yield dict(zip(self.feature_names, xi)), dict(zip(self.target_names, yi))
yield (
dict(zip(self.feature_names, xi.tolist())),
dict(zip(self.target_names, yi.tolist())),
)

def _make_logical(self, n_tiles: int = 1, shuffle: bool = True):
"""Make toy dataset"""
Expand Down
2 changes: 1 addition & 1 deletion river/facto/ffm.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FFMClassifier(FFM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/fm.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FMClassifier(FM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/fwfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FwFMClassifier(FwFM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/hofm.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class HOFMClassifier(HOFM, base.Classifier):
Expand Down
10 changes: 6 additions & 4 deletions river/forest/adaptive_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,12 @@ def learn_one(self, x: dict, y: base.typing.Target, **kwargs):
# Update performance evaluator
self._metrics[i].update(
y_true=y,
y_pred=model.predict_proba_one(x)
if isinstance(self.metric, metrics.base.ClassificationMetric)
and not self.metric.requires_labels
else y_pred,
y_pred=(
model.predict_proba_one(x)
if isinstance(self.metric, metrics.base.ClassificationMetric)
and not self.metric.requires_labels
else y_pred
),
)

k = poisson(rate=self.lambda_value, rng=self._rng)
Expand Down
2 changes: 1 addition & 1 deletion river/linear_model/bayesian_lin_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def predict_one(self, x, with_dist=False):
"""

# Bishop equation 3.58
y_pred_mean = utils.math.dot(self._m, x)
y_pred_mean = 0.0 if not len(self._m) else utils.math.dot(self._m, x).item()
if not with_dist:
return y_pred_mean

Expand Down
2 changes: 1 addition & 1 deletion river/naive_bayes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def one_hot_encode(y: pd.Series) -> pd.DataFrame:
"""
classes = np.unique(y)
indices = np.searchsorted(classes, y)
indptr = np.hstack((0, np.cumsum(np.in1d(y, classes))))
indptr = np.hstack((0, np.cumsum(np.isin(y, classes))))
data = np.empty_like(indices)
data.fill(1)
return pd.DataFrame.sparse.from_spmatrix(
Expand Down
2 changes: 1 addition & 1 deletion river/optim/initializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class Normal(Initializer):
>>> init = optim.initializers.Normal(mu=0, sigma=1, seed=42)

>>> init(shape=1)
0.496714
np.float64(0.4967141...)

>>> init(shape=2)
array([-0.1382643 , 0.64768854])
Expand Down
4 changes: 2 additions & 2 deletions river/optim/newton.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def sherman_morrison(A_inv: dict, u: dict, v: dict) -> dict:

den = 1 + utils.math.dot(utils.math.dotvecmat(u, A_inv), v)

for k, v in utils.math.matmul2d(
for k, val in utils.math.matmul2d(
agriyakhetarpal marked this conversation as resolved.
Show resolved Hide resolved
utils.math.matmul2d(A_inv, utils.math.outer(u, v)), A_inv
).items():
A_inv[k] = A_inv.get(k, 0) - v / den
A_inv[k] = A_inv.get(k, 0) - val / den

return A_inv

Expand Down
2 changes: 1 addition & 1 deletion river/preprocessing/lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def transform_one(self, x):
# Sample empirical topic assignment:
_, components = self._compute_statistics_components(words_indexes_list)

return dict(enumerate(components))
return dict(enumerate(components.tolist()))

def _update_indexes(self, word_list: typing.Iterable[str]):
"""
Expand Down
8 changes: 5 additions & 3 deletions river/preprocessing/scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,12 @@ def learn_many(self, X: pd.DataFrame):
a = old_count / (old_count + new_count)
b = new_count / (old_count + new_count)

self.means[col] = a * old_mean + b * new_mean
self.means[col] = (a * old_mean + b * new_mean).item()
if self.with_std:
self.vars[col] = a * old_var + b * new_var + a * b * (old_mean - new_mean) ** 2
self.counts[col] += new_count
self.vars[col] = (
a * old_var + b * new_var + a * b * (old_mean - new_mean) ** 2
).item()
self.counts[col] += new_count.item()

def transform_many(self, X: pd.DataFrame):
"""Scale a mini-batch of features.
Expand Down
10 changes: 5 additions & 5 deletions river/proba/beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,20 @@ def revert(self, x):
else:
self.beta -= 1

def __call__(self, p: float):
def __call__(self, p: float) -> float:
return (
p ** (self.alpha - 1) * (1 - p) ** (self.beta - 1) / _beta_func(self.alpha, self.beta)
)

def sample(self):
def sample(self) -> float:
return self._rng.betavariate(self.alpha, self.beta)

@property
def mode(self):
def mode(self) -> float:
try:
return (self.alpha - 1) / (self.alpha + self.beta - 2)
except ZeroDivisionError:
return 0.5

def cdf(self, x):
return scipy.special.betainc(self.alpha, self.beta, x)
def cdf(self, x) -> float:
return scipy.special.betainc(self.alpha, self.beta, x).item()
Loading