remove scikit-learn<1.6.0 limit #1451

Merged: 8 commits, Jan 30, 2025
setup.py (2 changes: 1 addition & 1 deletion)
@@ -53,7 +53,7 @@
     install_requires=[
         "plotly>=5.10.0,<6",
         "statsmodels>=0.12.2",
-        "scikit-learn>=1.0.1,<1.6.0",
+        "scikit-learn>=1.0.1",
         "pandas[parquet]>=1.3.5",
         "numpy>=1.22.0,<2.1",
         "nltk>=3.6.7",
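Context for this change: scikit-learn 1.6 removed the squared= argument of mean_squared_error, which is why the upper pin existed. A minimal sketch of the two call styles this PR has to bridge (neither import is guaranteed on a given install; the compat helper added later in this diff handles the dispatch):

    # Older scikit-learn releases: RMSE via the squared= flag.
    from sklearn.metrics import mean_squared_error
    rmse = mean_squared_error([3.0, 1.0], [2.0, 2.0], squared=False)

    # Newer releases: the dedicated function replaces the flag.
    from sklearn.metrics import root_mean_squared_error
    rmse = root_mean_squared_error([3.0, 1.0], [2.0, 2.0])

Both calls return 1.0 here: the squared errors are 1.0 and 1.0, their mean is 1.0, and its square root is 1.0.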
@@ -132,7 +132,11 @@ def calculate(self, data: InputData) -> ClassificationDummyMetricResults:
             coeff_precision = min(1.0, (1 - threshold) / 0.5)
         neg_label_precision = precision_score(target, dummy_preds, pos_label=labels[1]) * coeff_precision
         neg_label_recall = recall_score(target, dummy_preds, pos_label=labels[1]) * coeff_recall
-        f1_label2_value = 2 * neg_label_precision * neg_label_recall / (neg_label_precision + neg_label_recall)
+        f1_label2_value = (
+            2 * neg_label_precision * neg_label_recall / (neg_label_precision + neg_label_recall)
+            if (neg_label_precision + neg_label_recall) != 0
+            else float("nan")
+        )
         metrics_matrix = {
             str(labels[0]): ClassMetric(
                 precision=current_dummy.precision,
@@ -242,16 +246,19 @@ def correction_for_threshold(
     fpr = dummy_results.fpr * coeff_recall
     fnr = dummy_results.fnr * coeff_precision

+    f1_denominator = dummy_results.precision * coeff_precision + dummy_results.recall * coeff_recall
+
+    f1 = (
+        2 * dummy_results.precision * coeff_precision * dummy_results.recall * coeff_recall / f1_denominator
+        if f1_denominator != 0
+        else float("nan")
+    )
+
     return DatasetClassificationQuality(
         accuracy=dummy_results.accuracy,
         precision=dummy_results.precision * coeff_precision,
         recall=dummy_results.recall * coeff_recall,
-        f1=2
-        * dummy_results.precision
-        * coeff_precision
-        * dummy_results.recall
-        * coeff_recall
-        / (dummy_results.precision * coeff_precision + dummy_results.recall * coeff_recall),
+        f1=f1,
         roc_auc=0.5,
         log_loss=None,
         tpr=tpr,
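The two hunks above replace bare divisions with guarded conditionals, so a dummy classifier whose precision and recall are both zero yields NaN instead of raising ZeroDivisionError. A standalone sketch of the same pattern (safe_f1 is an illustrative name, not part of this PR):

    import math

    def safe_f1(precision: float, recall: float) -> float:
        # F1 is the harmonic mean of precision and recall; when both are
        # zero the denominator vanishes and F1 is undefined, so report NaN.
        denominator = precision + recall
        if denominator == 0:
            return float("nan")
        return 2 * precision * recall / denominator

    print(safe_f1(0.5, 0.5))              # 0.5
    print(math.isnan(safe_f1(0.0, 0.0)))  # True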
@@ -5,12 +5,12 @@
 import pandas as pd
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import mean_absolute_percentage_error
-from sklearn.metrics import mean_squared_error

 from evidently.base_metric import InputData
 from evidently.base_metric import Metric
 from evidently.base_metric import MetricResult
 from evidently.metrics.regression_performance.regression_quality import RegressionQualityMetric
+from evidently.metrics.utils import root_mean_squared_error_compat
 from evidently.model.widget import BaseWidgetInfo
 from evidently.options.base import AnyOptions
 from evidently.renderers.base_renderer import MetricRenderer
@@ -76,10 +76,9 @@ def calculate(self, data: InputData) -> RegressionDummyMetricResults:
         )
         # rmse
         dummy_preds = data.current_data[target_name].mean()
-        rmse_default = mean_squared_error(
+        rmse_default = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
@@ -118,10 +117,9 @@ def calculate(self, data: InputData) -> RegressionDummyMetricResults:
         )
         # rmse
         dummy_preds = data.reference_data[target_name].mean()
-        rmse_by_ref = mean_squared_error(
+        rmse_by_ref = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
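For reference, the rmse_default baseline computed above is the RMSE of always predicting the target mean, which equals the population standard deviation of the target. A small numeric sketch (values are illustrative; numpy used for brevity):

    import numpy as np

    target = np.array([1.0, 2.0, 3.0, 6.0])
    dummy_pred = np.full_like(target, target.mean())  # constant prediction = 3.0

    # RMSE against the constant mean prediction collapses to the population
    # standard deviation of the target: sqrt(14 / 4) ~= 1.871.
    rmse_default = np.sqrt(np.mean((target - dummy_pred) ** 2))
    assert np.isclose(rmse_default, target.std())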
@@ -5,7 +5,6 @@
 import numpy as np
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import mean_absolute_percentage_error
-from sklearn.metrics import mean_squared_error
 from sklearn.metrics import r2_score

 from evidently.base_metric import InputData
@@ -19,6 +18,7 @@
 from evidently.metrics.regression_performance.objects import RegressionMetricsScatter
 from evidently.metrics.regression_performance.utils import apply_func_to_binned_data
 from evidently.metrics.utils import make_target_bins_for_reg_plots
+from evidently.metrics.utils import root_mean_squared_error_compat
 from evidently.model.widget import BaseWidgetInfo
 from evidently.renderers.base_renderer import MetricRenderer
 from evidently.renderers.base_renderer import default_renderer
@@ -123,10 +123,9 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
             y_true=data.current_data[data.column_mapping.target],
             y_pred=data.current_data[data.column_mapping.prediction],
         )
-        rmse_score_value = mean_squared_error(
+        rmse_score_value = root_mean_squared_error_compat(
             y_true=data.current_data[data.column_mapping.target],
             y_pred=data.current_data[data.column_mapping.prediction],
-            squared=False,
         )

         # mae default values
@@ -138,16 +137,14 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
         # rmse default values
         rmse_ref = None
         if data.reference_data is not None:
-            rmse_ref = mean_squared_error(
+            rmse_ref = root_mean_squared_error_compat(
                 y_true=data.reference_data[data.column_mapping.target],
                 y_pred=data.reference_data[data.column_mapping.prediction],
-                squared=False,
             )
         dummy_preds = data.current_data[data.column_mapping.target].mean()
-        rmse_default = mean_squared_error(
+        rmse_default = root_mean_squared_error_compat(
             y_true=data.current_data[data.column_mapping.target],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
@@ -211,7 +208,7 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
             ["r2_score", "rmse", "mean_abs_error", "mean_abs_perc_error"],
             [
                 r2_score,
-                lambda x, y: mean_squared_error(x, y, squared=False),
+                lambda x, y: root_mean_squared_error_compat(x, y),
                 mean_absolute_error,
                 mean_absolute_percentage_error,
             ],
@@ -6,7 +6,6 @@
 import numpy as np
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import mean_absolute_percentage_error
-from sklearn.metrics import mean_squared_error
 from sklearn.metrics import r2_score

 from evidently.base_metric import InputData
@@ -21,6 +20,7 @@
 from evidently.metrics.regression_performance.regression_performance_metrics import RegressionMetrics
 from evidently.metrics.regression_performance.utils import apply_func_to_binned_data
 from evidently.metrics.utils import make_target_bins_for_reg_plots
+from evidently.metrics.utils import root_mean_squared_error_compat
 from evidently.model.widget import BaseWidgetInfo
 from evidently.renderers.base_renderer import MetricRenderer
 from evidently.renderers.base_renderer import default_renderer
@@ -116,10 +116,9 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
             y_true=data.current_data[target_name],
             y_pred=data.current_data[prediction_name],
         )
-        rmse_score_value = mean_squared_error(
+        rmse_score_value = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=data.current_data[prediction_name],
-            squared=False,
         )

         # mae default values
@@ -131,16 +130,14 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
         # rmse default values
         rmse_ref = None
         if data.reference_data is not None:
-            rmse_ref = mean_squared_error(
+            rmse_ref = root_mean_squared_error_compat(
                 y_true=data.reference_data[target_name],
                 y_pred=data.reference_data[prediction_name],
-                squared=False,
             )
         dummy_preds = data.current_data[target_name].mean()
-        rmse_default = mean_squared_error(
+        rmse_default = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
@@ -207,7 +204,7 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
             ["r2_score", "rmse", "mean_abs_error", "mean_abs_perc_error"],
             [
                 r2_score,
-                lambda x, y: mean_squared_error(x, y, squared=False),
+                lambda x, y: root_mean_squared_error_compat(x, y),
                 mean_absolute_error,
                 mean_absolute_percentage_error,
             ],
src/evidently/metrics/utils.py (21 changes: 21 additions & 0 deletions)
@@ -1,4 +1,25 @@
 import pandas as pd
+import sklearn.metrics
+
+use_new_root_mean_squared_error = hasattr(sklearn.metrics, "root_mean_squared_error")
+
+
+def root_mean_squared_error_compat(y_true, y_pred):
+    """
+    Compute the Root Mean Squared Error (RMSE) in a way that is compatible
+    with both old and new versions of scikit-learn.
+
+    In scikit-learn >= 1.6.0, uses sklearn.metrics.root_mean_squared_error.
+    In earlier versions, uses mean_squared_error with squared=False.
+    """
+    if use_new_root_mean_squared_error:
+        from sklearn.metrics import root_mean_squared_error
+
+        return root_mean_squared_error(y_true, y_pred)
+
+    from sklearn.metrics import mean_squared_error
+
+    return mean_squared_error(y_true, y_pred, squared=False)


 def make_target_bins_for_reg_plots(
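A minimal usage sketch of the new helper (values are illustrative; the helper dispatches to whichever RMSE API the installed scikit-learn provides):

    from evidently.metrics.utils import root_mean_squared_error_compat

    y_true = [3.0, -0.5, 2.0, 7.0]
    y_pred = [2.5, 0.0, 2.0, 8.0]

    # Same result on old and new scikit-learn: sqrt of the mean squared error,
    # sqrt((0.25 + 0.25 + 0.0 + 1.0) / 4) ~= 0.612.
    rmse = root_mean_squared_error_compat(y_true, y_pred)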