remove scikit-learn<1.6.0 limit (#1451)
---------

Co-authored-by: Mikhail Sveshnikov <[email protected]>
DimaAmega and mike0sv authored Jan 30, 2025
1 parent aa7629f commit 2b39ee7
Showing 6 changed files with 50 additions and 29 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -53,7 +53,7 @@
     install_requires=[
         "plotly>=5.10.0,<6",
         "statsmodels>=0.12.2",
-        "scikit-learn>=1.0.1,<1.6.0",
+        "scikit-learn>=1.0.1",
         "pandas[parquet]>=1.3.5",
         "numpy>=1.22.0,<2.1",
         "nltk>=3.6.7",
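For context: scikit-learn deprecated the squared keyword of mean_squared_error in 1.4 and removed it in 1.6, which is what the old upper bound guarded against. A minimal sketch of the breakage and a version-independent workaround (illustrative values, not from this commit):

    import numpy as np
    from sklearn.metrics import mean_squared_error

    y_true = [1.0, 2.0, 3.0]
    y_pred = [1.5, 2.0, 2.5]

    # On scikit-learn >= 1.6 the removed keyword raises TypeError:
    # rmse = mean_squared_error(y_true, y_pred, squared=False)

    # Version-independent alternative: take the square root yourself.
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    print(rmse)  # ~0.408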
File 2 of 6 (classification dummy metric module; file name not preserved in this view):
@@ -132,7 +132,12 @@ def calculate(self, data: InputData) -> ClassificationDummyMetricResults:
         coeff_precision = min(1.0, (1 - threshold) / 0.5)
         neg_label_precision = precision_score(target, dummy_preds, pos_label=labels[1]) * coeff_precision
         neg_label_recall = recall_score(target, dummy_preds, pos_label=labels[1]) * coeff_recall
-        f1_label2_value = 2 * neg_label_precision * neg_label_recall / (neg_label_precision + neg_label_recall)
+        f1_label2_denominator = neg_label_precision + neg_label_recall
+        f1_label2_value = (
+            2 * neg_label_precision * neg_label_recall / f1_label2_denominator
+            if f1_label2_denominator != 0
+            else float("nan")
+        )
         metrics_matrix = {
             str(labels[0]): ClassMetric(
                 precision=current_dummy.precision,
@@ -242,16 +247,19 @@ def correction_for_threshold(
         fpr = dummy_results.fpr * coeff_recall
         fnr = dummy_results.fnr * coeff_precision

+        f1_denominator = dummy_results.precision * coeff_precision + dummy_results.recall * coeff_recall
+
+        f1 = (
+            2 * dummy_results.precision * coeff_precision * dummy_results.recall * coeff_recall / f1_denominator
+            if f1_denominator != 0
+            else float("nan")
+        )
+
         return DatasetClassificationQuality(
             accuracy=dummy_results.accuracy,
             precision=dummy_results.precision * coeff_precision,
             recall=dummy_results.recall * coeff_recall,
-            f1=2
-            * dummy_results.precision
-            * coeff_precision
-            * dummy_results.recall
-            * coeff_recall
-            / (dummy_results.precision * coeff_precision + dummy_results.recall * coeff_recall),
+            f1=f1,
             roc_auc=0.5,
             log_loss=None,
             tpr=tpr,
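Why the guard: a dummy classifier can score zero precision and zero recall, and the old one-line formula then divided by zero. A standalone sketch of the pattern the commit adopts (safe_f1 is an illustrative name, not from the codebase):

    import math

    def safe_f1(precision: float, recall: float) -> float:
        # Return NaN instead of raising ZeroDivisionError when both
        # precision and recall are zero.
        denominator = precision + recall
        if denominator == 0:
            return float("nan")
        return 2 * precision * recall / denominator

    print(safe_f1(0.5, 0.25))             # 0.333...
    print(math.isnan(safe_f1(0.0, 0.0)))  # True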
File 3 of 6 (regression dummy metric module; file name not preserved in this view):
@@ -5,12 +5,12 @@
 import pandas as pd
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import mean_absolute_percentage_error
-from sklearn.metrics import mean_squared_error

 from evidently.base_metric import InputData
 from evidently.base_metric import Metric
 from evidently.base_metric import MetricResult
 from evidently.metrics.regression_performance.regression_quality import RegressionQualityMetric
+from evidently.metrics.utils import root_mean_squared_error_compat
 from evidently.model.widget import BaseWidgetInfo
 from evidently.options.base import AnyOptions
 from evidently.renderers.base_renderer import MetricRenderer
@@ -76,10 +76,9 @@ def calculate(self, data: InputData) -> RegressionDummyMetricResults:
         )
         # rmse
         dummy_preds = data.current_data[target_name].mean()
-        rmse_default = mean_squared_error(
+        rmse_default = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
@@ -118,10 +117,9 @@ def calculate(self, data: InputData) -> RegressionDummyMetricResults:
         )
         # rmse
         dummy_preds = data.reference_data[target_name].mean()
-        rmse_by_ref = mean_squared_error(
+        rmse_by_ref = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
File 4 of 6 (regression performance metrics module; file name not preserved in this view):
@@ -5,7 +5,6 @@
 import numpy as np
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import mean_absolute_percentage_error
-from sklearn.metrics import mean_squared_error
 from sklearn.metrics import r2_score

 from evidently.base_metric import InputData
@@ -19,6 +18,7 @@
 from evidently.metrics.regression_performance.objects import RegressionMetricsScatter
 from evidently.metrics.regression_performance.utils import apply_func_to_binned_data
 from evidently.metrics.utils import make_target_bins_for_reg_plots
+from evidently.metrics.utils import root_mean_squared_error_compat
 from evidently.model.widget import BaseWidgetInfo
 from evidently.renderers.base_renderer import MetricRenderer
 from evidently.renderers.base_renderer import default_renderer
@@ -123,10 +123,9 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
             y_true=data.current_data[data.column_mapping.target],
             y_pred=data.current_data[data.column_mapping.prediction],
         )
-        rmse_score_value = mean_squared_error(
+        rmse_score_value = root_mean_squared_error_compat(
             y_true=data.current_data[data.column_mapping.target],
             y_pred=data.current_data[data.column_mapping.prediction],
-            squared=False,
         )

         # mae default values
@@ -138,16 +137,14 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
         # rmse default values
         rmse_ref = None
         if data.reference_data is not None:
-            rmse_ref = mean_squared_error(
+            rmse_ref = root_mean_squared_error_compat(
                 y_true=data.reference_data[data.column_mapping.target],
                 y_pred=data.reference_data[data.column_mapping.prediction],
-                squared=False,
             )
         dummy_preds = data.current_data[data.column_mapping.target].mean()
-        rmse_default = mean_squared_error(
+        rmse_default = root_mean_squared_error_compat(
             y_true=data.current_data[data.column_mapping.target],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
@@ -211,7 +208,7 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
             ["r2_score", "rmse", "mean_abs_error", "mean_abs_perc_error"],
             [
                 r2_score,
-                lambda x, y: mean_squared_error(x, y, squared=False),
+                lambda x, y: root_mean_squared_error_compat(x, y),
                 mean_absolute_error,
                 mean_absolute_percentage_error,
             ],
File 5 of 6 (regression quality metric module; file name not preserved in this view):
@@ -6,7 +6,6 @@
 import numpy as np
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import mean_absolute_percentage_error
-from sklearn.metrics import mean_squared_error
 from sklearn.metrics import r2_score

 from evidently.base_metric import InputData
@@ -21,6 +20,7 @@
 from evidently.metrics.regression_performance.regression_performance_metrics import RegressionMetrics
 from evidently.metrics.regression_performance.utils import apply_func_to_binned_data
 from evidently.metrics.utils import make_target_bins_for_reg_plots
+from evidently.metrics.utils import root_mean_squared_error_compat
 from evidently.model.widget import BaseWidgetInfo
 from evidently.renderers.base_renderer import MetricRenderer
 from evidently.renderers.base_renderer import default_renderer
@@ -116,10 +116,9 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
             y_true=data.current_data[target_name],
             y_pred=data.current_data[prediction_name],
         )
-        rmse_score_value = mean_squared_error(
+        rmse_score_value = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=data.current_data[prediction_name],
-            squared=False,
         )

         # mae default values
@@ -131,16 +130,14 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
         # rmse default values
         rmse_ref = None
         if data.reference_data is not None:
-            rmse_ref = mean_squared_error(
+            rmse_ref = root_mean_squared_error_compat(
                 y_true=data.reference_data[target_name],
                 y_pred=data.reference_data[prediction_name],
-                squared=False,
             )
         dummy_preds = data.current_data[target_name].mean()
-        rmse_default = mean_squared_error(
+        rmse_default = root_mean_squared_error_compat(
             y_true=data.current_data[target_name],
             y_pred=[dummy_preds] * data.current_data.shape[0],
-            squared=False,
         )
         # mape default values
         # optimal constant for mape
@@ -207,7 +204,7 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
             ["r2_score", "rmse", "mean_abs_error", "mean_abs_perc_error"],
             [
                 r2_score,
-                lambda x, y: mean_squared_error(x, y, squared=False),
+                lambda x, y: root_mean_squared_error_compat(x, y),
                 mean_absolute_error,
                 mean_absolute_percentage_error,
             ],
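The compatibility helper added below relies on feature detection (hasattr) rather than version comparison, so it also works with backports and pre-releases. A hedged sketch contrasting the two approaches; the version-parsing variant assumes the packaging library is available and is not part of this commit:

    import sklearn
    import sklearn.metrics
    from packaging.version import Version

    # Option 1 (the approach used below): detect the capability directly.
    has_rmse = hasattr(sklearn.metrics, "root_mean_squared_error")

    # Option 2: parse versions; root_mean_squared_error first shipped in 1.4.
    is_modern = Version(sklearn.__version__) >= Version("1.4")

    print(has_rmse, is_modern)  # these normally agree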
21 changes: 21 additions & 0 deletions src/evidently/metrics/utils.py
@@ -1,4 +1,25 @@
 import pandas as pd
+import sklearn.metrics
+
+use_new_root_mean_squared_error = hasattr(sklearn.metrics, "root_mean_squared_error")
+
+
+def root_mean_squared_error_compat(y_true, y_pred):
+    """
+    Compute the root mean squared error (RMSE) in a way that is compatible
+    with both old and new versions of scikit-learn.
+    Uses sklearn.metrics.root_mean_squared_error where available (added in
+    scikit-learn 1.4); otherwise falls back to mean_squared_error with
+    squared=False, which was removed in scikit-learn 1.6.
+    """
+    if use_new_root_mean_squared_error:
+        from sklearn.metrics import root_mean_squared_error
+
+        return root_mean_squared_error(y_true, y_pred)
+
+    from sklearn.metrics import mean_squared_error
+
+    return mean_squared_error(y_true, y_pred, squared=False)
+

 def make_target_bins_for_reg_plots(
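A quick usage sketch of the new helper with illustrative values (not from the commit); on any supported scikit-learn version it should match the direct NumPy computation:

    import numpy as np
    from evidently.metrics.utils import root_mean_squared_error_compat

    y_true = np.array([3.0, -0.5, 2.0, 7.0])
    y_pred = np.array([2.5, 0.0, 2.0, 8.0])

    rmse = root_mean_squared_error_compat(y_true, y_pred)

    # RMSE is the square root of the mean squared error.
    assert np.isclose(rmse, np.sqrt(np.mean((y_true - y_pred) ** 2)))
    print(rmse)  # ~0.612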
