From dea6d4507080a7ed135bdbe5054b1ec13b6c9b58 Mon Sep 17 00:00:00 2001
From: W0lfgunbl00d
Date: Wed, 20 Nov 2024 11:44:20 +0100
Subject: [PATCH 1/3] Create AUC_SGD.py

---
 river/linear_model/AUC_SGD.py | 53 +++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 river/linear_model/AUC_SGD.py

diff --git a/river/linear_model/AUC_SGD.py b/river/linear_model/AUC_SGD.py
new file mode 100644
index 0000000000..ba350de660
--- /dev/null
+++ b/river/linear_model/AUC_SGD.py
@@ -0,0 +1,53 @@
+import numpy as np
+
+
+def sigma_eps(z, eps):
+    return 1 / (1 + np.exp(-z / eps))
+
+
+class AUCMetric:
+    def __init__(self):
+        super().__init__()
+        self.positive_scores = []
+        self.negative_scores = []
+
+    def update(self, y_true, y_pred):
+        """Updates the metric with the new prediction and true label."""
+        if y_true == 1:
+            self.positive_scores.append(y_pred)
+        else:
+            self.negative_scores.append(y_pred)
+        return self
+
+    def get(self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
+        """
+        Implements the stochastic gradient ascent method to optimize theta and computes the AUC
+        based on the accumulated scores.
+
+        Parameters:
+        - X_train: Training feature matrix.
+        - y_train: Training labels.
+        - X_test: Test feature matrix.
+        - y_test: Test labels.
+        - epochs: Number of training epochs.
+        - lr: Initial learning rate.
+        - n_mc: Number of Monte Carlo samples for gradient estimation.
+        - gamma: Learning rate discount factor.
+        - eps: Smoothing parameter for the sigmoid function.
+
+        Returns:
+        - auc: Final AUC score based on the accumulated scores.
+        """
+
+        # Define the stochastic gradient function
+        def stochastic_gradient(theta, X1, X0, N, eps, random_state=1):
+            np.random.seed(random_state)
+            indices_1 = np.random.choice(np.arange(X1.shape[0]), size=N)
+            indices_0 = np.random.choice(np.arange(X0.shape[0]), size=N)
+            X1_, X0_ = X1[indices_1], X0[indices_0]
+            avg = np.zeros_like(theta)
+            for xi, xj in zip(X1_, X0_):
+                dx = xj - xi
+                sig = sigma_eps(np.dot(theta, dx))
+                avg += sig * (1 - sig) * dx
+            return avg / (N * eps)
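
What patch 1 is computing: the AUC of a linear scorer theta @ x counts the positive/negative pairs it ranks correctly, and sigma_eps replaces that 0/1 indicator with a steep sigmoid so the pairwise objective becomes differentiable in theta; stochastic_gradient then estimates the gradient from N sampled pairs. (Note the sigma_eps call here omits its eps argument; patch 2 below fixes that.) A minimal sketch of the surrogate under that reading, where smoothed_auc is an illustrative helper and not part of the patch:

import numpy as np

def sigma_eps(z, eps):
    # Steep sigmoid: tends to the 0/1 ranking indicator as eps -> 0.
    return 1 / (1 + np.exp(-z / eps))

def smoothed_auc(theta, X1, X0, eps=0.01):
    # Hypothetical helper: mean of sigma_eps over every positive/negative pair,
    # a differentiable stand-in for the exact AUC of the scorer x -> theta @ x.
    margins = (X1 @ theta)[:, None] - (X0 @ theta)[None, :]
    return sigma_eps(margins, eps).mean()

rng = np.random.default_rng(0)
X1 = rng.normal(1.0, 1.0, size=(50, 3))  # positives score higher on average
X0 = rng.normal(0.0, 1.0, size=(50, 3))
print(smoothed_auc(np.ones(3), X1, X0))  # close to the exact AUC of this scorer
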
From ac717883b557964fa4babd645d9e7092e818e57d Mon Sep 17 00:00:00 2001
From: W0lfgunbl00d
Date: Sun, 1 Dec 2024 14:48:26 +0100
Subject: [PATCH 2/3] AUC_SGD

---
 river/linear_model/AUC_SGD.py  | 166 +++++++++++++++++++++++++++------
 river/linear_model/__init__.py |   2 +
 2 files changed, 138 insertions(+), 30 deletions(-)

diff --git a/river/linear_model/AUC_SGD.py b/river/linear_model/AUC_SGD.py
index ba350de660..c9a20fc47f 100644
--- a/river/linear_model/AUC_SGD.py
+++ b/river/linear_model/AUC_SGD.py
@@ -1,53 +1,159 @@
+from __future__ import annotations
+
 import numpy as np

-def sigma_eps(z, eps):
-    return 1 / (1 + np.exp(-z / eps))

+class AUC_SGD:
+    """
+    AUC Stochastic Gradient Descent (SGD)
+
+    This class implements an SGD-based optimization method for maximizing the AUC (Area Under the Curve)
+    of a binary classifier, assuming a linear scoring model.
+
+    Attributes
+    ----------
+    epochs : int
+        Number of training epochs.
+    lr : float
+        Initial learning rate for gradient descent updates.
+    n_mc : int
+        Number of Monte Carlo samples used for estimating gradients.
+    gamma : float
+        Learning rate decay parameter.
+    eps : float
+        Smoothing parameter for numerical stability.
+
+    Methods
+    -------
+    getTrain(X_train, y_train):
+        Returns the predictions that maximize the training AUC score.
+    getTest(X_train, X_test, y_train):
+        Returns the predictions that maximize the testing AUC score.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from river import linear_model
+    >>> from sklearn.metrics import roc_auc_score
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.model_selection import train_test_split
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> X, y = make_classification(n_samples=2000, n_informative=9, n_redundant=0, n_repeated=0, random_state=2)
+    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)
+    >>> base = LogisticRegression().fit(X_train, y_train)
+    >>> X1 = X_train[y_train == 1]
+    >>> X0 = X_train[y_train == 0]
+    >>> model = linear_model.AUC_SGD()
+    >>> np.random.seed(123)
+    >>> theta = np.random.randn(X_train[0].shape[0])
+    >>> test_auc = model.getTest(X_train, X_test, y_train)
+    >>> train_auc = model.getTrain(X_train, y_train)
+    >>> print(roc_auc_score(y_train, train_auc))
+    0.8899135830932864
+    >>> print(roc_auc_score(y_test, test_auc))
+    0.8849634963496349
+    """
+
+    def __init__(self, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
         super().__init__()
-        self.positive_scores = []
-        self.negative_scores = []
+        self.epochs = epochs
+        self.lr = lr
+        self.n_mc = n_mc
+        self.gamma = gamma
+        self.eps = eps

-    def update(self, y_true, y_pred):
-        """Updates the metric with the new prediction and true label."""
-        if y_true == 1:
-            self.positive_scores.append(y_pred)
+    def sigma_eps(self, z, eps):
+        q = -z / eps
+        if abs(q) < 35:
+            return 1 / (1 + np.exp(q))
+        elif q > 0:
+            return 0
         else:
-            self.negative_scores.append(y_pred)
-        return self
+            return 1
+

-    def get(self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
+    def stochastic_gradient(self, theta, X1, X0, N=1000, eps=0.01, random_state=1):
         """
-        Implements the stochastic gradient ascent method to optimize theta and computes the AUC
-        based on the accumulated scores.
+        Computes the stochastic gradient of the AUC objective.
+
+        This function calculates the gradient of the AUC metric with respect to the parameter `theta` using
+        Monte Carlo sampling. Positive and negative samples are drawn to estimate the difference in predictions.

-        Parameters:
-        - X_train: Training feature matrix.
-        - y_train: Training labels.
-        - X_test: Test feature matrix.
-        - y_test: Test labels.
-        - epochs: Number of training epochs.
-        - lr: Initial learning rate.
-        - n_mc: Number of Monte Carlo samples for gradient estimation.
-        - gamma: Learning rate discount factor.
-        - eps: Smoothing parameter for the sigmoid function.
+        Parameters
+        ----------
+        theta : numpy.ndarray
+            The parameter vector (weights) of the model.
+        X1 : numpy.ndarray
+            Feature matrix for the positive class (label = 1).
+        X0 : numpy.ndarray
+            Feature matrix for the negative class (label = 0).
+        N : int, optional, default=1000
+            Number of Monte Carlo samples to use for gradient estimation.
+        eps : float, optional, default=0.01
+            Smoothing parameter for the sigmoid function.
+        random_state : int, optional, default=1
+            Random seed for reproducibility.

-        Returns:
-        - auc: Final AUC score based on the accumulated scores.
+        Returns
+        -------
+        numpy.ndarray
+            The estimated gradient vector.
         """

-        # Define the stochastic gradient function
-        def stochastic_gradient(theta, X1, X0, N, eps, random_state=1):
         np.random.seed(random_state)
         indices_1 = np.random.choice(np.arange(X1.shape[0]), size=N)
         indices_0 = np.random.choice(np.arange(X0.shape[0]), size=N)
         X1_, X0_ = X1[indices_1], X0[indices_0]
         avg = np.zeros_like(theta)
+
         for xi, xj in zip(X1_, X0_):
             dx = xj - xi
-            sig = sigma_eps(np.dot(theta, dx))
+            sig = self.sigma_eps(np.dot(theta, dx), eps)
             avg += sig * (1 - sig) * dx
         return avg / (N * eps)
+
+    def compute(self, X_train, X_test, y_train):
+        X1 = X_train[y_train == 1]
+        X0 = X_train[y_train == 0]
+        np.random.seed(123)
+        theta = np.random.randn(X_train[0].shape[0])
+        epochs_list = list(range(self.epochs))
+
+        for seed, epoch in enumerate(epochs_list):
+            # learning rate scheduler
+            self.lr = self.lr / (1 + self.gamma)
+
+            theta = theta - self.lr * self.stochastic_gradient(theta, X1, X0, N=self.n_mc, random_state=seed)
+
+        if X_test is not None:
+            return theta @ X_test.T
+        else:
+            return theta @ X_train.T
+
+    def getTrain(self, X_train, y_train):
+        """
+        Implements the stochastic gradient ascent method to optimize theta for a maximised AUC training score.
+
+        Parameters:
+        - X_train: Training feature matrix.
+        - y_train: Training labels.
+
+        Returns:
+        - Predictions that maximize the training AUC score.
+        """
+
+        return self.compute(X_train, None, y_train)
+
+    def getTest(self, X_train, X_test, y_train):
+        """
+        Implements the stochastic gradient ascent method to optimize theta for a maximised AUC testing score.
+
+        Parameters:
+        - X_train: Training feature matrix.
+        - X_test: Testing feature matrix.
+        - y_train: Training labels.
+
+        Returns:
+        - Predictions that maximize the testing AUC score.
+        """
+        return self.compute(X_train, X_test, y_train)
diff --git a/river/linear_model/__init__.py b/river/linear_model/__init__.py
index 756720490a..2c3847a33e 100644
--- a/river/linear_model/__init__.py
+++ b/river/linear_model/__init__.py
@@ -10,6 +10,7 @@
 from .pa import PAClassifier, PARegressor
 from .perceptron import Perceptron
 from .softmax import SoftmaxRegression
+from .AUC_SGD import AUC_SGD

 __all__ = [
     "base",
@@ -21,4 +22,5 @@
     "PARegressor",
     "Perceptron",
     "SoftmaxRegression",
+    "AUC_SGD",
 ]
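
The piecewise sigma_eps that patch 2 introduces guards the exponential: once |z / eps| reaches 35 the sigmoid is already saturated to 0 or 1 at double precision, so the method returns the limit directly instead of evaluating np.exp on an extreme argument. A standalone sketch of the same logic, illustrative and not part of the diff:

import numpy as np

def sigma_eps_stable(z, eps):
    # Mirrors the patched method: evaluate the sigmoid only while the result is
    # distinguishable from 0 or 1 in float64; beyond |q| = 35 return the
    # saturated limit and avoid overflow in np.exp for extreme q.
    q = -z / eps
    if abs(q) < 35:
        return 1 / (1 + np.exp(q))
    return 0.0 if q > 0 else 1.0

print(sigma_eps_stable(0.0, 0.01))    # 0.5 at the decision boundary
print(sigma_eps_stable(-10.0, 0.01))  # 0.0; the naive form hits np.exp(1000) and warns of overflow
print(sigma_eps_stable(10.0, 0.01))   # 1.0

One behavioral note on compute(): it decays self.lr in place (lr / (1 + gamma) once per epoch), so a second call to getTrain or getTest resumes from the already-decayed rate rather than from the constructor's value.
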
""" - # Define the stochastic gradient function - def stochastic_gradient(theta, X1, X0, N, eps, random_state=1): np.random.seed(random_state) indices_1 = np.random.choice(np.arange(X1.shape[0]), size=N) indices_0 = np.random.choice(np.arange(X0.shape[0]), size=N) X1_, X0_ = X1[indices_1], X0[indices_0] avg = np.zeros_like(theta) + for xi, xj in zip(X1_, X0_): dx = xj - xi - sig = sigma_eps(np.dot(theta, dx)) + sig = self.sigma_eps(np.dot(theta, dx), eps) avg += sig * (1 - sig) * dx return avg / (N * eps) + def compute(self, X_train, X_test, y_train): + X1 = X_train[y_train == 1] + X0 = X_train[y_train == 0] + np.random.seed(123) + theta = np.random.randn(X_train[0].shape[0]) + epochs_list = list(range(self.epochs)) + + for seed, epoch in enumerate(epochs_list): + # learning rate scheduler + self.lr = self.lr / (1 + self.gamma) + + theta = theta - self.lr * self.stochastic_gradient(theta, X1, X0, N=self.n_mc, random_state=seed) + + if X_test is not None: + return theta @ X_test.T + else: + return theta @ X_train.T + + def getTrain(self, X_train, y_train): + """ + Implements the stochastic gradient ascent method to optimize theta for a maximised AUC training score. + + Parameters: + - X_train: Training feature matrix. + - y_train: Training labels. + + Returns: + - Prediction to maximize training AUC score. + """ + + return self.compute(X_train, None, y_train) + + def getTest(self, X_train, X_test, y_train): + """ + Implements the stochastic gradient ascent method to optimize theta for a maximised AUC testing score. + + Parameters: + - X_train: Training feature matrix. + - X_test: Testing feature matrix. + - y_train: Training labels. + + Returns: + - Prediction to maximize testing AUC score. + """ + return self.compute(X_train, X_test, y_train) diff --git a/river/linear_model/__init__.py b/river/linear_model/__init__.py index 756720490a..2c3847a33e 100644 --- a/river/linear_model/__init__.py +++ b/river/linear_model/__init__.py @@ -10,6 +10,7 @@ from .pa import PAClassifier, PARegressor from .perceptron import Perceptron from .softmax import SoftmaxRegression +from .AUC_SGD import AUC_SGD __all__ = [ "base", @@ -21,4 +22,5 @@ "PARegressor", "Perceptron", "SoftmaxRegression", + "AUC_SGD", ] From c1d08937611d73bb95d831802b267f066bd0d7db Mon Sep 17 00:00:00 2001 From: W0lfgunbl00d Date: Sun, 1 Dec 2024 14:49:14 +0100 Subject: [PATCH 3/3] Update AUC_SGD.py --- river/linear_model/AUC_SGD.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/river/linear_model/AUC_SGD.py b/river/linear_model/AUC_SGD.py index c9a20fc47f..3fd2f51a75 100644 --- a/river/linear_model/AUC_SGD.py +++ b/river/linear_model/AUC_SGD.py @@ -132,7 +132,7 @@ def compute(self, X_train, X_test, y_train): def getTrain(self, X_train, y_train): """ - Implements the stochastic gradient ascent method to optimize theta for a maximised AUC training score. + Implements the stochastic gradient ascent method to optimize theta for a maximized AUC training score. Parameters: - X_train: Training feature matrix. @@ -146,7 +146,7 @@ def getTrain(self, X_train, y_train): def getTest(self, X_train, X_test, y_train): """ - Implements the stochastic gradient ascent method to optimize theta for a maximised AUC testing score. + Implements the stochastic gradient ascent method to optimize theta for a maximized AUC testing score. Parameters: - X_train: Training feature matrix.