Feat: Dynamic learning rate decay #80

Merged · 4 commits · Jan 9, 2025
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/adam.py
@@ -52,6 +52,7 @@ def __init__(
beta_1=0.9,
beta_2=0.999,
eps=1e-8,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -68,6 +69,7 @@ def __init__(
beta_1 (float): :math:`beta_1` parameter as described in [1]
beta_2 (float): :math:`beta_2` parameter as described in [1]
eps (float): Nugget term to avoid a division by values close to zero
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
# pylint:disable=duplicate-code
super().__init__(
@@ -78,6 +80,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)
self.beta_1 = beta_1
self.beta_2 = beta_2
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/adamax.py
@@ -52,6 +52,7 @@ def __init__(
beta_1=0.9,
beta_2=0.999,
eps=1e-8,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -68,6 +69,7 @@ def __init__(
beta_1 (float): :math:`beta_1` parameter as described in [1]
beta_2 (float): :math:`beta_2` parameter as described in [1]
eps (float): Nugget term to avoid a division by values close to zero
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
super().__init__(
learning_rate=learning_rate,
@@ -77,6 +79,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)
self.beta_1 = beta_1
self.beta_2 = beta_2
118 changes: 117 additions & 1 deletion queens/stochastic_optimizers/learning_rate_decay.py
@@ -17,6 +17,8 @@
import abc
import logging

import numpy as np

_logger = logging.getLogger(__name__)


@@ -65,6 +67,120 @@ def __call__(self, learning_rate, params, gradient):
Returns:
learning_rate (float): Adapted learning rate
"""
if self.iteration > 1:
    learning_rate *= (self.iteration - 1) ** self.slope / self.iteration**self.slope
self.iteration += 1
return learning_rate
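
A note on the updated LogLinearLearningRateDecay above: the per-call division of the stored rate by iteration**slope is replaced by a multiplicative correction, so the decay no longer compounds. Assuming the optimizer feeds the returned value back in as the next call's learning_rate, entering __call__ with the counter at k > 1 yields

    \eta_k = \eta_{k-1} \left( \frac{k-1}{k} \right)^{\text{slope}} = \eta_0 \, k^{-\text{slope}},

which matches the new unit test (slope 0.5, 101 calls: 1.0 -> 1/sqrt(100) = 0.1).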


class StepwiseLearningRateDecay(LearningRateDecay):
"""Step-wise learning rate decay.

Attributes:
decay_factor (float): Decay factor
decay_interval (int): Decay interval
iteration (int): Iteration number
"""

def __init__(self, decay_factor, decay_interval):
"""Initialize StepwiseLearningRateDecay.

Args:
decay_factor (float): Decay factor
decay_interval (int): Decay interval
"""
self.decay_factor = decay_factor
self.decay_interval = decay_interval
self.iteration = 0

def __call__(self, learning_rate, params, gradient):
"""Adapt learning rate.

Args:
learning_rate (float): Current learning rate
params (np.array): Current parameters
gradient (np.array): Current gradient

Returns:
learning_rate (float): Adapted learning rate
"""
if self.iteration >= self.decay_interval:
learning_rate *= self.decay_factor
self.iteration = 0
_logger.info("learning_rate=%.2e", learning_rate)
else:
self.iteration += 1
return learning_rate
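
A worked cadence for StepwiseLearningRateDecay, mirroring the unit test added below: the counter reaches decay_interval on every (decay_interval + 1)-th call, so after n calls

    \eta_n = \eta_0 \cdot \text{decay\_factor}^{\lfloor n / (\text{decay\_interval} + 1) \rfloor},

e.g. decay_factor = 0.1 and decay_interval = 10 give 1.0 * 0.1**2 = 0.01 after 25 calls, with the counter left at 3.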


class DynamicLearningRateDecay(LearningRateDecay):
"""Dynamic learning rate decay.

Attributes:
alpha (float): Decay factor
rho_min (float): Threshold for signal-to-noise ratio
k_min (int): Minimum number of iterations before learning rate is decreased
k (int): Iteration number
a (np.array): Sum of parameters
b (np.array): Sum of squared parameters
c (np.array): Sum of parameters times iteration number
"""

def __init__(self, alpha=0.1, rho_min=1.0):
"""Initialize DynamicLearningRateDecay.

Args:
alpha (float): Decay factor
rho_min (float): Threshold for signal-to-noise ratio
"""
if alpha >= 1.0 or alpha <= 0.0:
raise ValueError("alpha must be between 0 and 1.")
if rho_min <= 0.0:
raise ValueError("rho_min must be greater than 0.")
self.alpha = alpha
self.rho_min = rho_min
self.k_min = 2
self._reset()

def __call__(self, learning_rate, params, gradient):
"""Adapt learning rate.

Args:
learning_rate (float): Current learning rate
params (np.array): Current parameters
gradient (np.array): Current gradient

Returns:
learning_rate (float): Adapted learning rate
"""
self.k += 1
self.a += params
self.b += params**2
self.c += self.k * params

if self.k >= self.k_min:
rho = 1 / (
(self.k * (self.k + 1) * (self.k + 2) / 12)
* (self.b - self.a**2 / (self.k + 1))
/ (self.c - self.k / 2 * self.a) ** 2
- 1
)
rho_mean = np.mean(rho)
if rho_mean < self.rho_min:
learning_rate *= self.alpha
_logger.info(
"Criterion reached after %i iterations: learning_rate=%.2e",
self.k,
learning_rate,
)
self.k_min = self.k
self._reset()

return learning_rate

def _reset(self):
"""Reset regression parameters."""
self.k = -1
self.a = 0
self.b = 0
self.c = 0
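
For reference, the quotient evaluated in DynamicLearningRateDecay.__call__ appears to be, per parameter component, the signal-to-noise ratio of a least-squares straight-line fit of the parameter trajectory against the iteration index over the current window. A sketch of the assumed identities for iterates x_0, ..., x_k with a = sum_i x_i, b = sum_i x_i^2, c = sum_i i*x_i:

    S_{tt} = \sum_{i=0}^{k} \left( i - \tfrac{k}{2} \right)^2 = \frac{k(k+1)(k+2)}{12}, \qquad
    S_{xx} = b - \frac{a^2}{k+1}, \qquad
    S_{tx} = c - \frac{k}{2} a,

    \rho = \frac{1}{\dfrac{S_{tt} S_{xx}}{S_{tx}^2} - 1}
         = \frac{S_{tx}^2 / S_{tt}}{S_{xx} - S_{tx}^2 / S_{tt}}
         = \frac{\text{explained sum of squares}}{\text{residual sum of squares}}.

The component-wise ratios are averaged; once the mean falls below rho_min, the trend is taken to be buried in noise, the learning rate is multiplied by alpha, k_min is raised to the current window length so the next check cannot fire on a shorter window, and the sums are reset.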
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/rms_prop.py
@@ -50,6 +50,7 @@ def __init__(
max_iteration=1e6,
beta=0.999,
eps=1e-8,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -65,6 +66,7 @@
max_iteration (int): Maximum number of iterations
beta (float): :math:`beta` parameter as described in [1]
eps (float): Nugget term to avoid a division by values close to zero
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
super().__init__(
learning_rate=learning_rate,
@@ -74,6 +76,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)
self.beta = beta
self.v = ExponentialAveraging(coefficient=beta)
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/sgd.py
@@ -37,6 +37,7 @@ def __init__(
clip_by_l2_norm_threshold=np.inf,
clip_by_value_threshold=np.inf,
max_iteration=1e6,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -50,6 +51,7 @@
clip_by_l2_norm_threshold (float): Threshold to clip the gradient by L2-norm
clip_by_value_threshold (float): Threshold to clip the gradient components
max_iteration (int): Maximum number of iterations
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
super().__init__(
learning_rate=learning_rate,
@@ -59,6 +61,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)

def scheme_specific_gradient(self, gradient):
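
To illustrate the hook the optimizers above gain, here is a minimal standalone sketch of the call protocol using only the schedule class added in this PR. The driving loop (a noisy quadratic), the step size, and the noise level are invented for the example and are not QUEENS optimizer internals.

import numpy as np

from queens.stochastic_optimizers.learning_rate_decay import DynamicLearningRateDecay

rng = np.random.default_rng(42)
decay = DynamicLearningRateDecay(alpha=0.1, rho_min=1.0)
learning_rate = 1e-2
params = np.array([2.0, -1.5])  # start away from the optimum at zero

for _ in range(500):
    # Noisy gradient of the illustrative objective f(x) = ||x||^2.
    gradient = 2.0 * params + 0.5 * rng.standard_normal(params.shape)
    params = params - learning_rate * gradient
    # An optimizer is expected to hand the current rate, parameters, and
    # gradient to the schedule and keep the (possibly decayed) return value.
    learning_rate = decay(learning_rate, params, gradient)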
115 changes: 115 additions & 0 deletions tests/unit_tests/stochastic_optimizers/test_learning_rate_decay.py
@@ -0,0 +1,115 @@
#
# SPDX-License-Identifier: LGPL-3.0-or-later
# Copyright (c) 2024, QUEENS contributors.
#
# This file is part of QUEENS.
#
# QUEENS is free software: you can redistribute it and/or modify it under the terms of the GNU
# Lesser General Public License as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version. QUEENS is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You
# should have received a copy of the GNU Lesser General Public License along with QUEENS. If not,
# see <https://www.gnu.org/licenses/>.
#
"""Unit tests for LearningRateDecay classes."""

import numpy as np
import pytest

from queens.stochastic_optimizers.learning_rate_decay import (
DynamicLearningRateDecay,
LogLinearLearningRateDecay,
StepwiseLearningRateDecay,
)


def test_init_log_linear_learning_rate_decay():
"""Test the init method of LogLinearLearningRateDecay class."""
slope = 0.7
learning_rate_decay = LogLinearLearningRateDecay(slope=slope)
assert learning_rate_decay.slope == slope
assert learning_rate_decay.iteration == 0


def test_log_linear_learning_rate_decay():
"""Test the call method of LogLinearLearningRateDecay class."""
learning_rate = 1.0
learning_rate_decay = LogLinearLearningRateDecay(slope=0.5)
num_iter = 101
for _ in range(num_iter):
learning_rate = learning_rate_decay(learning_rate, None, None)

assert learning_rate_decay.iteration == num_iter
np.testing.assert_array_almost_equal(learning_rate, 0.1)


def test_init_stepwise_learning_rate_decay():
"""Test the init method of StepwiseLearningRateDecay class."""
decay_factor = 0.1
decay_interval = 100
learning_rate_decay = StepwiseLearningRateDecay(
decay_factor=decay_factor, decay_interval=decay_interval
)
assert learning_rate_decay.decay_factor == decay_factor
assert learning_rate_decay.decay_interval == decay_interval
assert learning_rate_decay.iteration == 0


def test_stepwise_learning_rate_decay():
"""Test the call method of StepwiseLearningRateDecay class."""
learning_rate = 1.0
learning_rate_decay = StepwiseLearningRateDecay(decay_factor=0.1, decay_interval=10)
for _ in range(25):
learning_rate = learning_rate_decay(learning_rate, None, None)

assert learning_rate_decay.iteration == 3
np.testing.assert_array_almost_equal(learning_rate, 0.01)


def test_init_dynamic_learning_rate_decay():
"""Test the init method of DynamicLearningRateDecay class."""
alpha = 0.2
rho_min = 2.0
learning_rate_decay = DynamicLearningRateDecay(alpha=alpha, rho_min=rho_min)
assert learning_rate_decay.alpha == alpha
assert learning_rate_decay.rho_min == rho_min
assert learning_rate_decay.k_min == 2
assert learning_rate_decay.k == -1
assert learning_rate_decay.a == 0
assert learning_rate_decay.b == 0
assert learning_rate_decay.c == 0


@pytest.mark.parametrize("alpha", [-1, 0, 1, 2])
def test_init_dynamic_learning_rate_decay_invalid_alpha(alpha):
"""Test the init method of DynamicLearningRateDecay class.

Test invalid values for alpha.
"""
with pytest.raises(ValueError):
DynamicLearningRateDecay(alpha=alpha)


@pytest.mark.parametrize("rho_min", [-1, 0])
def test_init_dynamic_learning_rate_decay_invalid_rho_min(rho_min):
"""Test the init method of DynamicLearningRateDecay class.

Test invalid values for rho_min.
"""
with pytest.raises(ValueError):
DynamicLearningRateDecay(rho_min=rho_min)


def test_dynamic_learning_rate_decay():
"""Test the call method of DynamicLearningRateDecay class."""
np.random.seed(1)
learning_rate = 1.0
learning_rate_decay = DynamicLearningRateDecay()
params = np.array([1.0, 2.0, 3.0])
num_iter = 101
for _ in range(num_iter):
learning_rate = learning_rate_decay(learning_rate, params, None)
params += np.random.randn(3) * 0.1

np.testing.assert_array_almost_equal(learning_rate, 0.01)