Feat: Dynamic learning rate decay #80

Merged · 4 commits · Jan 9, 2025
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/adam.py
@@ -52,6 +52,7 @@ def __init__(
beta_1=0.9,
beta_2=0.999,
eps=1e-8,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -68,6 +69,7 @@ def __init__(
beta_1 (float): :math:`beta_1` parameter as described in [1]
beta_2 (float): :math:`beta_2` parameter as described in [1]
eps (float): Nugget term to avoid a division by values close to zero
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
# pylint:disable=duplicate-code
super().__init__(
@@ -78,6 +80,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)
self.beta_1 = beta_1
self.beta_2 = beta_2
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/adamax.py
@@ -52,6 +52,7 @@ def __init__(
beta_1=0.9,
beta_2=0.999,
eps=1e-8,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -68,6 +69,7 @@ def __init__(
beta_1 (float): :math:`beta_1` parameter as described in [1]
beta_2 (float): :math:`beta_2` parameter as described in [1]
eps (float): Nugget term to avoid a division by values close to zero
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
super().__init__(
learning_rate=learning_rate,
@@ -77,6 +79,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)
self.beta_1 = beta_1
self.beta_2 = beta_2
118 changes: 117 additions & 1 deletion queens/stochastic_optimizers/learning_rate_decay.py
@@ -17,6 +17,8 @@
import abc
import logging

import numpy as np

_logger = logging.getLogger(__name__)


@@ -65,6 +67,120 @@ def __call__(self, learning_rate, params, gradient):
Returns:
learning_rate (float): Adapted learning rate
"""
if self.iteration > 1:
    learning_rate *= (self.iteration - 1) ** self.slope / self.iteration**self.slope
self.iteration += 1
return learning_rate
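
A note on the updated LogLinearLearningRateDecay above: the per-call division of the stored rate by iteration**slope is replaced by a multiplicative correction, so the decay no longer compounds. Assuming the optimizer feeds the returned value back in as the next call's learning_rate, entering __call__ with the counter at k > 1 yields

    \eta_k = \eta_{k-1} \left( \frac{k-1}{k} \right)^{\text{slope}} = \eta_0 \, k^{-\text{slope}},

which matches the new unit test (slope 0.5, 101 calls: 1.0 -> 1/sqrt(100) = 0.1).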


class StepwiseLearningRateDecay(LearningRateDecay):
"""Step-wise learning rate decay.

Attributes:
decay_factor (float): Decay factor
decay_interval (int): Decay interval
iteration (int): Iteration number
"""

def __init__(self, decay_factor, decay_interval):
"""Initialize StepwiseLearningRateDecay.

Args:
decay_factor (float): Decay factor
decay_interval (int): Decay interval
"""
self.decay_factor = decay_factor
self.decay_interval = decay_interval
self.iteration = 0

def __call__(self, learning_rate, params, gradient):
"""Adapt learning rate.

Args:
learning_rate (float): Current learning rate
params (np.array): Current parameters
gradient (np.array): Current gradient

Returns:
learning_rate (float): Adapted learning rate
"""
if self.iteration >= self.decay_interval:
learning_rate *= self.decay_factor
self.iteration = 0
_logger.info("learning_rate=%.2e", learning_rate)
else:
self.iteration += 1
return learning_rate
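
A worked cadence for StepwiseLearningRateDecay, mirroring the unit test added below: the counter reaches decay_interval on every (decay_interval + 1)-th call, so after n calls

    \eta_n = \eta_0 \cdot \text{decay\_factor}^{\lfloor n / (\text{decay\_interval} + 1) \rfloor},

e.g. decay_factor = 0.1 and decay_interval = 10 give 1.0 * 0.1**2 = 0.01 after 25 calls, with the counter left at 3.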


class DynamicLearningRateDecay(LearningRateDecay):
"""Dynamic learning rate decay.

Attributes:
alpha (float): Decay factor
rho_min (float): Threshold for signal-to-noise ratio
k_min (int): Minimum number of iterations before learning rate is decreased
k (int): Iteration number
a (np.array): Sum of parameters
b (np.array): Sum of squared parameters
c (np.array): Sum of parameters times iteration number
"""

def __init__(self, alpha=0.1, rho_min=1.0):
"""Initialize DynamicLearningRateDecay.

Args:
alpha (float): Decay factor
rho_min (float): Threshold for signal-to-noise ratio
"""
if alpha >= 1.0 or alpha <= 0.0:
raise ValueError("alpha must be between 0 and 1.")
if rho_min <= 0.0:
raise ValueError("rho_min must be greater than 0.")
self.alpha = alpha
self.rho_min = rho_min
self.k_min = 2
self._reset()

def __call__(self, learning_rate, params, gradient):
"""Adapt learning rate.

Args:
learning_rate (float): Current learning rate
params (np.array): Current parameters
gradient (np.array): Current gradient

Returns:
learning_rate (float): Adapted learning rate
"""
self.k += 1
self.a += params
self.b += params**2
self.c += self.k * params

if self.k >= self.k_min:
rho = 1 / (
(self.k * (self.k + 1) * (self.k + 2) / 12)
* (self.b - self.a**2 / (self.k + 1))
/ (self.c - self.k / 2 * self.a) ** 2
- 1
)
rho_mean = np.mean(rho)
if rho_mean < self.rho_min:
learning_rate *= self.alpha
_logger.info(
"Criterion reached after %i iterations: learning_rate=%.2e",
self.k,
learning_rate,
)
self.k_min = self.k
self._reset()

return learning_rate

def _reset(self):
"""Reset regression parameters."""
self.k = -1
self.a = 0
self.b = 0
self.c = 0
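
For reference, the quotient evaluated in DynamicLearningRateDecay.__call__ appears to be, per parameter component, the signal-to-noise ratio of a least-squares straight-line fit of the parameter trajectory against the iteration index over the current window. A sketch of the assumed identities for iterates x_0, ..., x_k with a = sum_i x_i, b = sum_i x_i^2, c = sum_i i*x_i:

    S_{tt} = \sum_{i=0}^{k} \left( i - \tfrac{k}{2} \right)^2 = \frac{k(k+1)(k+2)}{12}, \qquad
    S_{xx} = b - \frac{a^2}{k+1}, \qquad
    S_{tx} = c - \frac{k}{2} a,

    \rho = \frac{1}{\dfrac{S_{tt} S_{xx}}{S_{tx}^2} - 1}
         = \frac{S_{tx}^2 / S_{tt}}{S_{xx} - S_{tx}^2 / S_{tt}}
         = \frac{\text{explained sum of squares}}{\text{residual sum of squares}}.

The component-wise ratios are averaged; once the mean falls below rho_min, the trend is taken to be buried in noise, the learning rate is multiplied by alpha, k_min is raised to the current window length so the next check cannot fire on a shorter window, and the sums are reset.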
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/rms_prop.py
@@ -50,6 +50,7 @@ def __init__(
max_iteration=1e6,
beta=0.999,
eps=1e-8,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -65,6 +66,7 @@
max_iteration (int): Maximum number of iterations
beta (float): :math:`beta` parameter as described in [1]
eps (float): Nugget term to avoid a division by values close to zero
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
super().__init__(
learning_rate=learning_rate,
@@ -74,6 +76,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)
self.beta = beta
self.v = ExponentialAveraging(coefficient=beta)
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/sgd.py
@@ -37,6 +37,7 @@ def __init__(
clip_by_l2_norm_threshold=np.inf,
clip_by_value_threshold=np.inf,
max_iteration=1e6,
learning_rate_decay=None,
):
"""Initialize optimizer.

@@ -50,6 +51,7 @@
clip_by_l2_norm_threshold (float): Threshold to clip the gradient by L2-norm
clip_by_value_threshold (float): Threshold to clip the gradient components
max_iteration (int): Maximum number of iterations
learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
"""
super().__init__(
learning_rate=learning_rate,
@@ -59,6 +61,7 @@
clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
clip_by_value_threshold=clip_by_value_threshold,
max_iteration=max_iteration,
learning_rate_decay=learning_rate_decay,
)

def scheme_specific_gradient(self, gradient):
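
To illustrate the hook the optimizers above gain, here is a minimal standalone sketch of the call protocol using only the schedule class added in this PR. The driving loop (a noisy quadratic), the step size, and the noise level are invented for the example and are not QUEENS optimizer internals.

import numpy as np

from queens.stochastic_optimizers.learning_rate_decay import DynamicLearningRateDecay

rng = np.random.default_rng(42)
decay = DynamicLearningRateDecay(alpha=0.1, rho_min=1.0)
learning_rate = 1e-2
params = np.array([2.0, -1.5])  # start away from the optimum at zero

for _ in range(500):
    # Noisy gradient of the illustrative objective f(x) = ||x||^2.
    gradient = 2.0 * params + 0.5 * rng.standard_normal(params.shape)
    params = params - learning_rate * gradient
    # An optimizer is expected to hand the current rate, parameters, and
    # gradient to the schedule and keep the (possibly decayed) return value.
    learning_rate = decay(learning_rate, params, gradient)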
115 changes: 115 additions & 0 deletions tests/unit_tests/stochastic_optimizers/test_learning_rate_decay.py
@@ -0,0 +1,115 @@
#
# SPDX-License-Identifier: LGPL-3.0-or-later
# Copyright (c) 2024, QUEENS contributors.
#
# This file is part of QUEENS.
#
# QUEENS is free software: you can redistribute it and/or modify it under the terms of the GNU
# Lesser General Public License as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version. QUEENS is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You
# should have received a copy of the GNU Lesser General Public License along with QUEENS. If not,
# see <https://www.gnu.org/licenses/>.
#
"""Unit tests for LearningRateDecay classes."""

import numpy as np
import pytest

from queens.stochastic_optimizers.learning_rate_decay import (
DynamicLearningRateDecay,
LogLinearLearningRateDecay,
StepwiseLearningRateDecay,
)


def test_init_log_linear_learning_rate_decay():
"""Test the init method of LogLinearLearningRateDecay class."""
slope = 0.7
learning_rate_decay = LogLinearLearningRateDecay(slope=slope)
assert learning_rate_decay.slope == slope
assert learning_rate_decay.iteration == 0


def test_log_linear_learning_rate_decay():
"""Test the call method of LogLinearLearningRateDecay class."""
learning_rate = 1.0
learning_rate_decay = LogLinearLearningRateDecay(slope=0.5)
num_iter = 101
for _ in range(num_iter):
learning_rate = learning_rate_decay(learning_rate, None, None)

assert learning_rate_decay.iteration == num_iter
np.testing.assert_array_almost_equal(learning_rate, 0.1)


def test_init_stepwise_learning_rate_decay():
"""Test the init method of StepwiseLearningRateDecay class."""
decay_factor = 0.1
decay_interval = 100
learning_rate_decay = StepwiseLearningRateDecay(
decay_factor=decay_factor, decay_interval=decay_interval
)
assert learning_rate_decay.decay_factor == decay_factor
assert learning_rate_decay.decay_interval == decay_interval
assert learning_rate_decay.iteration == 0


def test_stepwise_learning_rate_decay():
"""Test the call method of StepwiseLearningRateDecay class."""
learning_rate = 1.0
learning_rate_decay = StepwiseLearningRateDecay(decay_factor=0.1, decay_interval=10)
for _ in range(25):
learning_rate = learning_rate_decay(learning_rate, None, None)

assert learning_rate_decay.iteration == 3
np.testing.assert_array_almost_equal(learning_rate, 0.01)


def test_init_dynamic_learning_rate_decay():
"""Test the init method of DynamicLearningRateDecay class."""
alpha = 0.2
rho_min = 2.0
learning_rate_decay = DynamicLearningRateDecay(alpha=alpha, rho_min=rho_min)
assert learning_rate_decay.alpha == alpha
assert learning_rate_decay.rho_min == rho_min
assert learning_rate_decay.k_min == 2
assert learning_rate_decay.k == -1
assert learning_rate_decay.a == 0
assert learning_rate_decay.b == 0
assert learning_rate_decay.c == 0


@pytest.mark.parametrize("alpha", [-1, 0, 1, 2])
def test_init_dynamic_learning_rate_decay_invalid_alpha(alpha):
"""Test the init method of DynamicLearningRateDecay class.

Test invalid values for alpha.
"""
with pytest.raises(ValueError):
DynamicLearningRateDecay(alpha=alpha)


@pytest.mark.parametrize("rho_min", [-1, 0])
def test_init_dynamic_learning_rate_decay_invalid_rho_min(rho_min):
"""Test the init method of DynamicLearningRateDecay class.

Test invalid values for rho_min.
"""
with pytest.raises(ValueError):
DynamicLearningRateDecay(rho_min=rho_min)


def test_dynamic_learning_rate_decay():
"""Test the call method of DynamicLearningRateDecay class."""
np.random.seed(1)
learning_rate = 1.0
learning_rate_decay = DynamicLearningRateDecay()
params = np.array([1.0, 2.0, 3.0])
num_iter = 101
for _ in range(num_iter):
learning_rate = learning_rate_decay(learning_rate, params, None)
params += np.random.randn(3) * 0.1

np.testing.assert_array_almost_equal(learning_rate, 0.01)