Feat: Dynamic learning rate decay #80

Merged · 4 commits · Jan 9, 2025
Changes from 3 commits
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/adam.py
@@ -52,6 +52,7 @@ def __init__(
        beta_1=0.9,
        beta_2=0.999,
        eps=1e-8,
        learning_rate_decay=None,
    ):
        """Initialize optimizer.

@@ -68,6 +69,7 @@ def __init__(
            beta_1 (float): :math:`beta_1` parameter as described in [1]
            beta_2 (float): :math:`beta_2` parameter as described in [1]
            eps (float): Nugget term to avoid a division by values close to zero
            learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
        """
        # pylint:disable=duplicate-code
        super().__init__(
@@ -78,6 +80,7 @@ def __init__(
            clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
            clip_by_value_threshold=clip_by_value_threshold,
            max_iteration=max_iteration,
            learning_rate_decay=learning_rate_decay,
        )
        self.beta_1 = beta_1
        self.beta_2 = beta_2
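The same `learning_rate_decay` keyword is added to all four optimizers touched by this PR (the adam, adamax, rms_prop, and sgd modules). Below is a minimal usage sketch, assuming the class in adam.py is named `Adam` and that the constructor arguments not visible in this hunk (e.g. convergence thresholds) keep their defaults:

```python
# Illustrative only: wire a decay schedule into the Adam optimizer.
# Assumes `Adam` is importable from queens.stochastic_optimizers.adam and that
# all constructor arguments not shown in this hunk have defaults.
from queens.stochastic_optimizers.adam import Adam
from queens.stochastic_optimizers.learning_rate_decay import DynamicLearningRateDecay

optimizer = Adam(
    learning_rate=1e-3,
    beta_1=0.9,
    beta_2=0.999,
    eps=1e-8,
    learning_rate_decay=DynamicLearningRateDecay(alpha=0.1, rho_min=1.0),
)
```

Leaving `learning_rate_decay=None` (the default) presumably keeps the previous behavior of an undecayed base learning rate.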
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/adamax.py
@@ -52,6 +52,7 @@ def __init__(
        beta_1=0.9,
        beta_2=0.999,
        eps=1e-8,
        learning_rate_decay=None,
    ):
        """Initialize optimizer.

@@ -68,6 +69,7 @@ def __init__(
            beta_1 (float): :math:`beta_1` parameter as described in [1]
            beta_2 (float): :math:`beta_2` parameter as described in [1]
            eps (float): Nugget term to avoid a division by values close to zero
            learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
        """
        super().__init__(
            learning_rate=learning_rate,
@@ -77,6 +79,7 @@ def __init__(
            clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
            clip_by_value_threshold=clip_by_value_threshold,
            max_iteration=max_iteration,
            learning_rate_decay=learning_rate_decay,
        )
        self.beta_1 = beta_1
        self.beta_2 = beta_2
118 changes: 117 additions & 1 deletion queens/stochastic_optimizers/learning_rate_decay.py
@@ -17,6 +17,8 @@
import abc
import logging

import numpy as np

_logger = logging.getLogger(__name__)


@@ -65,6 +67,120 @@ def __call__(self, learning_rate, params, gradient):
        Returns:
            learning_rate (float): Adapted learning rate
        """
+       if self.iteration > 1:
+           learning_rate *= (self.iteration - 1) ** self.slope / self.iteration**self.slope
        self.iteration += 1
-       learning_rate /= self.iteration**self.slope
        return learning_rate
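Context for the change above: the log-linear rule targets the schedule lr_k = lr_0 / k**slope. The deleted line divided the already-decayed rate by k**slope on every call, compounding the decay, while the added lines multiply the current rate by the incremental factor ((k - 1) / k)**slope. A quick standalone check (illustrative only, assuming the iteration counter starts at 1 and the returned rate is fed back in on the next call):

```python
# Illustrative check that the incremental factor reproduces lr_0 / k**slope.
lr_0, slope = 1e-2, 0.5
lr = lr_0
for k in range(1, 6):
    if k > 1:
        lr *= (k - 1) ** slope / k**slope  # incremental update, as in the new code
    assert abs(lr - lr_0 / k**slope) < 1e-12  # matches the closed-form schedule
```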


class StepwiseLearningRateDecay(LearningRateDecay):
    """Step-wise learning rate decay.

    Attributes:
        decay_factor (float): Decay factor
        decay_interval (int): Decay interval
        iteration (int): Iteration number
    """

    def __init__(self, decay_factor, decay_interval):
        """Initialize StepwiseLearningRateDecay.

        Args:
            decay_factor (float): Decay factor
            decay_interval (int): Decay interval
        """
        self.decay_factor = decay_factor
        self.decay_interval = decay_interval
        self.iteration = 0

    def __call__(self, learning_rate, params, gradient):
        """Adapt learning rate.

        Args:
            learning_rate (float): Current learning rate
            params (np.array): Current parameters
            gradient (np.array): Current gradient

        Returns:
            learning_rate (float): Adapted learning rate
        """
        if self.iteration >= self.decay_interval:
            learning_rate *= self.decay_factor
            self.iteration = 0
            _logger.info("learning_rate=%.2e", learning_rate)
        else:
            self.iteration += 1
        return learning_rate
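A minimal usage sketch for `StepwiseLearningRateDecay` (illustrative only; the `params` and `gradient` arguments are part of the common call signature but are not used by this rule):

```python
import numpy as np

from queens.stochastic_optimizers.learning_rate_decay import StepwiseLearningRateDecay

decay = StepwiseLearningRateDecay(decay_factor=0.5, decay_interval=100)
learning_rate = 1e-2
params = np.zeros(3)    # placeholders; the step-wise rule ignores them
gradient = np.zeros(3)

for _ in range(300):
    learning_rate = decay(learning_rate, params, gradient)
# The rate is multiplied by decay_factor roughly every decay_interval calls;
# after 300 calls it has been halved twice here (about 2.5e-3).
```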


class DynamicLearningRateDecay:
"""Dynamic learning rate decay.

Attributes:
alpha (float): Decay factor
rho_min (float): Threshold for signal-to-noise ratio
k_min (int): Minimum number of iterations before learning rate is decreased
k (int): Iteration number
a (np.array): Sum of parameters
b (np.array): Sum of squared parameters
c (np.array): Sum of parameters times iteration number
"""

def __init__(self, alpha=0.1, rho_min=1.0):
"""Initialize DynamicLearningRateDecay.

Args:
alpha (float): Decay factor
rho_min (float): Threshold for signal-to-noise ratio
"""
if alpha >= 1.0 or alpha <= 0.0:
raise ValueError("alpha must be between 0 and 1.")
if rho_min <= 0.0:
raise ValueError("rho_min must be greater than 0.")
self.alpha = alpha
self.rho_min = rho_min
self.k_min = 2
self._reset()

def __call__(self, learning_rate, params, gradient):
"""Adapt learning rate.

Args:
learning_rate (float): Current learning rate
params (np.array): Current parameters
gradient (np.array): Current gradient

Returns:
learning_rate (float): Adapted learning rate
"""
self.k += 1
self.a += params
self.b += params**2
self.c += self.k * params

if self.k >= self.k_min:
rho = 1 / (
(self.k * (self.k + 1) * (self.k + 2) / 12)
* (self.b - self.a**2 / (self.k + 1))
/ (self.c - self.k / 2 * self.a) ** 2
- 1
)
rho_mean = np.mean(rho)
if rho_mean < self.rho_min:
learning_rate *= self.alpha
_logger.info(
"Criterion reached after %i iterations: learning_rate=%.2e",
self.k,
learning_rate,
)
self.k_min = self.k
self._reset()

return learning_rate

def _reset(self):
"""Reset regression parameters."""
self.k = -1
self.a = 0
self.b = 0
self.c = 0
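Reading of the criterion above (hedged interpretation): since the last reset the rule accumulates a = sum(theta_k), b = sum(theta_k**2) and c = sum(k * theta_k) over the parameter iterates and forms, elementwise, rho = 1 / ((k*(k+1)*(k+2)/12) * (b - a**2/(k+1)) / (c - k/2 * a)**2 - 1). Per the attribute docstrings this acts as a signal-to-noise ratio, roughly measuring how strong a linear trend in the recent iterates is relative to their scatter. Once mean(rho) drops below rho_min, the iterates are taken to be fluctuating around an optimum rather than still drifting, so the learning rate is multiplied by alpha, k_min is raised to the current window length, and the sums are reset. Whether the optimizers call the rule before or after their parameter update is not visible in this diff; the toy sketch below (plain gradient steps on a noisy quadratic, not the QUEENS optimizer loop) simply calls it once per step:

```python
import numpy as np

from queens.stochastic_optimizers.learning_rate_decay import DynamicLearningRateDecay

rng = np.random.default_rng(0)
decay = DynamicLearningRateDecay(alpha=0.1, rho_min=1.0)

learning_rate = 0.4
params = np.array([5.0, -3.0])

for _ in range(2000):
    # Noisy gradient of f(x) = ||x||^2; the noise makes the iterates rattle
    # around the optimum once the deterministic part has converged.
    gradient = 2.0 * params + rng.normal(scale=1.0, size=params.shape)
    params = params - learning_rate * gradient
    # The rule cuts the rate by alpha whenever the fitted trend-to-noise
    # ratio of the recent iterates falls below rho_min.
    learning_rate = decay(learning_rate, params, gradient)
```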
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/rms_prop.py
@@ -50,6 +50,7 @@ def __init__(
        max_iteration=1e6,
        beta=0.999,
        eps=1e-8,
        learning_rate_decay=None,
    ):
        """Initialize optimizer.

@@ -65,6 +66,7 @@ def __init__(
            max_iteration (int): Maximum number of iterations
            beta (float): :math:`beta` parameter as described in [1]
            eps (float): Nugget term to avoid a division by values close to zero
            learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
        """
        super().__init__(
            learning_rate=learning_rate,
@@ -74,6 +76,7 @@ def __init__(
            clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
            clip_by_value_threshold=clip_by_value_threshold,
            max_iteration=max_iteration,
            learning_rate_decay=learning_rate_decay,
        )
        self.beta = beta
        self.v = ExponentialAveraging(coefficient=beta)
3 changes: 3 additions & 0 deletions queens/stochastic_optimizers/sgd.py
@@ -37,6 +37,7 @@ def __init__(
        clip_by_l2_norm_threshold=np.inf,
        clip_by_value_threshold=np.inf,
        max_iteration=1e6,
        learning_rate_decay=None,
    ):
        """Initialize optimizer.

@@ -50,6 +51,7 @@ def __init__(
            clip_by_l2_norm_threshold (float): Threshold to clip the gradient by L2-norm
            clip_by_value_threshold (float): Threshold to clip the gradient components
            max_iteration (int): Maximum number of iterations
            learning_rate_decay (LearningRateDecay): Object to schedule learning rate decay
        """
        super().__init__(
            learning_rate=learning_rate,
@@ -59,6 +61,7 @@ def __init__(
            clip_by_l2_norm_threshold=clip_by_l2_norm_threshold,
            clip_by_value_threshold=clip_by_value_threshold,
            max_iteration=max_iteration,
            learning_rate_decay=learning_rate_decay,
        )

    def scheme_specific_gradient(self, gradient):