Skip to content

Commit

Permalink
Make openstack retry more configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
ricolin committed Dec 12, 2024
1 parent df56de6 commit 355e742
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 40 deletions.
80 changes: 48 additions & 32 deletions staffeln/common/openstack.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,37 @@
from __future__ import annotations

import tenacity
from openstack import exceptions, proxy
from oslo_log import log

from staffeln.common import auth
from staffeln import conf
from staffeln.common import auth
from staffeln.i18n import _

import tenacity

CONF = conf.CONF
LOG = log.getLogger(__name__)


class RetryHTTPError(tenacity.retry_if_exception):
"""Retry strategy that retries if the exception is an ``HTTPError`` with
an abnormal status code.
"""

def __init__(self):
def is_http_error(exception):
# Make sure we don't retry on 404, as not found could be an
# expected status.
result = (isinstance(exception, exceptions.HttpException) and
exception.status_code != 404)
# Make sure we don't retry on codes in skip list (default: 404),
# as not found could be an expected status.
skip_codes = CONF.openstack.skip_retry_codes.replace(" ", "").split(",")
result = (
isinstance(exception, exceptions.HttpException)
and str(exception.status_code) not in skip_codes
)
if result:
LOG.debug(f"Getting HttpException {exception} (status "
f"code: {exception.status_code}), "
"retry till timeout...")
LOG.debug(
f"Getting HttpException {exception} (status "
f"code: {exception.status_code}), "
"retry till timeout..."
)
return result

super().__init__(predicate=is_http_error)
Expand All @@ -51,9 +56,10 @@ def set_project(self, project):
# user
@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_user_id(self):
user_name = self.conn.config.auth["username"]
if "user_domain_id" in self.conn.config.auth:
Expand All @@ -68,9 +74,10 @@ def get_user_id(self):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_role_assignments(self, project_id, user_id=None):
filters = {"project": project_id}
if user_id:
Expand All @@ -79,17 +86,19 @@ def get_role_assignments(self, project_id, user_id=None):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_user(self, user_id):
return self.conn.get_user(name_or_id=user_id)

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_project_member_emails(self, project_id):
members = self.get_role_assignments(project_id)
emails = []
Expand All @@ -108,17 +117,19 @@ def get_project_member_emails(self, project_id):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_projects(self):
return self.conn.list_projects()

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_servers(self, project_id=None, all_projects=True, details=True):
if project_id is not None:
return self.conn.compute.servers(
Expand All @@ -131,17 +142,19 @@ def get_servers(self, project_id=None, all_projects=True, details=True):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_volume(self, uuid, project_id):
return self.conn.get_volume_by_id(uuid)

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup(self, uuid, project_id=None):
try:
return self.conn.get_volume_backup(uuid)
Expand All @@ -167,9 +180,10 @@ def create_backup(

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def delete_backup(self, uuid, project_id=None, force=False):
# Note(Alex): v3 is not supporting force delete?
# conn.block_storage.delete_backup(
Expand All @@ -185,19 +199,21 @@ def delete_backup(self, uuid, project_id=None, force=False):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup_quota(self, project_id):
# quota = conn.get_volume_quotas(project_id)
quota = self._get_volume_quotas(project_id)
return quota.backups

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup_gigabytes_quota(self, project_id):
# quota = conn.get_volume_quotas(project_id)
quota = self._get_volume_quotas(project_id)
Expand Down
9 changes: 2 additions & 7 deletions staffeln/conductor/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,7 @@ def create_volume_backup(self, task):
# backup gen was not created
def process_pre_failed_backup(self, task):
# 1.notify via email
reason = _(
"The backup creation for the volume %s was prefailed." % task.volume_id
)
reason = f"The backup creation for the volume {task.volume_id} was prefailed."
LOG.warn(reason)
task.reason = reason
task.backup_status = constants.BACKUP_FAILED
Expand All @@ -653,10 +651,7 @@ def process_failed_backup(self, task):
self.create_failed_backup_obj(task)
except OpenstackHttpException as ex:
LOG.warn(
_(
"Failed to delete volume backup %s. %s. Need "
"to delete manually." % (task.backup_id, str(ex))
)
f"Failed to delete volume backup {task.backup_id}. {str(ex)}. Need to delete manually."
)
task.reason = reason
task.backup_status = constants.BACKUP_FAILED
Expand Down
36 changes: 35 additions & 1 deletion staffeln/conf/conductor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
title="Conductor Options",
help=_("Options under this group are used " "to define Conductor's configuration."),
)
openstack_group = cfg.OptGroup(
"openstack",
title="OpenStack Options",
help=_(
"Options under this group are used "
"to define OpenStack related configuration."
),
)

backup_opts = [
cfg.IntOpt(
Expand Down Expand Up @@ -72,11 +80,35 @@
min=0,
help=_("Number of incremental backups between full backups."),
),
]

openstack_opts = [
cfg.IntOpt(
"retry_timeout",
default=300,
min=1,
help=_("The timeout for retry, the unit is one second."),
help=_(
"The timeout for retry OpenStackSDK HTTP exceptions, "
"the unit is one second."
),
),
cfg.IntOpt(
"max_retry_interval",
default=30,
min=0,
help=_(
"Max time interval for retry OpenStackSDK HTTP exceptions, "
"the unit is one second."
),
),
cfg.StrOpt(
"skip_retry_codes",
default="404,",
help=_(
"A comma separated string that provides a list of HTTP codes "
"to skip retry on for OpenStackSDK HTTP "
"exception. By default, only `404` is skipped."
),
),
]

Expand Down Expand Up @@ -144,12 +176,14 @@ def register_opts(conf):
conf.register_group(conductor_group)
conf.register_opts(backup_opts, group=conductor_group)
conf.register_opts(rotation_opts, group=conductor_group)
conf.register_opts(openstack_opts, group=openstack_group)
conf.register_opts(coordination_opts, group=coordination_group)


def list_opts():
return {
"DEFAULT": rotation_opts,
conductor_group: backup_opts,
openstack_group: openstack_opts,
coordination_group: coordination_opts,
}

0 comments on commit 355e742

Please sign in to comment.