Skip to content

Commit

Permalink
Make openstack retry more configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
ricolin committed Nov 13, 2024
1 parent a8b9fa0 commit 9f94efa
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 40 deletions.
67 changes: 41 additions & 26 deletions staffeln/common/openstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ class RetryHTTPError(tenacity.retry_if_exception):

def __init__(self):
def is_http_error(exception):
# Make sure we don't retry on 404, as not found could be an
# expected status.
result = (isinstance(exception, exceptions.HttpException) and
exception.status_code != 404)
# Make sure we don't retry on codes in skip list (default: 404),
# as not found could be an expected status.
skip_codes = CONF.openstack.skip_retry_codes.replace(
' ', '').split(',')
result = (
isinstance(exception, exceptions.HttpException
) and str(exception.status_code) not in skip_codes
)
if result:
LOG.debug(f"Getting HttpException {exception} (status "
f"code: {exception.status_code}), "
Expand Down Expand Up @@ -48,9 +52,10 @@ def set_project(self, project):
# user
@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_user_id(self):
user_name = self.conn.config.auth["username"]
if "user_domain_id" in self.conn.config.auth:
Expand All @@ -65,9 +70,10 @@ def get_user_id(self):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_role_assignments(self, project_id, user_id=None):
filters = {"project": project_id}
if user_id:
Expand All @@ -76,17 +82,19 @@ def get_role_assignments(self, project_id, user_id=None):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_user(self, user_id):
return self.conn.get_user(name_or_id=user_id)

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_project_member_emails(self, project_id):
members = self.get_role_assignments(project_id)
emails = []
Expand All @@ -105,17 +113,19 @@ def get_project_member_emails(self, project_id):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_projects(self):
return self.conn.list_projects()

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_servers(self, project_id=None, all_projects=True, details=True):
if project_id is not None:
return self.conn.compute.servers(
Expand All @@ -126,17 +136,19 @@ def get_servers(self, project_id=None, all_projects=True, details=True):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_volume(self, uuid, project_id):
return self.conn.get_volume_by_id(uuid)

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup(self, uuid, project_id=None):
# return conn.block_storage.get_backup(
# project_id=project_id, backup_id=uuid,
Expand Down Expand Up @@ -169,9 +181,10 @@ def create_backup(

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def delete_backup(self, uuid, project_id=None, force=False):
# Note(Alex): v3 is not supporting force delete?
# conn.block_storage.delete_backup(
Expand All @@ -186,19 +199,21 @@ def delete_backup(self, uuid, project_id=None, force=False):

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup_quota(self, project_id):
# quota = conn.get_volume_quotas(project_id)
quota = self._get_volume_quotas(project_id)
return quota.backups

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=30),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout))
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup_gigabytes_quota(self, project_id):
# quota = conn.get_volume_quotas(project_id)
quota = self._get_volume_quotas(project_id)
Expand Down
30 changes: 29 additions & 1 deletion staffeln/conf/conductor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
title="Conductor Options",
help=_("Options under this group are used " "to define Conductor's configuration."),
)
openstack_group = cfg.OptGroup(
"openstack",
title="OpenStack Options",
help=_(
"Options under this group are used "
"to define OpneStack related configuration."
),
)

backup_opts = [
cfg.IntOpt(
Expand Down Expand Up @@ -67,11 +75,29 @@
min=0,
help=_("Number of incremental backups between full backups."),
),
]

openstack_opts = [
cfg.IntOpt(
"retry_timeout",
default=300,
min=1,
help=_("The timeout for retry, the unit is one second."),
help=_("The timeout for retry OpenStackSDK HTTP exceptions, "
"the unit is one second."),
),
cfg.IntOpt(
"max_retry_interval",
default=30,
min=0,
help=_("Max time interval for retry OpenStackSDK HTTP exceptions, "
"the unit is one second."),
),
cfg.StrOpt(
"skip_retry_codes",
default="404,",
help=_("A comma separated string that provides a list of HTTP codes "
"to skip retry on for OpenStackSDK HTTP "
"exception. Default only `404` is skipped.")
),
]

Expand Down Expand Up @@ -134,12 +160,14 @@ def register_opts(conf):
conf.register_group(conductor_group)
conf.register_opts(backup_opts, group=conductor_group)
conf.register_opts(rotation_opts, group=conductor_group)
conf.register_opts(openstack_opts, group=openstack_group)
conf.register_opts(coordination_opts, group=coordination_group)


def list_opts():
return {
"DEFAULT": rotation_opts,
conductor_group: backup_opts,
openstack_group: openstack_opts,
coordination_group: coordination_opts,
}
56 changes: 43 additions & 13 deletions staffeln/tests/common/test_openstacksdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from staffeln.common import auth
from staffeln.common import openstack as s_openstack
from staffeln import conf
from staffeln.tests import base

class OpenstackSDKTest(base.TestCase):
Expand All @@ -23,20 +24,35 @@ def setUp(self):
'get_backup_gigabytes_quota'
]
for i in func_list:
getattr(self.openstack, i).retry.sleep = self.m_sleep # pylint: disable=E1101
getattr(self.openstack, i).retry.stop = tenacity.stop_after_attempt(2) # pylint: disable=E1101

self.fake_user = mock.MagicMock(id='foo', email='[email protected]')
self.fake_volume = mock.MagicMock(id='fake_volume')
self.fake_backup = mock.MagicMock(id='fake_backup')
self.fake_role_assignment = mock.MagicMock(user='foo')
self.fake_role_assignment2 = mock.MagicMock(user={'id': 'bar'})

def _test_http_error(self, m_func, retry_func, status_code, call_count=1, **kwargs):
m_func.side_effect=openstack_exc.HttpException(http_status=status_code)
exc = self.assertRaises(openstack_exc.HttpException, getattr(self.openstack, retry_func), **kwargs)
getattr(self.openstack, i).retry.sleep = ( # pylint: disable=E1101
self.m_sleep
)
getattr(self.openstack, i).retry.stop = ( # pylint: disable=E1101
tenacity.stop_after_attempt(2)
)

self.fake_user = mock.MagicMock(id="foo", email="[email protected]")
self.fake_volume = mock.MagicMock(id="fake_volume")
self.fake_backup = mock.MagicMock(id="fake_backup")
self.fake_role_assignment = mock.MagicMock(user="foo")
self.fake_role_assignment2 = mock.MagicMock(user={"id": "bar"})

def _test_http_error(
self, m_func, retry_func, status_code, call_count=1,
**kwargs
):
m_func.side_effect = openstack_exc.HttpException(
http_status=status_code
)
exc = self.assertRaises(
openstack_exc.HttpException,
getattr(self.openstack, retry_func),
**kwargs,
)
self.assertEqual(status_code, exc.status_code)
if status_code != 404:
skip_retry_codes = conf.CONF.openstack.skip_retry_codes.replace(
' ', '').split(',')
if str(status_code) not in skip_retry_codes:
if call_count == 1:
self.m_sleep.assert_called_once_with(1.0)
else:
Expand All @@ -57,6 +73,20 @@ def test_get_servers(self):
def test_get_servers_non_http_error(self):
self._test_non_http_error(self.m_c.compute.servers, 'get_servers')

def test_get_servers_conf_skip_http_error(self):
conf.CONF.set_override('skip_retry_codes', '403,', 'openstack')
self._test_http_error(
self.m_c.compute.servers, "get_servers", status_code=403
)
self.assertEqual('403,', conf.CONF.openstack.skip_retry_codes)

def test_get_servers_conf_skip_http_error_not_hit(self):
conf.CONF.set_override('skip_retry_codes', '403,', 'openstack')
self._test_http_error(
self.m_c.compute.servers, "get_servers", status_code=404
)
self.assertEqual('403,', conf.CONF.openstack.skip_retry_codes)

def test_get_servers_404_http_error(self):
self._test_http_error(self.m_c.compute.servers, 'get_servers', status_code=404)

Expand Down

0 comments on commit 9f94efa

Please sign in to comment.