Skip to content

Commit

Permalink
feat: hierarchical, multi-source settings manager
Browse files Browse the repository at this point in the history
DRAFT
  • Loading branch information
mih committed Sep 24, 2024
1 parent 7ce12b0 commit 208c0e1
Show file tree
Hide file tree
Showing 15 changed files with 735 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dist/
.coverage
.coverage*
docs/generated
docs/_build
*.swp
Expand Down
44 changes: 44 additions & 0 deletions datasalad/settings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Hierarchical, multi-source settings management
Validation of configuration item values
There are two ways to do validation and type conversion: on-access or
on-load. Doing it on-load would allow rejecting invalid configuration
immediately, but it might spend time on items that never get accessed.
On-access might waste cycles on repeated checks, and possibly complain later
than useful. Here we nevertheless run a validator on-access in the default
implementation. Particular sources may want to override this, or ensure that
the stored value that is passed to a validator is already in the best possible
form to make re-validation as cheap as possible.
.. currentmodule:: datasalad.settings
.. autosummary::
:toctree: generated
Settings
Setting
Source
CachingSource
Environment
Defaults
"""

from .defaults import Defaults
from .env import Environment
from .setting import Setting
from .settings import Settings
from .source import (
CachingSource,
Source,
)

# Public API of the ``datasalad.settings`` package. Every name listed here
# is imported from one of the package's submodules above.
__all__ = [
'CachingSource',
'Defaults',
'Environment',
'Setting',
'Settings',
'Source',
]
34 changes: 34 additions & 0 deletions datasalad/settings/defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from __future__ import annotations

import logging

from datasalad.settings.setting import (
DynamicSetting,
Setting,
)
from datasalad.settings.source import InMemorySettings

# Module-level logger; uses the package-wide 'datasalad.settings' name
# rather than a per-module one.
lgr = logging.getLogger('datasalad.settings')


class Defaults(InMemorySettings):
    """In-memory source for default settings

    Defaults are not loaded from any source. Clients have to set any
    items they want to see a default be known for. There would typically be
    only one instance of this class, and it is then the true source of the
    information by itself.

    Items that are ``DynamicSetting`` instances are evaluated on access,
    i.e., ``__getitem__()`` returns the result of their ``eval()`` method
    rather than the stored instance.
    """

    def __getitem__(self, key: str) -> Setting:
        """Return the default for ``key``, evaluating dynamic defaults"""
        stored = super().__getitem__(key)
        if isinstance(stored, DynamicSetting):
            # dynamic defaults are computed at access time
            return stored.eval()
        return stored

    def __setitem__(self, key: str, value: Setting) -> None:
        """Set the default for ``key``, logging when one gets replaced"""
        already_known = key in self
        if already_known:
            # resetting is something that is an unusual event.
            # __setitem__ does not allow for a dedicated "force" flag,
            # so we leave a message at least
            lgr.debug('Resetting %r default', key)
        super().__setitem__(key, value)

    def __str__(self):
        return 'Defaults'
129 changes: 129 additions & 0 deletions datasalad/settings/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from __future__ import annotations

import logging
from os import environ
from typing import (
TYPE_CHECKING,
Any,
)

from datasalad.settings.setting import Setting
from datasalad.settings.source import Source

if TYPE_CHECKING:
from collections.abc import Collection

# Module-level logger; uses the package-wide 'datasalad.settings' name
# rather than a per-module one.
lgr = logging.getLogger('datasalad.settings')


class Environment(Source):
    """Process environment settings source

    This is a stateless source implementation that gets and sets items
    directly in the process environment.

    Environment variables can be filtered by declaring a name prefix. More
    complex filter rules can be implemented by replacing the
    :meth:`include_var()` method in a subclass.

    It is possible to transform environment variable names to setting keys
    (and vice versa), by implementing the methods
    :meth:`get_key_from_varname()` and :meth:`get_varname_from_key()`.
    """

    is_writable = True

    def __init__(
        self,
        *,
        var_prefix: str | None = None,
    ):
        """
        Parameters
        ----------
        var_prefix:
          If given, only environment variables whose name starts with this
          prefix are considered by the default :meth:`include_var()`.
        """
        super().__init__()
        self._var_prefix = var_prefix

    def reinit(self):
        """Does nothing"""

    def load(self) -> None:
        """Does nothing"""

    def __getitem__(self, key: str) -> Setting:
        """Return the setting for ``key`` from the process environment

        All environment variables accepted by :meth:`include_var()` are
        inspected, and those whose transformed name equals ``key`` are
        considered matches. When more than one variable matches, a warning
        is logged and the last match (in ``os.environ`` iteration order) is
        used.

        Raises
        ------
        KeyError
          When no environment variable matches ``key``.
        """
        matching = {
            k: v
            for k, v in environ.items()
            # search for any var that match the key when transformed
            if self.include_var(name=k, value=v)
            and self.get_key_from_varname(k) == key
        }
        if not matching:
            # report the key that was missing, like a mapping would
            raise KeyError(key)
        if len(matching) > 1:
            lgr.warning(
                'Ambiguous key %r matching multiple ENV vars: %r',
                key,
                list(matching.keys()),
            )
        # only the value is needed, the variable name is irrelevant here
        _, v = matching.popitem()
        return Setting(value=v)

    def __setitem__(self, key: str, value: Setting) -> None:
        """Set the environment variable for ``key`` to ``str(value.value)``

        Raises
        ------
        ValueError
          When :meth:`get_varname_from_key()` rejects the key.
        """
        name = self.get_varname_from_key(key)
        environ[name] = str(value.value)

    def get(self, key, default: Any = None) -> Setting:
        """Return the setting for ``key``, or a default when there is none

        A ``default`` that is not already a ``Setting`` instance is wrapped
        in one.
        """
        try:
            return self[key]
        except KeyError:
            if isinstance(default, Setting):
                return default
            return Setting(value=default)

    def keys(self) -> Collection:
        """Returns all keys that can be determined from the environment"""
        return {
            self.get_key_from_varname(k)
            for k, v in environ.items()
            if self.include_var(name=k, value=v)
        }

    def __str__(self):
        return f'Environment[{self._var_prefix}]' if self._var_prefix else 'Environment'

    def __repr__(self):
        # TODO: list keys?
        if self._var_prefix is None:
            return 'Environment()'
        # reflect the constructor arguments, so that instances with
        # different prefixes can be told apart
        return f'Environment(var_prefix={self._var_prefix!r})'

    def include_var(
        self,
        name: str,
        value: str,  # noqa: ARG002 (default implementation does not need it)
    ) -> bool:
        """Determine whether to source a setting from an environment variable

        This default implementation tests whether the name of the variable
        starts with the ``var_prefix`` given to the constructor. Without a
        prefix, any variable is included.

        Reimplement this method to perform custom tests.
        """
        return name.startswith(self._var_prefix or '')

    def get_key_from_varname(self, name: str) -> str:
        """Transform an environment variable name to a setting key

        This default implementation returns the unchanged name as a key.

        Reimplement this method and ``get_varname_from_key()`` to perform
        custom transformations.
        """
        return name

    def get_varname_from_key(self, key: str) -> str:
        """Transform a setting key to an environment variable name

        This default implementation only checks for illegal names and
        raises a ``ValueError``. Otherwise it returns the unchanged key.
        """
        if '=' in key or '\0' in key:
            msg = "illegal environment variable name (contains '=' or NUL)"
            raise ValueError(msg)
        return key
72 changes: 72 additions & 0 deletions datasalad/settings/setting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from __future__ import annotations

from typing import (
Any,
Callable,
)


class UnsetValue:
    """Placeholder type to signal that no value was set

    The class itself (not an instance) serves as the marker. This makes it
    possible to distinguish "no value" from a value of ``None``.
    """


class Setting:
    """Representation of an individual setting

    A setting holds a (raw) value, and optionally a ``coercer`` callable.
    When a coercer is set, the raw value is passed through it each time the
    ``value`` property is accessed.
    """

    def __init__(
        self,
        value: Any | UnsetValue = UnsetValue,
        *,
        coercer: Callable | None = None,
    ):
        self._coercer = coercer
        self._value = value

    @property
    def value(self) -> Any:
        """The stored value, passed through the coercer if there is one"""
        return self._coercer(self._value) if self._coercer else self._value

    @property
    def coercer(self) -> Callable | None:
        """The coercer callable, or ``None``"""
        return self._coercer

    def update(self, item: Setting) -> None:
        """Take over the value and coercer from ``item``, in-place

        Only properties that are actually set in ``item`` are taken over:
        an ``UnsetValue`` value, or a ``None`` coercer leave the respective
        property of this instance untouched.
        """
        incoming_value = getattr(item, '_value', UnsetValue)
        if incoming_value is not UnsetValue:
            self._value = incoming_value

        incoming_coercer = getattr(item, '_coercer', None)
        if incoming_coercer is not None:
            self._coercer = incoming_coercer

    def __str__(self) -> str:
        return str(self.value)

    def __repr__(self) -> str:
        return repr(self.value)


class DynamicSetting(Setting):
    """Setting whose value is computed by a callable on access

    The ``value`` given to the constructor must be a callable that takes no
    arguments. It is called each time the setting is evaluated.
    """

    def __init__(
        self,
        value: Callable,
        *,
        coercer: Callable | None = None,
    ):
        super().__init__(value=value, coercer=coercer)

    @property
    def value(self) -> Any:
        """The (coerced) value of a freshly evaluated setting"""
        evaluated = self.eval()
        return evaluated.value

    def eval(self) -> Setting:
        """Run the callable and return the outcome as a plain ``Setting``

        The returned setting carries the same coercer as this instance.
        """
        # do the dynamic evaluation
        computed = self._value()
        return Setting(computed, coercer=self.coercer)

77 changes: 77 additions & 0 deletions datasalad/settings/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from __future__ import annotations

from copy import copy
from itertools import chain
from types import MappingProxyType
from typing import TYPE_CHECKING

# These names are only needed for type annotations, hence the import is
# limited to static type checking.
if TYPE_CHECKING:  # pragma: nocover
    from datasalad.settings import (
        Setting,
        Source,
    )


class Settings:
    """Query different sources of configuration settings

    This is query-centered. Manipulation is supported by
    individual configuration source implementations.
    This separation is done for two reasons. 1) Query is
    a much more frequent operation than write, and
    2) consolidating different sources for read is sensible,
    and doable, while a uniform semantics and behavior for
    write are complicated due to the inherent differences
    across sources.
    """

    def __init__(
        self,
        sources: dict[str, Source],
    ):
        """
        Parameters
        ----------
        sources:
          Mapping of source names to source instances. The order matters
          and represents the precedence rule: sources listed first take
          precedence over sources listed later.
        """
        # we keep the sources strictly separate.
        # the order here matters and represents the
        # precedence rule
        self._sources = sources

    @property
    def sources(self) -> MappingProxyType:
        """Read-only view on the mapping of source names to sources"""
        return MappingProxyType(self._sources)

    def __len__(self):
        return len(self.keys())

    def __getitem__(self, key) -> Setting:
        """Return the setting for ``key``, consolidated across all sources

        Sources are visited from lowest to highest precedence. A copy of
        the item from the lowest-precedence source that knows the key is
        the starting point, and it is updated with the items from all
        sources with higher precedence. No item held by any source is
        modified.

        Raises
        ------
        KeyError
          When no source has the key.
        """
        item: Setting | None = None
        # go from the back, i.e., start with the lowest precedence
        for s in reversed(self._sources.values()):
            try:
                update_item = s[key]
            except KeyError:
                # source does not have it, proceed
                continue
            if item is None:
                # work on a copy. update() below modifies in-place, and
                # must not alter the item that is owned by the source
                item = copy(update_item)
                continue
            # we run the update() method of the first item we ever found.
            # this will practically make the type produced by the lowest
            # precedence source define the behavior. This is typically
            # some kind of implementation default
            item.update(update_item)
        if item is None:
            # there was nothing
            raise KeyError(key)
        return item

    def __contains__(self, key):
        return any(key in s for s in self._sources.values())

    def keys(self) -> set[str]:
        """Return the union of the keys of all sources"""
        return set(chain.from_iterable(s.keys() for s in self._sources.values()))
Loading

0 comments on commit 208c0e1

Please sign in to comment.