feat: hierarchical, multi-source settings manager

DRAFT
datalad · Sep 24, 2024 · 208c0e1 · 208c0e1
1 parent 7ce12b0
commit 208c0e1
Show file tree

Hide file tree

Showing 15 changed files with 735 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,5 @@
 dist/
-.coverage
+.coverage*
 docs/generated
 docs/_build
 *.swp

diff --git a/datasalad/settings/__init__.py b/datasalad/settings/__init__.py
@@ -0,0 +1,44 @@
+"""Hierarchical, multi-source settings management
+
+Validation of configuration item values
+
+
+There are two ways to do validation and type conversion: on-access, or
+on-load. Doing it on-load would allow rejecting invalid configuration
+immediately, but it might spend time on items that never get accessed.
+On-access might waste cycles on repeated checks, and possibly complain later
+than useful. Here we nevertheless run a validator on-access in the default
+implementation. Particular sources may want to override this, or ensure that
+the stored value that is passed to a validator is already in the best possible
+form to make re-validation as cheap as possible.
+
+.. currentmodule:: datasalad.settings
+.. autosummary::
+   :toctree: generated
+
+   Settings
+   Setting
+   Source
+   CachingSource
+   Environment
+   Defaults
+
+"""
+
+from .defaults import Defaults
+from .env import Environment
+from .setting import Setting
+from .settings import Settings
+from .source import (
+    CachingSource,
+    Source,
+)
+
+__all__ = [
+    'CachingSource',
+    'Defaults',
+    'Environment',
+    'Setting',
+    'Settings',
+    'Source',
+]
diff --git a/datasalad/settings/defaults.py b/datasalad/settings/defaults.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import logging
+
+from datasalad.settings.setting import (
+    DynamicSetting,
+    Setting,
+)
+from datasalad.settings.source import InMemorySettings
+
+lgr = logging.getLogger('datasalad.settings')
+
+
+class Defaults(InMemorySettings):
+    """In-memory source of default settings
+
+    Nothing is loaded from elsewhere; clients set every default they want
+    known. A ``DynamicSetting`` is evaluated whenever it is accessed.
+    Typically a single instance is the authoritative record of defaults.
+    """
+    def __getitem__(self, key: str) -> Setting:
+        stored = super().__getitem__(key)
+        if isinstance(stored, DynamicSetting):
+            return stored.eval()
+        return stored
+
+    def __setitem__(self, key: str, value: Setting) -> None:
+        # resetting is unusual; lacking a "force" flag, at least log it
+        if key in self:
+            lgr.debug('Resetting %r default', key)
+        super().__setitem__(key, value)
+
+    def __str__(self):
+        return 'Defaults'
diff --git a/datasalad/settings/env.py b/datasalad/settings/env.py
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+import logging
+from os import environ
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
+
+from datasalad.settings.setting import Setting
+from datasalad.settings.source import Source
+
+if TYPE_CHECKING:
+    from collections.abc import Collection
+
+lgr = logging.getLogger('datasalad.settings')
+
+
+class Environment(Source):
+    """Process environment settings source
+
+    This is a stateless source implementation that gets and sets items directly
+    in the process environment.
+
+    Environment variables can be filtered by declaring a name prefix. More
+    complex filter rules can be implemented by replacing the
+    :meth:`include_var()` method in a subclass.
+
+    It is possible to transform environment variable names to setting keys (and
+    vice versa), by implementing the methods :meth:`get_key_from_varname()` and
+    :meth:`get_varname_from_key()`.
+    """
+
+    is_writable = True
+
+    def __init__(self, *, var_prefix: str | None = None):
+        super().__init__()
+        # name prefix used by the default ``include_var()`` to filter variables
+        self._var_prefix = var_prefix
+
+    def reinit(self):
+        """Does nothing"""
+
+    def load(self) -> None:
+        """Does nothing"""
+
+    def __getitem__(self, key: str) -> Setting:
+        """Return the setting for ``key``; raise ``KeyError`` if nothing matches"""
+        matching = {
+            k: v
+            for k, v in environ.items()
+            # search for any var that match the key when transformed
+            if self.include_var(name=k, value=v) and self.get_key_from_varname(k) == key
+        }
+        if not matching:
+            raise KeyError(key)
+        if len(matching) > 1:
+            lgr.warning(
+                'Ambiguous key %r matching multiple ENV vars: %r',
+                key,
+                list(matching.keys()),
+            )
+        _, v = matching.popitem()
+        return Setting(value=v)
+
+    def __setitem__(self, key: str, value: Setting) -> None:
+        """Set the variable mapped from ``key`` to the stringified ``value.value``"""
+        name = self.get_varname_from_key(key)
+        environ[name] = str(value.value)
+
+    def get(self, key, default: Any = None) -> Setting:
+        """Return ``self[key]``; if missing, ``default`` (wrapped in a ``Setting``)"""
+        try:
+            return self[key]
+        except KeyError:
+            if isinstance(default, Setting):
+                return default
+            return Setting(value=default)
+
+    def keys(self) -> Collection:
+        """Returns all keys that can be determined from the environment"""
+        return {
+            self.get_key_from_varname(k)
+            for k, v in environ.items()
+            if self.include_var(name=k, value=v)
+        }
+
+    def __str__(self):
+        return f'Environment[{self._var_prefix}]' if self._var_prefix else 'Environment'
+
+    def __repr__(self):
+        # TODO: list keys?
+        return 'Environment()'
+
+    def include_var(
+        self,
+        name: str,
+        value: str,  # noqa: ARG002 (default implementation does not need it)
+    ) -> bool:
+        """Determine whether to source a setting from an environment variable
+
+        This default implementation tests whether the name of the variable
+        starts with the ``var_prefix`` given to the constructor.
+
+        Reimplement this method to perform custom tests.
+        """
+        return name.startswith(self._var_prefix or '')
+
+    def get_key_from_varname(self, name: str) -> str:
+        """Transform an environment variable name to a setting key
+
+        This default implementation returns the unchanged
+        name as a key.
+
+        Reimplement this method and ``get_varname_from_key()`` to perform
+        custom transformations.
+        """
+        return name
+
+    def get_varname_from_key(self, key: str) -> str:
+        """Transform a setting key to an environment variable name
+
+        This default implementation only checks for illegal names and
+        raises a ``ValueError``. Otherwise it returns the unchanged key.
+        """
+        if '=' in key or '\0' in key:
+            msg = "illegal environment variable name (contains '=' or NUL)"
+            raise ValueError(msg)
+        return key
diff --git a/datasalad/settings/setting.py b/datasalad/settings/setting.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from typing import (
+    Any,
+    Callable,
+)
+
+
+class UnsetValue:
+    """Sentinel type; the class itself marks a ``Setting`` value as not set"""
+
+
+class Setting:
+    """Value of a configuration item, optionally paired with a coercer"""
+
+    def __init__(
+        self, value: Any | UnsetValue = UnsetValue, *, coercer: Callable | None = None
+    ):
+        self._coercer = coercer
+        self._value = value
+
+    @property
+    def value(self) -> Any:
+        """Stored value, passed through the coercer (if any) on each access"""
+        coerce = self._coercer
+        return coerce(self._value) if coerce else self._value
+
+    @property
+    def coercer(self) -> Callable | None:
+        """Callable applied to the stored value on access, or ``None``"""
+        return self._coercer
+
+    def update(self, item: Setting) -> None:
+        """Take over the value and the coercer of ``item`` where they are set"""
+        new_value = getattr(item, '_value', UnsetValue)
+        if new_value is not UnsetValue:
+            self._value = new_value
+        new_coercer = getattr(item, '_coercer', None)
+        if new_coercer is not None:
+            self._coercer = new_coercer
+
+    def __str__(self) -> str:
+        return str(self.value)
+
+    def __repr__(self) -> str:
+        # like ``__str__``, this reports the (coerced) value, not the object
+        return repr(self.value)
+
+
+class DynamicSetting(Setting):
+    """Setting whose value is computed by a callable
+
+    The callable is invoked anew on every call of ``eval()``.
+    """
+
+    def __init__(self, value: Callable, *, coercer: Callable | None = None):
+        # ``value`` is stored as-is, it is only called in ``eval()``
+        super().__init__(value=value, coercer=coercer)
+
+    @property
+    def value(self) -> Any:
+        """Coerced result of a fresh evaluation"""
+        evaluated = self.eval()
+        return evaluated.value
+
+    def eval(self) -> Setting:
+        """Call the stored callable; wrap the result in a plain ``Setting``"""
+        # do the dynamic evaluation
+        result = self._value()
+        # the coercer is handed over unchanged
+        return Setting(result, coercer=self.coercer)
+
diff --git a/datasalad/settings/settings.py b/datasalad/settings/settings.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from itertools import chain
+from types import MappingProxyType
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:  # pragma: nocover
+    from datasalad.settings import (
+        Setting,
+        Source,
+    )
+
+
+class Settings:
+    """Query different sources of configuration settings
+
+    This is query-centered. Manipulation is supported by
+    individual configuration source implementations.
+    This separation is done for two reasons. 1) Query is
+    a much more frequent operation than write, and
+    2) consolidating different sources for read is sensible,
+    and doable, while a uniform semantics and behavior for
+    write are complicated due to the inherent differences
+    across sources.
+    """
+
+    def __init__(
+        self,
+        sources: dict[str, Source],
+    ):
+        # we keep the sources strictly separate.
+        # the order here matters and represents the
+        # precedence rule: earlier sources override later ones
+        self._sources = sources
+
+    @property
+    def sources(self) -> MappingProxyType:
+        """Read-only view of the sources mapping"""
+        return MappingProxyType(self._sources)
+
+    def __len__(self) -> int:
+        return len(self.keys())
+
+    def __getitem__(self, key) -> Setting:
+        """Return the setting for ``key``, flattened across all sources
+
+        Sources are visited from lowest to highest precedence. A copy of the
+        first item found is updated with the items of all higher-precedence
+        sources. Raises ``KeyError`` if no source has ``key``.
+        """
+        from copy import copy
+
+        item: Setting | None = None
+        for s in reversed(self._sources.values()):
+            try:
+                update_item = s[key]
+            except KeyError:
+                # source does not have it, proceed
+                continue
+            if item is None:
+                # copy, so update() below cannot alter the source's own item
+                item = copy(update_item)
+                continue
+            # update() of the first item ever found is used. This makes the
+            # type produced by the lowest precedence source define the
+            # behavior. This is typically some kind of implementation default
+            item.update(update_item)
+        if item is None:
+            # there was nothing
+            raise KeyError(key)
+        return item
+
+    def __contains__(self, key) -> bool:
+        return any(key in s for s in self._sources.values())
+
+    def keys(self) -> set[str]:
+        return set(chain.from_iterable(s.keys() for s in self._sources.values()))