-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: classes to represent (bare) Git repositories and worktrees
There are no direct equivalents in legacy DataLad for the classes `Repo` and `Worktree`. The closest would be `GitRepo`, but this is a compound representation of a Git repository with an associated checkout. In comparison, `Repo` and `Worktree` are more focused, simpler models with leaner implementations. A `Repo` is a Git repository (location). It is conceptually identical to what is called GIT-DIR in the Git documentation. It refers to just the "bare" components of a repository, not to any potential checkout. A `Repo` features a dedicated `ConfigManager` that is derived from a global instance, sharing all common sources (actually using the exact same source instances). It adds a `git-local` and a `datalad-branch` configuration scope. The latter is added for semantic compatibility with legacy DataLad, which also reads the committed configuration from the configured HEAD branch of a bare repo. A `Worktree` is a primary checkout (of a non-bare Git repository), or any (linked) additional Git worktree. A `Worktree` always has an associated `Repo`, accessible via its `.repo` property. With `extensions.worktreeConfig` enabled in Git, `Worktree` provides a tailored `ConfigManager` with an additional `git-worktree` scope. Otherwise it uses the manager of the underlying `Repo`. Both classes implement the "flyweight" pattern, like it is done in legacy DataLad. This means that, within the same process, creating instances of `Repo` and `Worktree` always yields the same instance for the same path location. This enables a straightforward implementation of cleanup routines, locking, but also sharing `ConfigManager` instances and their sources. In comparison to legacy DataLad, the flyweight pattern implementation is simplified. Only classes with a single `path` parameter are supported. Importantly, neither `Repo` nor `Worktree` class constructors support the actual creation of a repository or worktree "on disk".
- Loading branch information
Showing
10 changed files
with
498 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
"""Repository and worktree representations | ||
.. currentmodule:: datalad_core.repo | ||
.. autosummary:: | ||
:toctree: generated | ||
Repo | ||
Worktree | ||
""" | ||
|
||
__all__ = [ | ||
'Repo', | ||
'Worktree', | ||
] | ||
|
||
from .repo import Repo | ||
from .worktree import Worktree |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import threading | ||
from abc import abstractmethod | ||
from pathlib import Path | ||
|
||
|
||
class PathBasedFlyweight(type):
    """Metaclass implementing a flyweight pattern keyed on a path

    See `https://en.wikipedia.org/wiki/Flyweight_pattern`_ for background
    on the pattern.

    The companion class :class:`Flyweighted` can serve as a base class for
    classes that implement this pattern.

    The "factory" is integrated into the actual classes, so consuming code
    generally need not be aware of the flyweight pattern.

    To use it, declare this class as the metaclass of the target class.
    The target class must additionally provide a class attribute
    `_unique_instances`, which should be a `WeakValueDictionary`.
    """

    # a single lock guards every instantiation, to avoid parallel creation
    # of (identical) instances
    _lock = threading.Lock()

    # ATM the identity of a flyweighted entity is determined by one
    # constructor argument only: `path`. If further arguments ever need to
    # contribute to that identity, this (or a derived) implementation must
    # be amended, so that it can still tell correctly if and when two
    # instances are to be treated as "same".
    def __call__(cls, path: Path):
        """Return the cached instance for ``path``, creating it if needed

        An existing instance is only reused while its ``flyweight_valid()``
        reports true; otherwise a replacement is created and registered
        under the same key.
        """
        key = path.absolute()

        # Instantiation is locked altogether to avoid races between threads
        # requesting the same entity. A lock per key would be more
        # fine-grained, but obtaining such a lock could race in itself.
        # Keeping it simple for now; could be made smarter later on.
        with cls._lock:
            # ignore typing: there is no known way to declare that `cls`
            # is required to have this particular class attribute
            registry = cls._unique_instances  # type: ignore
            cached = registry.get(key, None)
            if cached is not None and cached.flyweight_valid():
                return cached

            # nothing cached yet, or the cached instance was invalidated.
            # The argument is passed on at face-value, to preserve any and
            # all semantics of the instantiated class
            fresh = type.__call__(cls, path)
            registry[key] = fresh
            return fresh
|
||
|
||
class Flyweighted:
    """Base class for classes that use a flyweight metaclass"""

    def __hash__(self):
        # The key under which the instance is registered already identifies
        # it uniquely. It is read from the instance's weak reference, which
        # carries a `key` attribute when it stems from a
        # `WeakValueDictionary`. The class name is mixed in to distinguish
        # the hash from that of a plain path.
        class_name = self.__class__.__name__
        flyweight_key = self.__weakref__.key
        return hash((class_name, flyweight_key))

    @classmethod
    def _close(cls, path):
        """Finalize/clean-up when a flyweighted instance is garbage-collected

        The default implementation is a no-op.

        This has to be a classmethod rather than an instance method, and it
        must not receive any `self`-type argument either: that would create
        an additional reference to the object, and thereby prevent it from
        being collected at all.
        """

    @abstractmethod
    def flyweight_valid(self):
        """Report whether a cached instance is still good to be reused

        This test runs on every object creation and should be kept as cheap
        as possible.
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
""" """ | ||
|
||
from pathlib import Path | ||
from weakref import WeakValueDictionary | ||
|
||
from datalad_core.repo.flyweight import ( | ||
Flyweighted, | ||
PathBasedFlyweight, | ||
) | ||
from datalad_core.runners import ( | ||
call_git_lines, | ||
call_git_success, | ||
) | ||
|
||
|
||
class GitManaged(Flyweighted, metaclass=PathBasedFlyweight):
    """Base class for more specialized Git-managed entities

    This class is primarily a technical helper for hosting common
    implementations.
    """

    # flyweights
    # NOTE(review): this mapping lives on the base class, hence any subclass
    # that does not define its own `_unique_instances` shares it (and its
    # path keys) with all other such subclasses -- confirm this is intended.
    _unique_instances: WeakValueDictionary = WeakValueDictionary()

    def __init__(self, path: Path):
        """
        ``path`` is the location of the Git-managed entity. It is stored
        in absolute form.
        """
        self._path = path.absolute()
        # both are populated lazily by `_get_git_props()`
        self._git_dir = None
        self._git_common_dir = None

    def __str__(self):
        return f'{self.__class__.__name__}({self._path})'

    def __repr__(self):
        return f'{self.__class__.__name__}({self._path!r})'

    def flyweight_valid(self):
        """Test continued validity of an instance

        The test is performed by running ``git rev-parse --git-dir`` at the
        instance's path, which would fail if the location is not (or no
        longer) managed by Git.
        """
        # the idea being that as long as Git itself can report a GITDIR, we
        # can assume to continue to be in a location managed by Git.
        # Derived classes may want to override this implementation with
        # something more specific (e.g., worktree did not move, etc)
        #
        # `-C` is essential: without it Git would inspect the current
        # working directory of the process, and not the location this
        # instance represents.
        return call_git_success(
            ['-C', str(self._path), 'rev-parse', '--git-dir'],
        )

    @property
    def path(self) -> Path:
        """Absolute path of the Git-managed location"""
        # this is a property wrapper, because we may want to introduce
        # some invalidation tests at some point
        return self._path

    @property
    def git_dir(self):
        """Path to the associated ``.git`` directory"""
        if self._git_dir is None:
            self._get_git_props()
        return self._git_dir

    @property
    def git_common_dir(self):
        """Path to the associated common ``.git`` directory

        This will be identical to :attr:`git_dir`, except for a linked
        worktree with enabled ``extensions.worktreeConfig`` flag.
        """
        if self._git_common_dir is None:
            self._get_git_props()
        return self._git_common_dir

    def _get_git_props(self):
        """Query Git for all cached properties and store them on the instance"""
        # pull a set of properties at once, assuming that one call is cheaper
        # than multiple.
        # each spec is: (rev-parse option, attribute name, value converter)
        prop_specs = (
            ('--git-dir', '_git_dir', Path),
            ('--git-common-dir', '_git_common_dir', Path),
        )
        # output lines are paired with the specs by position, i.e. this
        # relies on one output line per requested option, in request order
        for spec, val in zip(
            prop_specs,
            call_git_lines(
                [
                    '-C',
                    str(self._path),
                    'rev-parse',
                    '--path-format=absolute',
                    *(p[0] for p in prop_specs),
                ]
            ),
        ):
            setattr(self, spec[1], spec[2](val))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
from weakref import finalize | ||
|
||
if TYPE_CHECKING: | ||
from pathlib import Path | ||
|
||
from datasalad.settings import Settings | ||
|
||
from datalad_core.config import ( | ||
ConfigItem, | ||
ConfigManager, | ||
DataladBranchConfig, | ||
LocalGitConfig, | ||
get_manager, | ||
) | ||
from datalad_core.repo.gitmanaged import GitManaged | ||
|
||
|
||
class Repo(GitManaged):
    """(The bare part of) an existing repository"""

    def __init__(self, path: Path):
        """
        ``path`` is the path to an existing repository (Git dir).
        """
        super().__init__(path)
        # created on first access of the `config` property
        self._config: ConfigManager | None = None

        # TODO: sanity check with: `git rev-parse --is-inside-git-dir <path>`?

        # A finalizer is registered in place of a __del__ method. It is
        # invoked by garbage collection as well as "atexit". Holding on to
        # the reference would also permit calling it explicitly...
        # eventually
        self._finalizer = finalize(self, Repo._close, self.path)

    @property
    def config(self) -> ConfigManager:
        """Returns a ``ConfigManager`` tailored to the repository

        All source instances of the global manager are reused by the
        returned instance. On top of that, a :class:`LocalGitConfig` and a
        :class:`DataladBranchConfig` source are included in the list of
        scopes. The order of sources is:

        - ``git-command``: :class:`GitEnvironment`
        - ``git-local``: :class:`LocalGitConfig`
        - ``git-global``: :class:`GlobalGitConfig`
        - ``git-system``: :class:`SystemGitConfig`
        - ``datalad-branch``: :class:`DataladBranchConfig`
        - ``defaults``: :class:`ImplementationDefaults`
        """
        if self._config is not None:
            # already built on a previous access
            return self._config

        global_manager = get_manager()
        # would raise ValueError, if there is no repo at `path`
        local_source = LocalGitConfig(self.path)
        branch_source = DataladBranchConfig(self.path)
        local_source.item_type = ConfigItem
        branch_source.item_type = ConfigItem

        # The manager's own constructor would create all sources anew.
        # That is bypassed, and the sources of the "parent" manager are
        # reused instead, which gives cheap synchronization with it.
        manager = Settings.__new__(ConfigManager)
        shared = global_manager.sources
        scopes = {
            'git-command': shared['git-command'],
            'git-local': local_source,
            'git-global': shared['git-global'],
            'git-system': shared['git-system'],
            'datalad-branch': branch_source,
            'defaults': shared['defaults'],
        }
        Settings.__init__(manager, scopes)
        self._config = manager
        return manager
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from ..repo import Repo | ||
|
||
|
||
def test_repo(baregitrepo):
    """Check rendering, configuration and path properties of a bare ``Repo``"""
    repo = Repo(baregitrepo)
    assert str(repo) == f'Repo({baregitrepo})'
    assert repr(repo) == f'Repo({baregitrepo!r})'

    # two rounds: the second one tests the cached retrieval
    # (partial coverage)
    for _ in range(2):
        assert repo.config['core.bare'].value is True

    assert repo.path is baregitrepo
    # again two rounds, the second one for the cached retrieval
    for _ in range(2):
        assert repo.git_dir == baregitrepo
        assert repo.git_common_dir == baregitrepo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from datalad_core.config import ConfigItem | ||
from datalad_core.runners import call_git | ||
|
||
from ..worktree import Worktree | ||
|
||
|
||
def test_worktree(gitrepo):
    """Check rendering and configuration of a main ``Worktree``"""
    wt = Worktree(gitrepo)
    assert str(wt) == f'Worktree({gitrepo})'
    assert repr(wt) == f'Worktree({gitrepo!r})'

    local_config_file = str(gitrepo / '.git' / 'config')
    config_str = str(wt.config)
    config_repr = repr(wt.config)
    assert local_config_file in config_str
    assert local_config_file in config_repr
    # sources without content are not reported ...
    assert 'DataladBranchConfig' not in config_str
    # ... only in repr()
    assert 'DataladBranchConfig' in config_repr

    # main worktree shares config manager with repo
    assert wt.config is wt.repo.config
    assert wt.config['core.bare'].value is False
    assert wt.path is gitrepo
    assert wt.repo.path != wt.path
|
||
|
||
def test_secondary_worktree(gitrepo):
    """Check a linked worktree, with and without a dedicated config scope"""
    test_key = 'brand.new.key'
    test_key2 = 'other.brand.new.key'
    branch = 'dummy'
    wt_path = gitrepo.parent / branch
    add_worktree_cmd = ['-C', str(gitrepo), 'worktree', 'add', str(wt_path)]
    call_git(add_worktree_cmd)

    wt1 = Worktree(gitrepo)
    wt2 = Worktree(wt_path)
    # and the repo is represented by the very same instance
    assert wt1.repo is wt2.repo
    assert wt1.config is wt2.config is wt1.repo.config

    # now enable a dedicated worktree config.
    # the variable name is given in mixed case here. the section/variable
    # name is case-insensitive, and it must not matter
    local_scope = wt1.config.sources['git-local']
    local_scope['extensions.worktreeConfig'] = ConfigItem('true')
    # it has to be enabled manually for all linked worktrees.
    # the alternative would be a complex search/tracking of related
    # worktree instances, or a continuous on-access reevaluation.
    # Both are not worth the trouble, given that such setup changes
    # are presumably rare.
    # wt2 is listed twice, just to see that a repeated call does no harm
    for wt in (wt1, wt2, wt2):
        wt.enable_worktree_config()

    # and the repo is represented by the very same instance
    assert wt1.repo is wt2.repo
    # but the worktree config is no longer the same
    assert wt1.config is not wt2.config

    # setting the same key with different value in both worktrees
    labelled = ((wt1, 'wt1'), (wt2, 'wt2'))
    for wt, label in labelled:
        wt.config.sources['git-worktree'][test_key] = ConfigItem(label)
    for wt, label in labelled:
        assert wt.config[test_key].value == label

    # wt2's worktree scope is not "chained" after wt1, there is just
    # the one setting we found above
    assert len(wt2.config.getall(test_key)) == 1
    wt2.config.sources['git-worktree'].reinit().load()
    assert len(wt2.config.getall(test_key)) == 1

    # only set through the main worktree, but into the shared local scope
    for wt in (wt1, wt2):
        assert test_key2 not in wt.config
    wt1.config.sources['git-local'][test_key2] = ConfigItem('shared')
    for wt in (wt1, wt2):
        assert test_key2 in wt.config

    # check that we can use Repo/Worktree objects as dict keys
    # (e.g. to group some results by repo/worktree)
    expected = (
        (wt1, 'wt1'),
        (wt2, 'wt2'),
        (wt1.repo, 'repo'),
    )
    rwd = dict(expected)
    for obj, label in expected:
        assert rwd[obj] == label
Oops, something went wrong.