From 9a5c7b7c006cf2c8d949a04618991c67807cc654 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 31 Oct 2024 08:29:02 +0100 Subject: [PATCH 1/6] doc: add missing documentation on annex initialization --- datalad_core/repo/repo.py | 15 ++++++++++++++- datalad_core/repo/utils.py | 11 ++++++++++- datalad_core/repo/worktree.py | 15 +++++++++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/datalad_core/repo/repo.py b/datalad_core/repo/repo.py index 1d0e1e3..663ca93 100644 --- a/datalad_core/repo/repo.py +++ b/datalad_core/repo/repo.py @@ -99,7 +99,20 @@ def init_annex( *, autoenable_remotes: bool = True, ) -> BareRepoAnnex: - """ """ + """Initialize an annex in the repository + + This is done be calling ``git annex init``. If an annex already exists, + it will be reinitialized. + + The ``description`` parameter can be used to label the annex. + Otherwise, git-annex will auto-generate a description based on + username, hostname and the path of the repository. + + The boolean flag ``autoenable_remotes`` controls whether or not + git-annex should honor a special remote's configuration to get + auto-enable on initialization. + """ + # refuse for non-bare if self.config.get('core.bare', False).value is False: msg = ( 'Cannot initialize annex in a non-bare repository, ' diff --git a/datalad_core/repo/utils.py b/datalad_core/repo/utils.py index a543dfc..15efaa7 100644 --- a/datalad_core/repo/utils.py +++ b/datalad_core/repo/utils.py @@ -14,7 +14,16 @@ def init_annex_at( description: str | None = None, autoenable_remotes: bool = True, ) -> None: - """Call ``git-annex init`` at a given ``path``""" + """Call ``git-annex init`` at a given ``path`` + + The ``description`` parameter can be used to label the annex. Otherwise, + git-annex will auto-generate a description based on username, hostname, and + the path of the repository. + + The boolean flag ``autoenable_remotes`` controls whether or not + git-annex should honor a special remote's configuration to get + auto-enabled on initialization. + """ cmd = ['init'] if not autoenable_remotes: # no, we do not set --autoenable, this is a RepoAnnex feature diff --git a/datalad_core/repo/worktree.py b/datalad_core/repo/worktree.py index 5ad9542..4d5c605 100644 --- a/datalad_core/repo/worktree.py +++ b/datalad_core/repo/worktree.py @@ -162,8 +162,19 @@ def init_annex( *, autoenable_remotes: bool = True, ) -> Annex: - """ """ - # refuse for non-bare + """Initialize an annex in the repository associated with the worktree + + This is done be calling ``git annex init``. If an annex already exists, + it will be reinitialized. + + The ``description`` parameter can be used to label the annex. + Otherwise, git-annex will auto-generate a description based on + username, hostname and the path of the repository. + + The boolean flag ``autoenable_remotes`` controls whether or not + git-annex should honor a special remote's configuration to get + auto-enable on initialization. + """ init_annex_at( self.path, description=description, From d2689c790c8c9a394c0406623a347e0f4d6c1d68 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 1 Nov 2024 07:32:14 +0100 Subject: [PATCH 2/6] feat: enhance `call_git()` to return captured output --- datalad_core/runners/git.py | 20 +++++++++++++++----- datalad_core/runners/tests/test_callgit.py | 7 +++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/datalad_core/runners/git.py b/datalad_core/runners/git.py index a2794c4..83e0291 100644 --- a/datalad_core/runners/git.py +++ b/datalad_core/runners/git.py @@ -76,8 +76,9 @@ def call_git( *, cwd: Path | None = None, force_c_locale: bool = False, + text: bool | None = None, capture_output: bool = False, -) -> None: +) -> str | bytes | None: """Call Git with no output capture, raises on non-zero exit. If ``cwd`` is not None, the function changes the working directory to @@ -87,17 +88,26 @@ def call_git( is altered to ensure output according to the C locale. This is useful when output has to be processed in a locale invariant fashion. - If ``capture_output`` is ``True``, process output is captured. This is - necessary for reporting any error messaging via a ``CommandError`` exception. - By default process output is not captured. + If ``capture_output`` is ``True``, process output is captured (and not + relayed to the parent process/terminal). This is necessary for reporting + any error messaging via a ``CommandError`` exception. By default process + output is not captured. + + All other argument are pass on to ``subprocess.run()`` verbatim. + + If ``capture_output`` is enabled, the captured STDOUT is returned as + ``str`` or ``bytes``, depending on the value of ``text``. Otherwise + ``None`` is returned to indicate that no output was captured. """ - _call_git( + res = _call_git( args, capture_output=capture_output, cwd=cwd, check=True, + text=text, force_c_locale=force_c_locale, ) + return res.stdout if capture_output else None def call_git_success( diff --git a/datalad_core/runners/tests/test_callgit.py b/datalad_core/runners/tests/test_callgit.py index 008fa77..bcfbe00 100644 --- a/datalad_core/runners/tests/test_callgit.py +++ b/datalad_core/runners/tests/test_callgit.py @@ -11,8 +11,11 @@ def test_call_git(): - # smoke test - call_git(['--version']) + # by default no output is captured + assert call_git(['--version']) is None + # capture gives bytes unless text=True + assert b'version' in call_git(['--version'], capture_output=True) + assert 'version' in call_git(['--version'], text=True, capture_output=True) # raises properly with pytest.raises(CommandError): call_git(['notacommand']) From 6ff8e4165161c0b6bfe82f030db99773e0c71957 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 1 Nov 2024 07:34:33 +0100 Subject: [PATCH 3/6] feat: add test helper `call_git_addcommit()` This is a poor-man's datalad-save. We use this instead, because we do not actually want to have the inner workings of the tests depend on the very core of the functionality they are supposed to test. This helper merely calls git-add and git-commit in direct succession. For most practical applications is can immediately replace datalad-save, and it is faster. --- datalad_core/tests/__init__.py | 5 +++++ datalad_core/tests/utils.py | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 datalad_core/tests/utils.py diff --git a/datalad_core/tests/__init__.py b/datalad_core/tests/__init__.py index e69de29..46dacca 100644 --- a/datalad_core/tests/__init__.py +++ b/datalad_core/tests/__init__.py @@ -0,0 +1,5 @@ +__all__ = [ + 'call_git_addcommit', +] + +from .utils import call_git_addcommit diff --git a/datalad_core/tests/utils.py b/datalad_core/tests/utils.py new file mode 100644 index 0000000..72134ce --- /dev/null +++ b/datalad_core/tests/utils.py @@ -0,0 +1,22 @@ +from pathlib import ( + Path, +) +from datalad_core.runners import ( + call_git, +) + + +def call_git_addcommit( + cwd: Path, + paths: list[str | PurePath] | None = None, + *, + msg: str | None = None, +): + if paths is None: + paths = ['.'] + + if msg is None: + msg = 'done by call_git_addcommit()' + + call_git(['add'] + [str(p) for p in paths], cwd=cwd, capture_output=True) + call_git(['commit', '-m', msg], cwd=cwd, capture_output=True) From b586815d2c88b48cae1a8cf7aae0b91faf0a0eb4 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 1 Nov 2024 07:46:53 +0100 Subject: [PATCH 4/6] feat: add test fixtures related to symlink capabilities The fixtures - `symlinks_supported` - `skip_when_symlinks_not_supported` simplified test implementations that needs to behave differently (or not at all) when symlinks are not available on the filesystem the tests are using. --- conftest.py | 6 ++++++ datalad_core/tests/fixtures.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/conftest.py b/conftest.py index 45e8800..fc99839 100644 --- a/conftest.py +++ b/conftest.py @@ -6,6 +6,8 @@ 'baregitrepo', 'annexrepo', 'gitrepo', + 'skip_when_symlinks_not_supported', + 'symlinks_supported', 'verify_pristine_gitconfig_global', ] @@ -21,6 +23,10 @@ cfgman, # function-scope temporary Git repo gitrepo, + # function-scope auto-skip when `symlinks_supported` is False + skip_when_symlinks_not_supported, + # session-scope flag if symlinks are supported in test directories + symlinks_supported, # verify no test leave contaminated config behind verify_pristine_gitconfig_global, ) diff --git a/datalad_core/tests/fixtures.py b/datalad_core/tests/fixtures.py index f1a5f02..7fcad1c 100644 --- a/datalad_core/tests/fixtures.py +++ b/datalad_core/tests/fixtures.py @@ -140,3 +140,24 @@ def annexrepo(gitrepo) -> Generator[Path]: capture_output=True, ) return gitrepo + + +@pytest.fixture(autouse=False, scope='session') +def symlinks_supported(tmp_path_factory) -> bool: + """Returns whether creating symlinks is supported in test directories""" + testdir = tmp_path_factory.mktemp('symlink_check') + target = testdir / 'target' + source = testdir / 'source' + try: + target.touch() + source.symlink_to(target) + except Exception: # noqa: BLE001 + return False + return True + + +@pytest.fixture(autouse=False, scope='function') # noqa: PT003 +def skip_when_symlinks_not_supported(symlinks_supported): + if not symlinks_supported: + msg = 'skipped, symlinks are not supported in the test directory' + raise pytest.skip(msg) From 9b2bb1f4706ceb3f79cd27bd3fb91f7566b6f427 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 1 Nov 2024 07:52:00 +0100 Subject: [PATCH 5/6] feat: test helper `modify_dataset()` and `modified_dataset` This is a more flexible and more robust variant of the datalad-next fixture `modified_dataset`, split into a test helper and the actual fixture. Unlike the datalad-next version, it does not require any datalad commands to work, just plain Git. --- conftest.py | 3 + datalad_core/tests/fixtures.py | 87 +++++++++++++++++- datalad_core/tests/test_fixtures.py | 6 ++ datalad_core/tests/test_utils.py | 12 +++ datalad_core/tests/utils.py | 135 +++++++++++++++++++++++++++- 5 files changed, 239 insertions(+), 4 deletions(-) create mode 100644 datalad_core/tests/test_fixtures.py create mode 100644 datalad_core/tests/test_utils.py diff --git a/conftest.py b/conftest.py index fc99839..caef96e 100644 --- a/conftest.py +++ b/conftest.py @@ -6,6 +6,7 @@ 'baregitrepo', 'annexrepo', 'gitrepo', + 'modified_dataset', 'skip_when_symlinks_not_supported', 'symlinks_supported', 'verify_pristine_gitconfig_global', @@ -23,6 +24,8 @@ cfgman, # function-scope temporary Git repo gitrepo, + # session-scope repository with complex unsafed modifications + modified_dataset, # function-scope auto-skip when `symlinks_supported` is False skip_when_symlinks_not_supported, # session-scope flag if symlinks are supported in test directories diff --git a/datalad_core/tests/fixtures.py b/datalad_core/tests/fixtures.py index 7fcad1c..2809c17 100644 --- a/datalad_core/tests/fixtures.py +++ b/datalad_core/tests/fixtures.py @@ -14,7 +14,11 @@ import pytest from datalad_core.config import get_manager -from datalad_core.runners import call_git +from datalad_core.runners import ( + call_git, + call_git_lines, +) +from datalad_core.tests.utils import modify_dataset magic_marker = 'c4d0de12-8008-11ef-86ea-3776083add61' standard_gitconfig = f"""\ @@ -161,3 +165,84 @@ def skip_when_symlinks_not_supported(symlinks_supported): if not symlinks_supported: msg = 'skipped, symlinks are not supported in the test directory' raise pytest.skip(msg) + + +@pytest.fixture(scope='session') +def modified_dataset(tmp_path_factory): + """Produces a dataset with various modifications + + The fixture is module-scope, aiming to be reused by many tests focused + on reporting. It does not support any further modification. The fixture + will fail, if any such modification is detected. Use the helper + ``modify_dataset()`` to apply these modification to an existing repository + to circumwent these restriction. + + ``git status`` will report:: + + > git status -uall + On branch dl-test-branch + Changes to be committed: + (use "git restore --staged ..." to unstage) + new file: dir_m/file_a + new file: file_a + new file: file_am + + Changes not staged for commit: + (use "git add/rm ..." to update what will be committed) + (use "git restore ..." to discard changes in working directory) + (commit or discard the untracked or modified content in submodules) + deleted: dir_d/file_d + deleted: dir_m/file_d + modified: dir_m/file_m + deleted: dir_sm/sm_d + modified: dir_sm/sm_m (modified content) + modified: dir_sm/sm_mu (modified content, untracked content) + modified: dir_sm/sm_n (new commits) + modified: dir_sm/sm_nm (new commits, modified content) + modified: dir_sm/sm_nmu (new commits, modified content, untracked content) + modified: dir_sm/sm_u (untracked content) + modified: file_am + deleted: file_d + modified: file_m + + Untracked files: + (use "git add ..." to include in what will be committed) + dir_m/dir_u/file_u + dir_m/file_u + dir_u/file_u + file_u + + + Suffix indicates the ought-to state (multiple possible): + + a - added + c - clean + d - deleted + n - new commits + m - modified + u - untracked content + + Prefix indicated the item type: + + file - file + sm - submodule + dir - directory + """ + path = tmp_path_factory.mktemp('gitrepo') + call_git( + ['init'], + cwd=path, + capture_output=True, + ) + # the returned status promise is the comparison reference + status_promise = modify_dataset(path).splitlines() + + yield path + # compare with initial git-status output, if there are any + # differences the assumptions of any consuming test could be + # invalidated. The modifying code must be found and fixed + if status_promise != call_git_lines( + ['status', '-uall', '--porcelain=v1'], cwd=path + ): + msg = 'Unexpected modification of the testbed' + raise AssertionError(msg) diff --git a/datalad_core/tests/test_fixtures.py b/datalad_core/tests/test_fixtures.py new file mode 100644 index 0000000..1157d9a --- /dev/null +++ b/datalad_core/tests/test_fixtures.py @@ -0,0 +1,6 @@ +def test_modified_dataset_fixture(modified_dataset): + assert modified_dataset + # the test is to do nothing. + # the dataset modification will be done, the promised capture + # before the test starts. the test will then do nothing, and + # the promise is reevaluated at the end and must succeed. diff --git a/datalad_core/tests/test_utils.py b/datalad_core/tests/test_utils.py new file mode 100644 index 0000000..4113cc4 --- /dev/null +++ b/datalad_core/tests/test_utils.py @@ -0,0 +1,12 @@ +from datalad_core.runners import call_git +from datalad_core.tests.utils import modify_dataset + + +def test_modify_dataset(gitrepo): + promise = modify_dataset(gitrepo) + assert promise == call_git( + ['status', '-uall', '--porcelain=v1'], + cwd=gitrepo, + capture_output=True, + text=True, + ) diff --git a/datalad_core/tests/utils.py b/datalad_core/tests/utils.py index 72134ce..2783761 100644 --- a/datalad_core/tests/utils.py +++ b/datalad_core/tests/utils.py @@ -1,6 +1,16 @@ -from pathlib import ( - Path, -) +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import ( + Path, + PurePath, + ) + +from shutil import rmtree + +from datalad_core.repo import Worktree from datalad_core.runners import ( call_git, ) @@ -20,3 +30,122 @@ def call_git_addcommit( call_git(['add'] + [str(p) for p in paths], cwd=cwd, capture_output=True) call_git(['commit', '-m', msg], cwd=cwd, capture_output=True) + + +# Target `git status -uall --porcelain=v1` of `modify_dataset()` result +modify_dataset_promise = """\ + D dir_d/file_d +A dir_m/file_a + D dir_m/file_d + M dir_m/file_m + D dir_sm/sm_d + M dir_sm/sm_m + M dir_sm/sm_mu + M dir_sm/sm_n + M dir_sm/sm_nm + M dir_sm/sm_nmu + M dir_sm/sm_u +A file_a +AM file_am + D file_d + M file_m +?? dir_m/dir_u/file_u +?? dir_m/file_u +?? dir_u/file_u +?? file_u +""" + + +def modify_dataset(path: Path) -> str: + """Applies the modification for the ``modified_dataset`` fixture + + ``path`` is a directory in an existing Git repository. + + This is provided as a separate function for the case where the + modification themselves need to be modified. The fixture checks + that this does not happen. + + The function returns the ``git status -uall --porcelain=v1`` + report that it aimed to create. This is not a status report + queried after this function ran, but a "promise" that can be used + to inspect the state of a repository. + """ + ds_dir = path / 'dir_m' + ds_dir_d = path / 'dir_d' + dirsm = path / 'dir_sm' + for d in (ds_dir, ds_dir_d, dirsm): + d.mkdir() + (ds_dir / 'file_m').touch() + (path / 'file_m').touch() + dss: dict[str, Path] = {} + smnames = ( + 'sm_d', + 'sm_c', + 'sm_n', + 'sm_m', + 'sm_nm', + 'sm_u', + 'sm_mu', + 'sm_nmu', + 'droppedsm_c', + ) + for smname in smnames: + sds_path = dirsm / smname + sds_path.mkdir() + sds = Worktree.init_at(sds_path) + # we need some content for a commit + (sds_path / '.gitkeep').touch() + # for the plain modification, commit the reference right here + if smname in ('sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'): + (sds_path / 'file_m').touch() + call_git_addcommit(sds_path) + dss[smname] = sds.path + call_git( + ['submodule', 'add', f'./{sds_path}', f'{sds_path.relative_to(path)}'], + cwd=path, + capture_output=True, + ) + # files in superdataset to be deleted + for d in (ds_dir_d, ds_dir, path): + (d / 'file_d').touch() + dss['.'] = path + dss['dir'] = ds_dir + call_git_addcommit(path) + call_git( + ['submodule', 'deinit', '--force', str(dirsm / 'droppedsm_c')], + cwd=path, + capture_output=True, + ) + # a new commit + for smname in ('.', 'sm_n', 'sm_nm', 'sm_nmu'): + sub = dss[smname] + (sub / 'file_c').touch() + call_git_addcommit(sub) + # modified file + for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'): + pobj = dss[smname] + (pobj / 'file_m').write_text('modify!') + # untracked + for smname in ('.', 'dir', 'sm_u', 'sm_mu', 'sm_nmu'): + pobj = dss[smname] + (pobj / 'file_u').touch() + (pobj / 'dirempty_u').mkdir() + (pobj / 'dir_u').mkdir() + (pobj / 'dir_u' / 'file_u').touch() + # delete items + rmtree(dss['sm_d']) + rmtree(ds_dir_d) + (ds_dir / 'file_d').unlink() + (path / 'file_d').unlink() + # added items + for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'): + pobj = dss[smname] + (pobj / 'file_a').write_text('added') + call_git(['add', 'file_a'], cwd=pobj, capture_output=True) + # added and then modified file + file_am_obj = path / 'file_am' + file_am_obj.write_text('added') + call_git(['add', 'file_am'], cwd=path, capture_output=True) + file_am_obj.write_text('modified') + + return modify_dataset_promise From 5a4541a802e499f375a32b0f3e9caa2258b6a988 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 1 Nov 2024 09:41:18 +0100 Subject: [PATCH 6/6] feat: `rmtree()` test helper This is no more than a wrapper around `shutil.rmtree()`, but with an error handler that tries to set write permissions on error. This is a common-enough fix needed on the windows platform that it seems worth providing a helper for. Legacy DataLad also has one, but it's implementation seems to be very much stuck in Python 2 times. --- datalad_core/repo/tests/test_repo.py | 4 ++-- datalad_core/tests/__init__.py | 6 +++++- datalad_core/tests/utils.py | 31 +++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/datalad_core/repo/tests/test_repo.py b/datalad_core/repo/tests/test_repo.py index 65e4f1f..cea32d9 100644 --- a/datalad_core/repo/tests/test_repo.py +++ b/datalad_core/repo/tests/test_repo.py @@ -1,7 +1,7 @@ -from shutil import rmtree - import pytest +from datalad_core.tests import rmtree + from ..repo import Repo diff --git a/datalad_core/tests/__init__.py b/datalad_core/tests/__init__.py index 46dacca..95b341b 100644 --- a/datalad_core/tests/__init__.py +++ b/datalad_core/tests/__init__.py @@ -1,5 +1,9 @@ __all__ = [ 'call_git_addcommit', + 'rmtree', ] -from .utils import call_git_addcommit +from .utils import ( + call_git_addcommit, + rmtree, +) diff --git a/datalad_core/tests/utils.py b/datalad_core/tests/utils.py index 2783761..8996ab8 100644 --- a/datalad_core/tests/utils.py +++ b/datalad_core/tests/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -8,7 +9,7 @@ PurePath, ) -from shutil import rmtree +from shutil import rmtree as shutil_rmtree from datalad_core.repo import Worktree from datalad_core.runners import ( @@ -149,3 +150,31 @@ def modify_dataset(path: Path) -> str: file_am_obj.write_text('modified') return modify_dataset_promise + + +def _rmtree_onerror(func, path, exc_info): # noqa: ARG001 + """ + Error handler for ``shutil.rmtree``. + + If the error is due to an access error (read only file) + it attempts to add write permission and then retries. + + If the error is for another reason it re-raises the error. + """ + import stat + + # Is the error an access error? + if not os.access(path, os.W_OK): + os.chmod(path, stat.S_IWUSR) + func(path) + else: + raise + + +def rmtree(path: Path) -> None: + """``shutil.rmtree()`` with an error handler that sets write permissions""" + shutil_rmtree( + path, + # deprecated with PY3.12 -> onexc= + onerror=_rmtree_onerror, + )