diff --git a/conftest.py b/conftest.py index 45e8800..caef96e 100644 --- a/conftest.py +++ b/conftest.py @@ -6,6 +6,9 @@ 'baregitrepo', 'annexrepo', 'gitrepo', + 'modified_dataset', + 'skip_when_symlinks_not_supported', + 'symlinks_supported', 'verify_pristine_gitconfig_global', ] @@ -21,6 +24,12 @@ cfgman, # function-scope temporary Git repo gitrepo, + # session-scope repository with complex unsafed modifications + modified_dataset, + # function-scope auto-skip when `symlinks_supported` is False + skip_when_symlinks_not_supported, + # session-scope flag if symlinks are supported in test directories + symlinks_supported, # verify no test leave contaminated config behind verify_pristine_gitconfig_global, ) diff --git a/datalad_core/repo/repo.py b/datalad_core/repo/repo.py index 1d0e1e3..663ca93 100644 --- a/datalad_core/repo/repo.py +++ b/datalad_core/repo/repo.py @@ -99,7 +99,20 @@ def init_annex( *, autoenable_remotes: bool = True, ) -> BareRepoAnnex: - """ """ + """Initialize an annex in the repository + + This is done be calling ``git annex init``. If an annex already exists, + it will be reinitialized. + + The ``description`` parameter can be used to label the annex. + Otherwise, git-annex will auto-generate a description based on + username, hostname and the path of the repository. + + The boolean flag ``autoenable_remotes`` controls whether or not + git-annex should honor a special remote's configuration to get + auto-enable on initialization. + """ + # refuse for non-bare if self.config.get('core.bare', False).value is False: msg = ( 'Cannot initialize annex in a non-bare repository, ' diff --git a/datalad_core/repo/tests/test_repo.py b/datalad_core/repo/tests/test_repo.py index 65e4f1f..cea32d9 100644 --- a/datalad_core/repo/tests/test_repo.py +++ b/datalad_core/repo/tests/test_repo.py @@ -1,7 +1,7 @@ -from shutil import rmtree - import pytest +from datalad_core.tests import rmtree + from ..repo import Repo diff --git a/datalad_core/repo/utils.py b/datalad_core/repo/utils.py index a543dfc..15efaa7 100644 --- a/datalad_core/repo/utils.py +++ b/datalad_core/repo/utils.py @@ -14,7 +14,16 @@ def init_annex_at( description: str | None = None, autoenable_remotes: bool = True, ) -> None: - """Call ``git-annex init`` at a given ``path``""" + """Call ``git-annex init`` at a given ``path`` + + The ``description`` parameter can be used to label the annex. Otherwise, + git-annex will auto-generate a description based on username, hostname, and + the path of the repository. + + The boolean flag ``autoenable_remotes`` controls whether or not + git-annex should honor a special remote's configuration to get + auto-enabled on initialization. + """ cmd = ['init'] if not autoenable_remotes: # no, we do not set --autoenable, this is a RepoAnnex feature diff --git a/datalad_core/repo/worktree.py b/datalad_core/repo/worktree.py index 5ad9542..4d5c605 100644 --- a/datalad_core/repo/worktree.py +++ b/datalad_core/repo/worktree.py @@ -162,8 +162,19 @@ def init_annex( *, autoenable_remotes: bool = True, ) -> Annex: - """ """ - # refuse for non-bare + """Initialize an annex in the repository associated with the worktree + + This is done be calling ``git annex init``. If an annex already exists, + it will be reinitialized. + + The ``description`` parameter can be used to label the annex. + Otherwise, git-annex will auto-generate a description based on + username, hostname and the path of the repository. + + The boolean flag ``autoenable_remotes`` controls whether or not + git-annex should honor a special remote's configuration to get + auto-enable on initialization. + """ init_annex_at( self.path, description=description, diff --git a/datalad_core/runners/git.py b/datalad_core/runners/git.py index a2794c4..83e0291 100644 --- a/datalad_core/runners/git.py +++ b/datalad_core/runners/git.py @@ -76,8 +76,9 @@ def call_git( *, cwd: Path | None = None, force_c_locale: bool = False, + text: bool | None = None, capture_output: bool = False, -) -> None: +) -> str | bytes | None: """Call Git with no output capture, raises on non-zero exit. If ``cwd`` is not None, the function changes the working directory to @@ -87,17 +88,26 @@ def call_git( is altered to ensure output according to the C locale. This is useful when output has to be processed in a locale invariant fashion. - If ``capture_output`` is ``True``, process output is captured. This is - necessary for reporting any error messaging via a ``CommandError`` exception. - By default process output is not captured. + If ``capture_output`` is ``True``, process output is captured (and not + relayed to the parent process/terminal). This is necessary for reporting + any error messaging via a ``CommandError`` exception. By default process + output is not captured. + + All other argument are pass on to ``subprocess.run()`` verbatim. + + If ``capture_output`` is enabled, the captured STDOUT is returned as + ``str`` or ``bytes``, depending on the value of ``text``. Otherwise + ``None`` is returned to indicate that no output was captured. """ - _call_git( + res = _call_git( args, capture_output=capture_output, cwd=cwd, check=True, + text=text, force_c_locale=force_c_locale, ) + return res.stdout if capture_output else None def call_git_success( diff --git a/datalad_core/runners/tests/test_callgit.py b/datalad_core/runners/tests/test_callgit.py index 008fa77..bcfbe00 100644 --- a/datalad_core/runners/tests/test_callgit.py +++ b/datalad_core/runners/tests/test_callgit.py @@ -11,8 +11,11 @@ def test_call_git(): - # smoke test - call_git(['--version']) + # by default no output is captured + assert call_git(['--version']) is None + # capture gives bytes unless text=True + assert b'version' in call_git(['--version'], capture_output=True) + assert 'version' in call_git(['--version'], text=True, capture_output=True) # raises properly with pytest.raises(CommandError): call_git(['notacommand']) diff --git a/datalad_core/tests/__init__.py b/datalad_core/tests/__init__.py index e69de29..95b341b 100644 --- a/datalad_core/tests/__init__.py +++ b/datalad_core/tests/__init__.py @@ -0,0 +1,9 @@ +__all__ = [ + 'call_git_addcommit', + 'rmtree', +] + +from .utils import ( + call_git_addcommit, + rmtree, +) diff --git a/datalad_core/tests/fixtures.py b/datalad_core/tests/fixtures.py index f1a5f02..2809c17 100644 --- a/datalad_core/tests/fixtures.py +++ b/datalad_core/tests/fixtures.py @@ -14,7 +14,11 @@ import pytest from datalad_core.config import get_manager -from datalad_core.runners import call_git +from datalad_core.runners import ( + call_git, + call_git_lines, +) +from datalad_core.tests.utils import modify_dataset magic_marker = 'c4d0de12-8008-11ef-86ea-3776083add61' standard_gitconfig = f"""\ @@ -140,3 +144,105 @@ def annexrepo(gitrepo) -> Generator[Path]: capture_output=True, ) return gitrepo + + +@pytest.fixture(autouse=False, scope='session') +def symlinks_supported(tmp_path_factory) -> bool: + """Returns whether creating symlinks is supported in test directories""" + testdir = tmp_path_factory.mktemp('symlink_check') + target = testdir / 'target' + source = testdir / 'source' + try: + target.touch() + source.symlink_to(target) + except Exception: # noqa: BLE001 + return False + return True + + +@pytest.fixture(autouse=False, scope='function') # noqa: PT003 +def skip_when_symlinks_not_supported(symlinks_supported): + if not symlinks_supported: + msg = 'skipped, symlinks are not supported in the test directory' + raise pytest.skip(msg) + + +@pytest.fixture(scope='session') +def modified_dataset(tmp_path_factory): + """Produces a dataset with various modifications + + The fixture is module-scope, aiming to be reused by many tests focused + on reporting. It does not support any further modification. The fixture + will fail, if any such modification is detected. Use the helper + ``modify_dataset()`` to apply these modification to an existing repository + to circumwent these restriction. + + ``git status`` will report:: + + > git status -uall + On branch dl-test-branch + Changes to be committed: + (use "git restore --staged ..." to unstage) + new file: dir_m/file_a + new file: file_a + new file: file_am + + Changes not staged for commit: + (use "git add/rm ..." to update what will be committed) + (use "git restore ..." to discard changes in working directory) + (commit or discard the untracked or modified content in submodules) + deleted: dir_d/file_d + deleted: dir_m/file_d + modified: dir_m/file_m + deleted: dir_sm/sm_d + modified: dir_sm/sm_m (modified content) + modified: dir_sm/sm_mu (modified content, untracked content) + modified: dir_sm/sm_n (new commits) + modified: dir_sm/sm_nm (new commits, modified content) + modified: dir_sm/sm_nmu (new commits, modified content, untracked content) + modified: dir_sm/sm_u (untracked content) + modified: file_am + deleted: file_d + modified: file_m + + Untracked files: + (use "git add ..." to include in what will be committed) + dir_m/dir_u/file_u + dir_m/file_u + dir_u/file_u + file_u + + + Suffix indicates the ought-to state (multiple possible): + + a - added + c - clean + d - deleted + n - new commits + m - modified + u - untracked content + + Prefix indicated the item type: + + file - file + sm - submodule + dir - directory + """ + path = tmp_path_factory.mktemp('gitrepo') + call_git( + ['init'], + cwd=path, + capture_output=True, + ) + # the returned status promise is the comparison reference + status_promise = modify_dataset(path).splitlines() + + yield path + # compare with initial git-status output, if there are any + # differences the assumptions of any consuming test could be + # invalidated. The modifying code must be found and fixed + if status_promise != call_git_lines( + ['status', '-uall', '--porcelain=v1'], cwd=path + ): + msg = 'Unexpected modification of the testbed' + raise AssertionError(msg) diff --git a/datalad_core/tests/test_fixtures.py b/datalad_core/tests/test_fixtures.py new file mode 100644 index 0000000..1157d9a --- /dev/null +++ b/datalad_core/tests/test_fixtures.py @@ -0,0 +1,6 @@ +def test_modified_dataset_fixture(modified_dataset): + assert modified_dataset + # the test is to do nothing. + # the dataset modification will be done, the promised capture + # before the test starts. the test will then do nothing, and + # the promise is reevaluated at the end and must succeed. diff --git a/datalad_core/tests/test_utils.py b/datalad_core/tests/test_utils.py new file mode 100644 index 0000000..4113cc4 --- /dev/null +++ b/datalad_core/tests/test_utils.py @@ -0,0 +1,12 @@ +from datalad_core.runners import call_git +from datalad_core.tests.utils import modify_dataset + + +def test_modify_dataset(gitrepo): + promise = modify_dataset(gitrepo) + assert promise == call_git( + ['status', '-uall', '--porcelain=v1'], + cwd=gitrepo, + capture_output=True, + text=True, + ) diff --git a/datalad_core/tests/utils.py b/datalad_core/tests/utils.py new file mode 100644 index 0000000..8996ab8 --- /dev/null +++ b/datalad_core/tests/utils.py @@ -0,0 +1,180 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import ( + Path, + PurePath, + ) + +from shutil import rmtree as shutil_rmtree + +from datalad_core.repo import Worktree +from datalad_core.runners import ( + call_git, +) + + +def call_git_addcommit( + cwd: Path, + paths: list[str | PurePath] | None = None, + *, + msg: str | None = None, +): + if paths is None: + paths = ['.'] + + if msg is None: + msg = 'done by call_git_addcommit()' + + call_git(['add'] + [str(p) for p in paths], cwd=cwd, capture_output=True) + call_git(['commit', '-m', msg], cwd=cwd, capture_output=True) + + +# Target `git status -uall --porcelain=v1` of `modify_dataset()` result +modify_dataset_promise = """\ + D dir_d/file_d +A dir_m/file_a + D dir_m/file_d + M dir_m/file_m + D dir_sm/sm_d + M dir_sm/sm_m + M dir_sm/sm_mu + M dir_sm/sm_n + M dir_sm/sm_nm + M dir_sm/sm_nmu + M dir_sm/sm_u +A file_a +AM file_am + D file_d + M file_m +?? dir_m/dir_u/file_u +?? dir_m/file_u +?? dir_u/file_u +?? file_u +""" + + +def modify_dataset(path: Path) -> str: + """Applies the modification for the ``modified_dataset`` fixture + + ``path`` is a directory in an existing Git repository. + + This is provided as a separate function for the case where the + modification themselves need to be modified. The fixture checks + that this does not happen. + + The function returns the ``git status -uall --porcelain=v1`` + report that it aimed to create. This is not a status report + queried after this function ran, but a "promise" that can be used + to inspect the state of a repository. + """ + ds_dir = path / 'dir_m' + ds_dir_d = path / 'dir_d' + dirsm = path / 'dir_sm' + for d in (ds_dir, ds_dir_d, dirsm): + d.mkdir() + (ds_dir / 'file_m').touch() + (path / 'file_m').touch() + dss: dict[str, Path] = {} + smnames = ( + 'sm_d', + 'sm_c', + 'sm_n', + 'sm_m', + 'sm_nm', + 'sm_u', + 'sm_mu', + 'sm_nmu', + 'droppedsm_c', + ) + for smname in smnames: + sds_path = dirsm / smname + sds_path.mkdir() + sds = Worktree.init_at(sds_path) + # we need some content for a commit + (sds_path / '.gitkeep').touch() + # for the plain modification, commit the reference right here + if smname in ('sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'): + (sds_path / 'file_m').touch() + call_git_addcommit(sds_path) + dss[smname] = sds.path + call_git( + ['submodule', 'add', f'./{sds_path}', f'{sds_path.relative_to(path)}'], + cwd=path, + capture_output=True, + ) + # files in superdataset to be deleted + for d in (ds_dir_d, ds_dir, path): + (d / 'file_d').touch() + dss['.'] = path + dss['dir'] = ds_dir + call_git_addcommit(path) + call_git( + ['submodule', 'deinit', '--force', str(dirsm / 'droppedsm_c')], + cwd=path, + capture_output=True, + ) + # a new commit + for smname in ('.', 'sm_n', 'sm_nm', 'sm_nmu'): + sub = dss[smname] + (sub / 'file_c').touch() + call_git_addcommit(sub) + # modified file + for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'): + pobj = dss[smname] + (pobj / 'file_m').write_text('modify!') + # untracked + for smname in ('.', 'dir', 'sm_u', 'sm_mu', 'sm_nmu'): + pobj = dss[smname] + (pobj / 'file_u').touch() + (pobj / 'dirempty_u').mkdir() + (pobj / 'dir_u').mkdir() + (pobj / 'dir_u' / 'file_u').touch() + # delete items + rmtree(dss['sm_d']) + rmtree(ds_dir_d) + (ds_dir / 'file_d').unlink() + (path / 'file_d').unlink() + # added items + for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'): + pobj = dss[smname] + (pobj / 'file_a').write_text('added') + call_git(['add', 'file_a'], cwd=pobj, capture_output=True) + # added and then modified file + file_am_obj = path / 'file_am' + file_am_obj.write_text('added') + call_git(['add', 'file_am'], cwd=path, capture_output=True) + file_am_obj.write_text('modified') + + return modify_dataset_promise + + +def _rmtree_onerror(func, path, exc_info): # noqa: ARG001 + """ + Error handler for ``shutil.rmtree``. + + If the error is due to an access error (read only file) + it attempts to add write permission and then retries. + + If the error is for another reason it re-raises the error. + """ + import stat + + # Is the error an access error? + if not os.access(path, os.W_OK): + os.chmod(path, stat.S_IWUSR) + func(path) + else: + raise + + +def rmtree(path: Path) -> None: + """``shutil.rmtree()`` with an error handler that sets write permissions""" + shutil_rmtree( + path, + # deprecated with PY3.12 -> onexc= + onerror=_rmtree_onerror, + )