Skip to content

Commit

Permalink
feat(iter_submodules): add match_containing mode
Browse files Browse the repository at this point in the history
This alters the `pathspec` evaluation to yield submodules that
*may* have content matching any pathspec, rather than the
pathspec having to match a submodule item directly.

This feature can be useful for implementing submodule recursion
around Git commands that do not support submodule recursion
directly.
  • Loading branch information
mih committed May 23, 2024
1 parent ff86307 commit aaaaefe
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 4 deletions.
42 changes: 38 additions & 4 deletions datalad_next/iter_collections/gitworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,25 +244,59 @@ def iter_submodules(
path: Path,
*,
pathspecs: list[str | GitPathSpec] | None = None,
match_containing: bool = False,
) -> Generator[GitTreeItem, None, None]:
"""Given a path, report all submodules of a repository worktree underneath
This is a thin convenience wrapper around ``iter_gitworktree()``.
With ``match_containing`` set to the default ``False``, this is merely a
convenience wrapper around ``iter_gitworktree()`` that selectively reports
on submodules. With ``match_containing=True`` and ``pathspecs`` given, the
yielded items correspond to submodules where the given ``pathspecs``
*could* match content. This includes submodules that are not available
locally, because no actual matching of pathspecs to submodule content is
performed -- only an evaluation of the submodule item itself.
"""
if not pathspecs:
# force flag to be sensible to simplify internal logic
match_containing = False
else:
pathspecs = [GitPathSpec.from_pathspec_str(ps) for ps in pathspecs]

for item in iter_gitworktree(
path,
untracked=None,
link_target=False,
fp=False,
recursive='repository',
pathspecs=pathspecs,
# if we want to match submodules that contain pathspecs matches
# we cannot give the job to Git, it won't report anything,
# but we need to match manually below
pathspecs=None if match_containing else pathspecs,
):
# exclude non-submodules, or a submodule that was found at
# the root path -- which would indicate that the submodule
# itself is not around, only its record in the parent
if item.gittype == GitTreeItemType.submodule \
and item.name != PurePath('.'):
if item.gittype != GitTreeItemType.submodule \
or item.name == PurePath('.'):
continue

if not match_containing:
yield item
continue

# does any pathspec match the "inside" of the current submodule's
# path
# we are using any() here to return as fast as possible.
# theoretically, we could also port all of them and enhance
# GitTreeItem to carry them outside, but we have no idea
# about the outside use case here, and cannot assume the additional
# cost is worth it
if any(ps.for_subdir(str(item.name)) for ps in pathspecs):
yield item
continue

# no match
continue


def _get_item(
Expand Down
16 changes: 16 additions & 0 deletions datalad_next/iter_collections/tests/test_itergitworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,19 @@ def test_iter_submodules(modified_dataset):
res = list(iter_submodules(p, pathspecs=[':(exclude)*/sm_c']))
assert len(res) == len(all_sm) - 1
assert not any(str(r.name) == 'dir_sm/sm_c' for r in res)

# test pathspecs matching inside submodules
# baseline, pointing inside a submodule gives no matching results
assert not list(iter_submodules(p, pathspecs=['dir_sm/sm_c/.datalad']))
# we can discover the submodule that could have content that matches
# the pathspec
res = list(iter_submodules(p, pathspecs=['dir_sm/sm_c/.datalad'],
match_containing=True))
assert len(res) == 1
assert str(res[0].name) == 'dir_sm/sm_c'
# if we use a wildcard that matches any submodule, we also get all of them
# and this includes the dropped submodule, because iter_submodules()
# makes no assumptions about what this information will be used for
res = list(iter_submodules(p, pathspecs=['*/.datalad'],
match_containing=True))
assert len(res) == len(all_sm)

0 comments on commit aaaaefe

Please sign in to comment.