Skip to content

Commit

Permalink
filter-repo: rewrite the stash too
Browse files Browse the repository at this point in the history
While filter-repo by default will abort if there are any stashes in the
git repository (because that makes it look like not a clean clone), it
is not uncommon for people to want to work on repositories that are not
clean clones.  In such cases, rather than simply dropping all their
stashes, it is nicer if we can "rewrite the stashes" too.

To do so, we:
  * get the revision for each stash (getting the merge commit ID is
    sufficient since revision walking then includes all the parents)
  * get a copy of the contents of the .git/logs/refs/stash file
  * provide the revisions for each stash to fast-export so that it
    iterates over the stashes too
  * between the reflog expiration and repacking steps, write out the
    contents of the .git/logs/refs/stash file after first substituting
    for all the rewritten commits

Interestingly, the issue was caused by running
  git reflog expire --expire=now --all && git gc --prune=now --aggressive
upon completion of the filtering; it is the reflog expiration step in
particular that wipes out all the stashes.  So, any page that recommends
running both of those steps or any tool that recommends running those is
putting their users' stashes at risk.  As far as I can tell, that makes
git-filter-repo (after this commit) the first repository filtering tool
to not have the stash deletion problem.

Signed-off-by: Elijah Newren <[email protected]>
  • Loading branch information
newren committed Aug 1, 2024
1 parent a12d742 commit 5cfd52f
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 6 deletions.
2 changes: 1 addition & 1 deletion contrib/filter-repo-demos/bfg-ish
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ class BFG_ish:
if not os.path.isabs(os.fsdecode(bfg_args.repo)):
bfg_args.repo = os.fsencode(os.path.join(work_dir, os.fsdecode(bfg_args.repo)))

fr.RepoFilter.cleanup(bfg_args.repo, repack=True, reset=need_another_reset)
self.filter.cleanup(bfg_args.repo, repack=True, reset=need_another_reset)

if __name__ == '__main__':
bfg = BFG_ish()
Expand Down
46 changes: 41 additions & 5 deletions git-filter-repo
Original file line number Diff line number Diff line change
Expand Up @@ -2853,12 +2853,14 @@ class RepoFilter(object):
self._finalize_handled = False
self._orig_refs = None
self._newnames = {}
self._stash = None

# Cache a few message translations for performance reasons
self._parsed_message = _("Parsed %d commits")

# Compile some regexes and cache those
self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)')
self._full_hash_re = re.compile(br'(\b[0-9a-f]{40}\b)')

def _handle_arg_callbacks(self):
def make_callback(argname, str):
Expand Down Expand Up @@ -3036,8 +3038,8 @@ class RepoFilter(object):
if len(output.splitlines()) > 1:
abort(_('you have multiple worktrees'))

@staticmethod
def cleanup(repo, repack, reset, run_quietly=False, show_debuginfo=False):
def cleanup(self, repo, repack, reset,
run_quietly=False, show_debuginfo=False):
''' cleanup repo; if repack then expire reflogs and do a gc --prune=now.
if reset then do a reset --hard. Optionally also curb output if
run_quietly is True, or go the opposite direction and show extra
Expand All @@ -3058,6 +3060,8 @@ class RepoFilter(object):
if show_debuginfo:
print("[DEBUG] Running{}: {}".format(location_info, ' '.join(cmd)))
subproc.call(cmd, cwd=repo)
if cmd[0:3] == 'git reflog expire'.split():
self._write_stash()

def _get_rename(self, old_hash):
# If we already know the rename, just return it
Expand Down Expand Up @@ -3115,6 +3119,13 @@ class RepoFilter(object):
assert new_hash is not None
return new_hash[0:orig_len]

def _translate_full_commit_hash(self, matchobj):
old_hash = matchobj.group(1)
new_hash = self._get_rename(old_hash)
if new_hash is None:
return old_hash
return new_hash

def _trim_extra_parents(self, orig_parents, parents):
'''Due to pruning of empty commits, some parents could be non-existent
(None) or otherwise redundant. Remove the non-existent parents, and
Expand Down Expand Up @@ -3685,12 +3696,37 @@ class RepoFilter(object):
# Handle sanity checks, though currently none needed for export-only cases
self._run_sanity_checks()

def _read_stash(self):
if self._orig_refs and b'refs/stash' in self._orig_refs and \
self._args.refs == ['--all']:
repo_working_dir = self._args.source or b'.'
git_dir = GitUtils.determine_git_dir(repo_working_dir)
stash = os.path.join(git_dir, b'logs', b'refs', b'stash')
if os.path.exists(stash):
with open(stash, 'br') as f:
self._stash = f.read()
out = subproc.check_output('git rev-list -g refs/stash'.split(),
cwd=repo_working_dir)
self._args.refs.extend(decode(out.strip()).split())

def _write_stash(self):
if self._stash:
target_working_dir = self._args.target or b'.'
git_dir = GitUtils.determine_git_dir(target_working_dir)
stash = os.path.join(git_dir, b'logs', b'refs', b'stash')
with open(stash, 'bw') as f:
self._stash = self._full_hash_re.sub(self._translate_full_commit_hash,
self._stash)
f.write(self._stash)
print(_("Rewrote the stash."))

def _setup_input(self, use_done_feature):
if self._args.stdin:
self._input = sys.stdin.detach()
sys.stdin = None # Make sure no one tries to accidentally use it
self._fe_orig = None
else:
self._read_stash()
skip_blobs = (self._blob_callback is None and
self._args.replace_text is None and
self._args.source == self._args.target)
Expand Down Expand Up @@ -4038,9 +4074,9 @@ class RepoFilter(object):
# If we need a repack, then nuke the reflogs and repack.
# If we need a reset, do a reset --hard
reset = not GitUtils.is_repository_bare(target_working_dir)
RepoFilter.cleanup(target_working_dir, self._args.repack, reset,
run_quietly=self._args.quiet,
show_debuginfo=self._args.debug)
self.cleanup(target_working_dir, self._args.repack, reset,
run_quietly=self._args.quiet,
show_debuginfo=self._args.debug)

# Let user know how long it took
print(_("Completely finished after {:.2f} seconds.")
Expand Down
22 changes: 22 additions & 0 deletions t/t9390-filter-repo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1918,4 +1918,26 @@ test_expect_success 'empty author ident' '
)
'

# Create two stashes, rewrite history with a path rename, and check that
# both stash entries are still listed afterwards.
test_expect_success 'rewrite stash' '
	test_create_repo rewrite_stash &&
	(
		cd rewrite_stash &&
		git init &&
		test_write_lines 1 2 3 4 5 6 7 8 9 10 >numbers &&
		git add numbers &&
		git commit -qm initial &&
		echo 11 >>numbers &&
		git stash push -m "add eleven" &&
		echo foobar >>numbers &&
		git stash push -m "add foobar" &&
		git filter-repo --force --path-rename numbers:values &&
		git stash list >output &&
		test_line_count = 2 output
	)
'

test_done

0 comments on commit 5cfd52f

Please sign in to comment.