Skip to content

Commit

Permalink
git: skip ignored files while walking worktree
Browse files Browse the repository at this point in the history
Skip ignored files when walking through the worktree.

This significantly improves the performance of `Status()`:
In a repository with 3M ignored files `Status` now takes 5 s instead of 160 s.
  • Loading branch information
silkeh committed Jan 13, 2025
1 parent d90b620 commit 233345e
Show file tree
Hide file tree
Showing 5 changed files with 338 additions and 21 deletions.
137 changes: 137 additions & 0 deletions plumbing/format/gitignore/noder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package gitignore

import (
"slices"

"github.com/go-git/go-git/v5/utils/merkletrie/noder"
)

// Compile-time assertion that *MatchNoder satisfies the noder.Noder interface.
var _ noder.Noder = (*MatchNoder)(nil)

// MatchNoder is an implementation of [noder.Noder] that only includes nodes based on a pattern.
//
// It wraps another noder and filters the list returned by Children; all other
// noder.Noder methods are provided by the embedded node.
type MatchNoder struct {
// Noder is the wrapped node whose children are filtered.
noder.Noder

// matcher decides which paths match; a nil matcher matches nothing (see match).
matcher Matcher
// invert selects the filter direction: when true, matched children are
// left out; when false, unmatched children are left out.
invert bool
// path holds the name components of this node and is used as the prefix
// when building the path of each child. IgnoreNoder leaves it empty for a
// root named ".".
path []string
// children caches the filtered result computed by Children.
children []noder.Noder
}

// IgnoreNoder wraps n in a [MatchNoder] whose Children leave out every node
// that m matches.
//
// The name of n becomes the first component of the path used for matching,
// except when that name is ".", in which case the path starts out empty.
func IgnoreNoder(m Matcher, n noder.Noder) *MatchNoder {
	wrapped := &MatchNoder{
		Noder:   n,
		matcher: m,
		invert:  true,
	}

	if rootName := n.Name(); rootName != "." {
		wrapped.path = []string{rootName}
	}

	return wrapped
}

// Children returns the children of the wrapped node that pass the filter.
// It implements [noder.Noder].
//
// The filtered list is computed on the first successful call and reused on
// later calls. If the wrapped node fails to list its children, that error is
// returned together with a nil slice and nothing is cached.
func (n *MatchNoder) Children() ([]noder.Noder, error) {
	// A non-nil slice marks the cache as populated. Testing the length
	// instead would treat an empty result as "not cached yet", so empty or
	// fully filtered directories would be listed and filtered again on every
	// call (including every NumChildren call).
	if n.children != nil {
		return n.children, nil
	}

	children, err := n.Noder.Children()
	if err != nil {
		return nil, err
	}

	filtered := n.ignoreChildren(children)
	if filtered == nil {
		// Keep the "non-nil means cached" invariant even for an empty result.
		filtered = []noder.Noder{}
	}

	n.children = filtered

	return n.children, nil
}

// ignoreChildren returns the subset of children that the filter does not
// exclude, in their original order. Each kept child is passed through
// newChild, so kept directories are themselves wrapped in a MatchNoder.
// The result is always a non-nil slice.
func (n *MatchNoder) ignoreChildren(children []noder.Noder) []noder.Noder {
	kept := make([]noder.Noder, 0, len(children))

	for i := range children {
		candidate := children[i]

		// childPath may share its backing array with n.path; newChild
		// clones it before storing it, so it is only used transiently here.
		childPath := append(n.path, candidate.Name())
		if !n.match(childPath, candidate.IsDir()) {
			kept = append(kept, n.newChild(candidate, childPath))
		}
	}

	return kept
}

// match reports whether the node at path must be left out of the filtered
// tree.
//
// With invert set, a node is left out when the matcher matches it; with
// invert unset, a node is left out when the matcher does not match it.
// A nil matcher is treated as matching nothing.
func (n *MatchNoder) match(path []string, isDir bool) bool {
	matched := n.matcher != nil && n.matcher.Match(path, isDir)

	return matched == n.invert
}

// newChild prepares a kept child for inclusion in the filtered tree.
// Non-directories are returned as they are. Directories are wrapped in a new
// MatchNoder that inherits this node's matcher and invert setting and owns a
// private copy of path.
func (n *MatchNoder) newChild(child noder.Noder, path []string) noder.Noder {
	if child.IsDir() {
		wrapped := &MatchNoder{
			Noder:   child,
			matcher: n.matcher,
			invert:  n.invert,
		}
		// Copy the path: the caller's slice may be reused for siblings.
		wrapped.path = slices.Clone(path)

		return wrapped
	}

	return child
}

// NumChildren returns how many children remain after filtering.
// It implements [noder.Noder].
//
// The count is taken from Children, so any error from Children is returned
// here with a count of zero.
func (n *MatchNoder) NumChildren() (int, error) {
	filtered, err := n.Children()
	if err == nil {
		return len(filtered), nil
	}

	return 0, err
}

// PathIgnored returns true if the given [noder.Path] is ignored.
//
// The decision is made by match from the names of the path's elements and
// the result of path.IsDir(); this node's own path is not prepended.
func (n *MatchNoder) PathIgnored(path noder.Path) bool {
	names := n.noderPaths(path)
	isDir := path.IsDir()

	return n.match(names, isDir)
}

// FindPath returns the corresponding [noder.Path] from the tree if there is one.
// It does not apply patterns, allowing retrieval of ignored nodes.
//
// The lookup starts at the wrapped node and descends one level per element
// of p, selecting the child with the same name at each step. If any step has
// no such child, or p is empty, the result is (nil, false).
func (n *MatchNoder) FindPath(p noder.Path) (path noder.Path, found bool) {
	current := n.Noder

	for _, wanted := range p {
		next, ok := n.findChild(current, wanted.Name())
		if !ok {
			return nil, false
		}

		path = append(path, next)
		current = next
	}

	// An empty p never enters the loop and therefore resolves to nothing.
	return path, len(path) > 0
}

// findChild returns the first direct child of node whose name equals name.
// The boolean result is false, and the node nil, when there is no such child.
func (n *MatchNoder) findChild(node noder.Noder, name string) (noder.Noder, bool) {
	// The error from Children is discarded; only the returned slice is searched.
	children, _ := node.Children()

	idx := slices.IndexFunc(children, func(c noder.Noder) bool {
		return c.Name() == name
	})
	if idx < 0 {
		return nil, false
	}

	return children[idx], true
}

// noderPaths converts a noder.Path into the list of its elements' names,
// preserving order. The result is a non-nil slice with one entry per element.
func (n *MatchNoder) noderPaths(path noder.Path) []string {
	names := make([]string, 0, len(path))

	for _, elem := range path {
		names = append(names, elem.Name())
	}

	return names
}
144 changes: 144 additions & 0 deletions plumbing/format/gitignore/noder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package gitignore_test

import (
"io/fs"
"testing"

"github.com/go-git/go-git/v5/plumbing/format/gitignore"
"github.com/go-git/go-git/v5/utils/merkletrie/noder"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// mockNoder is a test double for noder.Noder: every method simply returns the
// value stored in the corresponding field.
type mockNoder struct {
	name        string        // returned by Name
	isDir       bool          // returned by IsDir
	hash        []byte        // returned by Hash
	string      string        // returned by String
	skip        bool          // returned by Skip
	children    []noder.Noder // returned by Children; its length is returned by NumChildren
	childrenErr error         // returned as the error of Children and NumChildren
}

// Hash returns the configured hash.
func (m mockNoder) Hash() []byte {
	return m.hash
}

// String returns the configured string representation.
func (m mockNoder) String() string {
	return m.string
}

// Name returns the configured name.
func (m mockNoder) Name() string {
	return m.name
}

// IsDir returns the configured directory flag.
func (m mockNoder) IsDir() bool {
	return m.isDir
}

// Children returns the configured children together with the configured error.
func (m mockNoder) Children() ([]noder.Noder, error) {
	return m.children, m.childrenErr
}

// NumChildren returns the number of configured children together with the
// configured error.
func (m mockNoder) NumChildren() (int, error) {
	return len(m.children), m.childrenErr
}

// Skip returns the configured skip flag.
func (m mockNoder) Skip() bool {
	return m.skip
}

func TestMatchNoder_Children(t *testing.T) {
mock := mockNoder{
name: ".",
children: []noder.Noder{
mockNoder{name: "volcano"},
mockNoder{name: "caldera"},
mockNoder{name: "super", isDir: true, children: []noder.Noder{
mockNoder{name: "caldera", children: []noder.Noder{}},
}},
},
}
patterns := []gitignore.Pattern{
gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
gitignore.ParsePattern("volcano", nil),
}

tests := map[string]struct {
Matcher gitignore.Matcher
Noder mockNoder
ExpErr error
ExpChildren []noder.Noder
Skip bool
}{
"children": {
Matcher: gitignore.NewMatcher([]gitignore.Pattern{
gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
gitignore.ParsePattern("volcano", nil),
}),
Noder: mock,
ExpChildren: []noder.Noder{
mock.children[1],
gitignore.IgnoreNoder(gitignore.NewMatcher(patterns), mock.children[2]),
},
},
"error": {
Matcher: gitignore.NewMatcher([]gitignore.Pattern{
gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
gitignore.ParsePattern("volcano", nil),
}),
Noder: mockNoder{name: ".", childrenErr: fs.ErrNotExist},
ExpErr: fs.ErrNotExist,
},
}

for name, tc := range tests {
t.Run(name, func(t *testing.T) {
ignoreNoder := gitignore.IgnoreNoder(tc.Matcher, tc.Noder)

children, err := ignoreNoder.Children()
require.ErrorIs(t, err, tc.ExpErr)
assert.Equal(t, tc.ExpChildren, children)

// Do it twice for the cached children
children, err = ignoreNoder.Children()
require.ErrorIs(t, err, tc.ExpErr)
assert.Equal(t, tc.ExpChildren, children)

num, err := ignoreNoder.NumChildren()
require.ErrorIs(t, err, tc.ExpErr)
assert.Equal(t, len(tc.ExpChildren), num)
})
}
}

// TestMatchNoder_PathIgnored checks PathIgnored on two three-element paths
// that differ only in their last name: one is expected to be reported as
// ignored, the other is not.
func TestMatchNoder_PathIgnored(t *testing.T) {
	matcher := gitignore.NewMatcher([]gitignore.Pattern{
		gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
		gitignore.ParsePattern("volcano", nil),
	})

	cases := []struct {
		leaf    string
		ignored bool
	}{
		{leaf: "volcano", ignored: true},
		{leaf: "potato", ignored: false},
	}

	for _, tc := range cases {
		path := noder.Path{
			mockNoder{name: "head"},
			mockNoder{name: "middle"},
			mockNoder{name: tc.leaf},
		}

		// A fresh noder is built for every lookup.
		got := gitignore.IgnoreNoder(matcher, mockNoder{name: "."}).PathIgnored(path)
		assert.Equal(t, tc.ignored, got, tc.leaf)
	}
}

// TestMatchNoder_FindPath checks that FindPath resolves an existing path to
// the underlying nodes and reports a missing path as (nil, false).
func TestMatchNoder_FindPath(t *testing.T) {
	nested := mockNoder{name: "volcano", children: []noder.Noder{}}
	super := mockNoder{name: "super", isDir: true, children: []noder.Noder{nested}}
	root := mockNoder{
		name:     ".",
		children: []noder.Noder{mockNoder{name: "volcano"}, super},
	}

	matcher := gitignore.NewMatcher([]gitignore.Pattern{
		gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
		gitignore.ParsePattern("volcano", nil),
	})

	// Existing path: both elements are returned as stored in the mock tree.
	existing := noder.Path{mockNoder{name: "super"}, mockNoder{name: "volcano"}}
	got, ok := gitignore.IgnoreNoder(matcher, root).FindPath(existing)
	assert.True(t, ok)
	assert.Equal(t, noder.Path{super, nested}, got)

	// Missing leaf: "super" has no child named "caldera".
	missing := noder.Path{mockNoder{name: "super"}, mockNoder{name: "caldera"}}
	got, ok = gitignore.IgnoreNoder(matcher, root).FindPath(missing)
	assert.False(t, ok)
	assert.Nil(t, got)
}
41 changes: 31 additions & 10 deletions utils/merkletrie/difftree.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ import (
"errors"
"fmt"

"github.com/go-git/go-git/v5/plumbing/format/gitignore"
"github.com/go-git/go-git/v5/utils/merkletrie/noder"
)

Expand Down Expand Up @@ -297,8 +298,14 @@ func DiffTreeContext(ctx context.Context, fromTree, toTree noder.Noder,
case noMoreNoders:
return ret, nil
case onlyFromRemains:
if err = ret.AddRecursiveDelete(from); err != nil {
return nil, err
if node, ok := ignoredNode(toTree, from); ok {
if err = diffNodesSameName(&ret, ii, ii.from.current, node); err != nil {
return nil, err
}
} else {
if err = ret.AddRecursiveDelete(from); err != nil {
return nil, err
}
}
if err = ii.nextFrom(); err != nil {
return nil, err
Expand Down Expand Up @@ -353,8 +360,10 @@ func diffNodes(changes *Changes, ii *doubleIter) error {
// compare their full paths as strings
switch from.Compare(to) {
case -1:
if err = changes.AddRecursiveDelete(from); err != nil {
return err
if ok := isIgnoredNode(to[0], from); !ok {
if err = changes.AddRecursiveDelete(from); err != nil {
return err
}
}
if err = ii.nextFrom(); err != nil {
return err
Expand All @@ -367,19 +376,16 @@ func diffNodes(changes *Changes, ii *doubleIter) error {
return err
}
default:
if err := diffNodesSameName(changes, ii); err != nil {
if err := diffNodesSameName(changes, ii, ii.from.current, ii.to.current); err != nil {
return err
}
}

return nil
}

func diffNodesSameName(changes *Changes, ii *doubleIter) error {
from := ii.from.current
to := ii.to.current

status, err := ii.compare()
func diffNodesSameName(changes *Changes, ii *doubleIter, from, to noder.Path) error {
status, err := ii.compareNoders(from, to)
if err != nil {
return err
}
Expand Down Expand Up @@ -451,3 +457,18 @@ func diffDirs(changes *Changes, ii *doubleIter) error {

return nil
}

// isIgnoredNode reports whether tree is a *gitignore.MatchNoder whose
// PathIgnored returns true for path. Any other kind of tree yields false.
func isIgnoredNode(tree noder.Noder, path noder.Path) bool {
	if matchNoder, isMatchNoder := tree.(*gitignore.MatchNoder); isMatchNoder {
		return matchNoder.PathIgnored(path)
	}

	return false
}

// ignoredNode looks up path in tree when tree is a *gitignore.MatchNoder that
// reports path as ignored. In that case the result of FindPath is returned;
// otherwise the result is (nil, false).
func ignoredNode(tree noder.Noder, path noder.Path) (noder.Path, bool) {
	matchNoder, isMatchNoder := tree.(*gitignore.MatchNoder)

	switch {
	case !isMatchNoder:
		return nil, false
	case !matchNoder.PathIgnored(path):
		return nil, false
	default:
		return matchNoder.FindPath(path)
	}
}
14 changes: 9 additions & 5 deletions utils/merkletrie/doubleiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,21 +137,25 @@ const (
// Compare returns the comparison between the current elements in the
// merkletries.
func (d *doubleIter) compare() (s comparison, err error) {
s.sameHash = d.hashEqual(d.from.current, d.to.current)
return d.compareNoders(d.from.current, d.to.current)
}

func (d *doubleIter) compareNoders(from, to noder.Noder) (s comparison, err error) {
s.sameHash = d.hashEqual(from, to)

fromIsDir := d.from.current.IsDir()
toIsDir := d.to.current.IsDir()
fromIsDir := from.IsDir()
toIsDir := to.IsDir()

s.bothAreDirs = fromIsDir && toIsDir
s.bothAreFiles = !fromIsDir && !toIsDir
s.fileAndDir = !s.bothAreDirs && !s.bothAreFiles

fromNumChildren, err := d.from.current.NumChildren()
fromNumChildren, err := from.NumChildren()
if err != nil {
return comparison{}, fmt.Errorf("from: %s", err)
}

toNumChildren, err := d.to.current.NumChildren()
toNumChildren, err := to.NumChildren()
if err != nil {
return comparison{}, fmt.Errorf("to: %s", err)
}
Expand Down
Loading

0 comments on commit 233345e

Please sign in to comment.