diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-11-16 15:48:06 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-11-16 15:48:06 +0300 |
commit | 724e844c814b91bb4a98f7eb3607756b7504657d (patch) | |
tree | 87fdcab33ffcec1fdf7d640ad15a1fba094276c9 /internal | |
parent | 8f04e23b9f004d70635aaeecd30d837a584572f8 (diff) | |
parent | 61c5912f7d9dcb2068b04ab1594281e8d45d8975 (diff) |
Merge branch 'pks-commit-tree-entries' into 'master'
commit: Refactor recursive `GetTreeEntries()` for efficiency
Closes #3888
See merge request gitlab-org/gitaly!4052
Diffstat (limited to 'internal')
-rw-r--r-- | internal/git/gittest/tree.go | 4 | ||||
-rw-r--r-- | internal/git/lstree/entries.go | 38 | ||||
-rw-r--r-- | internal/git/lstree/last_commits.go | 101 | ||||
-rw-r--r-- | internal/git/lstree/list_entries.go | 90 | ||||
-rw-r--r-- | internal/git/lstree/list_entries_test.go | 158 | ||||
-rw-r--r-- | internal/git/lstree/parser.go | 88 | ||||
-rw-r--r-- | internal/git/lstree/parser_test.go (renamed from internal/git/lstree/last_commits_test.go) | 64 | ||||
-rw-r--r-- | internal/git/lstree/testhelper_test.go | 11 | ||||
-rw-r--r-- | internal/gitaly/service/commit/tree_entries.go | 125 | ||||
-rw-r--r-- | internal/gitaly/service/commit/tree_entries_test.go | 77 | ||||
-rw-r--r-- | internal/gitaly/service/operations/submodules_test.go | 2 | ||||
-rw-r--r-- | internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go | 5 | ||||
-rw-r--r-- | internal/testhelper/featureset.go | 16 |
13 files changed, 619 insertions, 160 deletions
diff --git a/internal/git/gittest/tree.go b/internal/git/gittest/tree.go index 757f58a77..8895a414b 100644 --- a/internal/git/gittest/tree.go +++ b/internal/git/gittest/tree.go @@ -75,14 +75,14 @@ func RequireTree(t testing.TB, cfg config.Cfg, repoPath, treeish string, expecte func WriteTree(t testing.TB, cfg config.Cfg, repoPath string, entries []TreeEntry) git.ObjectID { t.Helper() - require.NotEmpty(t, entries) - var tree bytes.Buffer for _, entry := range entries { var entryType string switch entry.Mode { case "100644": entryType = "blob" + case "100755": + entryType = "blob" case "040000": entryType = "tree" default: diff --git a/internal/git/lstree/entries.go b/internal/git/lstree/entries.go new file mode 100644 index 000000000..4a729fec0 --- /dev/null +++ b/internal/git/lstree/entries.go @@ -0,0 +1,38 @@ +package lstree + +import "gitlab.com/gitlab-org/gitaly/v14/internal/git" + +// ObjectType is an Enum for the type of object of +// the ls-tree entry, which can be can be tree, blob or commit +type ObjectType int + +// Entry represents a single ls-tree entry +type Entry struct { + Mode []byte + Type ObjectType + ObjectID git.ObjectID + Path string +} + +// Entries holds every ls-tree Entry +type Entries []Entry + +// Enum values for ObjectType +const ( + Tree ObjectType = iota + Blob + Submodule +) + +func (e Entries) Len() int { + return len(e) +} + +func (e Entries) Swap(i, j int) { + e[i], e[j] = e[j], e[i] +} + +// Less sorts entries by type in the order [*tree *blobs *submodules] +func (e Entries) Less(i, j int) bool { + return e[i].Type < e[j].Type +} diff --git a/internal/git/lstree/last_commits.go b/internal/git/lstree/last_commits.go deleted file mode 100644 index def66b5b1..000000000 --- a/internal/git/lstree/last_commits.go +++ /dev/null @@ -1,101 +0,0 @@ -package lstree - -import ( - "bufio" - "bytes" - "errors" - "io" -) - -// ObjectType is an Enum for the type of object of -// the ls-tree entry, which can be can be tree, blob or commit -type ObjectType int - -// Entry represents a single ls-tree entry -type Entry struct { - Mode []byte - Type ObjectType - Oid string - Path string -} - -// Entries holds every ls-tree Entry -type Entries []Entry - -// Parser holds the necessary state for parsing the ls-tree output -type Parser struct { - reader *bufio.Reader -} - -// Enum values for ObjectType -const ( - Tree ObjectType = iota - Blob - Submodule -) - -// ErrParse is returned when the parse of an entry was unsuccessful -var ErrParse = errors.New("failed to parse git ls-tree response") - -func (e Entries) Len() int { - return len(e) -} - -func (e Entries) Swap(i, j int) { - e[i], e[j] = e[j], e[i] -} - -// Less sorts entries by type in the order [*tree *blobs *submodules] -func (e Entries) Less(i, j int) bool { - return e[i].Type < e[j].Type -} - -// NewParser returns a new Parser -func NewParser(src io.Reader) *Parser { - return &Parser{ - reader: bufio.NewReader(src), - } -} - -// NextEntry reads from git ls-tree --z --full-name command -// parses the tree entry and returns a *Entry. -func (p *Parser) NextEntry() (*Entry, error) { - data, err := p.reader.ReadBytes(0x00) - if err != nil { - return nil, err - } - - // We expect each `data` to be <mode> SP <type> SP <object> TAB <path>\0. - split := bytes.SplitN(data, []byte(" "), 3) - if len(split) != 3 { - return nil, ErrParse - } - - objectAndFile := bytes.SplitN(split[len(split)-1], []byte("\t"), 2) - if len(objectAndFile) != 2 { - return nil, ErrParse - } - - objectType, err := toEnum(string(split[1])) - if err != nil { - return nil, err - } - - // We know that the last byte in 'path' will be a zero byte. - path := string(bytes.TrimRight(objectAndFile[1], "\x00")) - - return &Entry{Mode: split[0], Type: objectType, Oid: string(objectAndFile[0]), Path: path}, nil -} - -func toEnum(s string) (ObjectType, error) { - switch s { - case "tree": - return Tree, nil - case "blob": - return Blob, nil - case "commit": - return Submodule, nil - default: - return -1, ErrParse - } -} diff --git a/internal/git/lstree/list_entries.go b/internal/git/lstree/list_entries.go new file mode 100644 index 000000000..ab27d6f5d --- /dev/null +++ b/internal/git/lstree/list_entries.go @@ -0,0 +1,90 @@ +package lstree + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "strings" + + "gitlab.com/gitlab-org/gitaly/v14/internal/git" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo" +) + +var ( + // ErrNotExist indicates that the requested tree does not exist, either because the revision + // is invalid or because the path is not valid. + ErrNotExist = errors.New("invalid object name") + // ErrNotTreeish indicates that the requested revision or path does not resolve to a tree + // object. + ErrNotTreeish = errors.New("not treeish") +) + +// ListEntriesConfig is configuration that can be passed to ListEntries. +type ListEntriesConfig struct { + // Recursive indicates whether the listing shall be recursive or not. + Recursive bool + // RelativePath is the relative path at which listing of entries should be started. + RelativePath string +} + +// ListEntries lists tree entries for the given treeish. By default, this will do a non-recursive +// listing starting from the root of the given treeish. This behaviour can be changed by passing a +// config. +func ListEntries( + ctx context.Context, + repo *localrepo.Repo, + treeish git.Revision, + cfg *ListEntriesConfig, +) ([]*Entry, error) { + if cfg == nil { + cfg = &ListEntriesConfig{} + } + + flags := []git.Option{git.Flag{Name: "-z"}} + if cfg.Recursive { + flags = append(flags, + git.Flag{Name: "-r"}, + git.Flag{Name: "-t"}, + ) + } + + var stderr bytes.Buffer + cmd, err := repo.Exec(ctx, git.SubCmd{ + Name: "ls-tree", + Args: []string{fmt.Sprintf("%s:%s", treeish, cfg.RelativePath)}, + Flags: flags, + }, git.WithStderr(&stderr)) + if err != nil { + return nil, fmt.Errorf("spawning git-ls-tree: %w", err) + } + + parser := NewParser(cmd) + var entries []*Entry + for { + entry, err := parser.NextEntry() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + + return nil, fmt.Errorf("parsing tree entry: %w", err) + } + + entries = append(entries, entry) + } + + if err := cmd.Wait(); err != nil { + errorMessage := stderr.String() + if strings.HasPrefix(errorMessage, "fatal: not a tree object") { + return nil, ErrNotTreeish + } else if strings.HasPrefix(errorMessage, "fatal: Not a valid object name") { + return nil, ErrNotExist + } + + return nil, fmt.Errorf("waiting for git-ls-tree: %w, stderr: %q", err, errorMessage) + } + + return entries, nil +} diff --git a/internal/git/lstree/list_entries_test.go b/internal/git/lstree/list_entries_test.go new file mode 100644 index 000000000..ae376ace0 --- /dev/null +++ b/internal/git/lstree/list_entries_test.go @@ -0,0 +1,158 @@ +package lstree + +import ( + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v14/internal/git" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg" +) + +func TestListEntries(t *testing.T) { + cfg := testcfg.Build(t) + + ctx, cancel := testhelper.Context() + defer cancel() + + repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("blob contents")) + emptyTreeID := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{}) + treeWithBlob := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + {OID: blobID, Mode: "100644", Path: "nonexecutable"}, + {OID: blobID, Mode: "100755", Path: "executable"}, + }) + treeWithSubtree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + {OID: emptyTreeID, Mode: "040000", Path: "subdir"}, + }) + treeWithNestedSubtrees := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + {OID: treeWithSubtree, Mode: "040000", Path: "nested-subdir"}, + }) + treeWithSubtreeAndBlob := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + {OID: treeWithSubtree, Mode: "040000", Path: "subdir"}, + {OID: blobID, Mode: "100644", Path: "blob"}, + }) + treeWithSubtreeContainingBlob := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + {OID: treeWithSubtreeAndBlob, Mode: "040000", Path: "subdir"}, + }) + + for _, tc := range []struct { + desc string + treeish git.Revision + cfg *ListEntriesConfig + expectedResults []*Entry + expectedErr error + }{ + { + desc: "empty tree", + treeish: emptyTreeID.Revision(), + }, + { + desc: "tree with blob", + treeish: treeWithBlob.Revision(), + expectedResults: []*Entry{ + {Mode: []byte("100755"), Type: Blob, ObjectID: blobID, Path: "executable"}, + {Mode: []byte("100644"), Type: Blob, ObjectID: blobID, Path: "nonexecutable"}, + }, + }, + { + desc: "tree with subtree", + treeish: treeWithSubtree.Revision(), + expectedResults: []*Entry{ + {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir"}, + }, + }, + { + desc: "nested trees", + treeish: treeWithNestedSubtrees.Revision(), + expectedResults: []*Entry{ + {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "nested-subdir"}, + }, + }, + { + desc: "recursive nested trees", + treeish: treeWithNestedSubtrees.Revision(), + cfg: &ListEntriesConfig{ + Recursive: true, + }, + expectedResults: []*Entry{ + {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "nested-subdir"}, + {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "nested-subdir/subdir"}, + }, + }, + { + desc: "nested subtree", + treeish: treeWithNestedSubtrees.Revision(), + cfg: &ListEntriesConfig{ + RelativePath: "nested-subdir", + }, + expectedResults: []*Entry{ + {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir"}, + }, + }, + { + desc: "nested recursive subtree", + treeish: treeWithSubtreeContainingBlob.Revision(), + cfg: &ListEntriesConfig{ + RelativePath: "subdir", + Recursive: true, + }, + expectedResults: []*Entry{ + {Mode: []byte("100644"), Type: Blob, ObjectID: blobID, Path: "blob"}, + {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "subdir"}, + {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir/subdir"}, + }, + }, + { + desc: "recursive nested trees and blobs", + treeish: treeWithSubtreeAndBlob.Revision(), + cfg: &ListEntriesConfig{ + Recursive: true, + }, + expectedResults: []*Entry{ + {Mode: []byte("100644"), Type: Blob, ObjectID: blobID, Path: "blob"}, + {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "subdir"}, + {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir/subdir"}, + }, + }, + { + desc: "listing blob fails", + treeish: blobID.Revision(), + // We get a NotExist error here because it's invalid to suffix an object ID + // which resolves to a blob with a colon (":") given that it's not possible + // to resolve a subpath. + expectedErr: ErrNotExist, + }, + { + desc: "valid revision with invalid path", + treeish: treeWithSubtree.Revision(), + cfg: &ListEntriesConfig{ + RelativePath: "does-not-exist", + }, + expectedErr: ErrNotExist, + }, + { + desc: "valid revision with path pointing to blob", + treeish: treeWithSubtreeAndBlob.Revision(), + cfg: &ListEntriesConfig{ + RelativePath: "blob", + }, + expectedErr: ErrNotTreeish, + }, + { + desc: "listing nonexistent object fails", + treeish: "does-not-exist", + expectedErr: ErrNotExist, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + results, err := ListEntries(ctx, repo, tc.treeish, tc.cfg) + require.Equal(t, tc.expectedResults, results) + require.Equal(t, tc.expectedErr, err) + }) + } +} diff --git a/internal/git/lstree/parser.go b/internal/git/lstree/parser.go new file mode 100644 index 000000000..13fe2de66 --- /dev/null +++ b/internal/git/lstree/parser.go @@ -0,0 +1,88 @@ +package lstree + +import ( + "bufio" + "errors" + "fmt" + "io" + + "gitlab.com/gitlab-org/gitaly/v14/internal/git" +) + +// ErrParse is returned when the parse of an entry was unsuccessful +var ErrParse = errors.New("failed to parse git ls-tree response") + +// Parser holds the necessary state for parsing the ls-tree output +type Parser struct { + reader *bufio.Reader +} + +// NewParser returns a new Parser +func NewParser(src io.Reader) *Parser { + return &Parser{ + reader: bufio.NewReader(src), + } +} + +// NextEntry reads a tree entry as it would be written by `git ls-tree -z`. +func (p *Parser) NextEntry() (*Entry, error) { + // Each tree entry is expected to have a format of `<mode> SP <type> SP <objectid> TAB <path> NUL`. + + treeEntryMode, err := p.reader.ReadBytes(' ') + if err != nil { + if errors.Is(err, io.EOF) { + return nil, io.EOF + } + + return nil, fmt.Errorf("reading mode: %w", err) + } + treeEntryMode = treeEntryMode[:len(treeEntryMode)-1] + + treeEntryType, err := p.reader.ReadBytes(' ') + if err != nil { + return nil, fmt.Errorf("reading type: %w", err) + } + treeEntryType = treeEntryType[:len(treeEntryType)-1] + + treeEntryID, err := p.reader.ReadBytes('\t') + if err != nil { + return nil, fmt.Errorf("reading OID: %w", err) + } + treeEntryID = treeEntryID[:len(treeEntryID)-1] + + treeEntryPath, err := p.reader.ReadBytes(0x00) + if err != nil { + return nil, fmt.Errorf("reading path: %w", err) + } + treeEntryPath = treeEntryPath[:len(treeEntryPath)-1] + + objectType, err := toEnum(string(treeEntryType)) + if err != nil { + return nil, err + } + + objectID, err := git.NewObjectIDFromHex(string(treeEntryID)) + if err != nil { + return nil, err + } + + return &Entry{ + Mode: treeEntryMode, + Type: objectType, + ObjectID: objectID, + Path: string(treeEntryPath), + }, nil +} + +func toEnum(s string) (ObjectType, error) { + switch s { + case "tree": + return Tree, nil + case "blob": + return Blob, nil + case "commit": + return Submodule, nil + default: + return -1, ErrParse + } +} diff --git a/internal/git/lstree/last_commits_test.go b/internal/git/lstree/parser_test.go index d12e97247..f1a4dfab8 100644 --- a/internal/git/lstree/last_commits_test.go +++ b/internal/git/lstree/parser_test.go @@ -19,28 +19,28 @@ func TestParser(t *testing.T) { filename: "testdata/z-lstree.txt", entries: Entries{ { - Mode: []byte("100644"), - Type: Blob, - Oid: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82", - Path: ".gitignore", + Mode: []byte("100644"), + Type: Blob, + ObjectID: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82", + Path: ".gitignore", }, { - Mode: []byte("100644"), - Type: Blob, - Oid: "0792c58905eff3432b721f8c4a64363d8e28d9ae", - Path: ".gitmodules", + Mode: []byte("100644"), + Type: Blob, + ObjectID: "0792c58905eff3432b721f8c4a64363d8e28d9ae", + Path: ".gitmodules", }, { - Mode: []byte("040000"), - Type: Tree, - Oid: "3c122d2b7830eca25235131070602575cf8b41a1", - Path: "encoding", + Mode: []byte("040000"), + Type: Tree, + ObjectID: "3c122d2b7830eca25235131070602575cf8b41a1", + Path: "encoding", }, { - Mode: []byte("160000"), - Type: Submodule, - Oid: "79bceae69cb5750d6567b223597999bfa91cb3b9", - Path: "gitlab-shell", + Mode: []byte("160000"), + Type: Submodule, + ObjectID: "79bceae69cb5750d6567b223597999bfa91cb3b9", + Path: "gitlab-shell", }, }, }, @@ -49,28 +49,28 @@ func TestParser(t *testing.T) { filename: "testdata/z-lstree-irregular.txt", entries: Entries{ { - Mode: []byte("100644"), - Type: Blob, - Oid: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82", - Path: ".gitignore", + Mode: []byte("100644"), + Type: Blob, + ObjectID: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82", + Path: ".gitignore", }, { - Mode: []byte("100644"), - Type: Blob, - Oid: "0792c58905eff3432b721f8c4a64363d8e28d9ae", - Path: ".gitmodules", + Mode: []byte("100644"), + Type: Blob, + ObjectID: "0792c58905eff3432b721f8c4a64363d8e28d9ae", + Path: ".gitmodules", }, { - Mode: []byte("040000"), - Type: Tree, - Oid: "3c122d2b7830eca25235131070602575cf8b41a1", - Path: "some encoding", + Mode: []byte("040000"), + Type: Tree, + ObjectID: "3c122d2b7830eca25235131070602575cf8b41a1", + Path: "some encoding", }, { - Mode: []byte("160000"), - Type: Submodule, - Oid: "79bceae69cb5750d6567b223597999bfa91cb3b9", - Path: "gitlab-shell", + Mode: []byte("160000"), + Type: Submodule, + ObjectID: "79bceae69cb5750d6567b223597999bfa91cb3b9", + Path: "gitlab-shell", }, }, }, diff --git a/internal/git/lstree/testhelper_test.go b/internal/git/lstree/testhelper_test.go new file mode 100644 index 000000000..484c397e6 --- /dev/null +++ b/internal/git/lstree/testhelper_test.go @@ -0,0 +1,11 @@ +package lstree + +import ( + "testing" + + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" +) + +func TestMain(m *testing.M) { + testhelper.Run(m) +} diff --git a/internal/gitaly/service/commit/tree_entries.go b/internal/gitaly/service/commit/tree_entries.go index 73e13e9c6..98eb0b7f9 100644 --- a/internal/gitaly/service/commit/tree_entries.go +++ b/internal/gitaly/service/commit/tree_entries.go @@ -2,6 +2,7 @@ package commit import ( "context" + "errors" "fmt" "sort" @@ -9,8 +10,10 @@ import ( log "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/v14/internal/git" "gitlab.com/gitlab-org/gitaly/v14/internal/git/catfile" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v14/internal/git/lstree" "gitlab.com/gitlab-org/gitaly/v14/internal/helper/chunk" + "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -59,10 +62,9 @@ func populateFlatPath( return nil } -func sendTreeEntries( +func (s *server) sendTreeEntries( stream gitalypb.CommitService_GetTreeEntriesServer, - objectReader catfile.ObjectReader, - objectInfoReader catfile.ObjectInfoReader, + repo *localrepo.Repo, revision, path string, recursive bool, sort gitalypb.GetTreeEntriesRequest_SortBy, @@ -70,13 +72,104 @@ func sendTreeEntries( ) error { ctx := stream.Context() - entries, err := treeEntries(ctx, objectReader, objectInfoReader, revision, path, "", recursive) - if err != nil { - return err + var entries []*gitalypb.TreeEntry + + // When we want to do a recursive listing, then it's a _lot_ more efficient to let + // git-ls-tree(1) handle this for us. In theory, we'd also want to do this for the + // non-recursive case. But in practice, we must populate a so-called "flat path" when doing + // a non-recursive listing, where the flat path of a directory entry points to the first + // subdirectory which has more than a single entry. + // + // Answering this query efficiently is not possible with Git's tooling, and solving it via + // git-ls-tree(1) is worse than using a long-lived catfile process. We thus fall back to + // using catfile readers to answer these non-recursive queries. + if recursive && featureflag.TreeEntriesViaLsTree.IsEnabled(ctx) { + if path == "." { + path = "" + } + + rootTreeInfo, err := repo.ResolveRevision(ctx, git.Revision(revision+"^{tree}")) + if err != nil { + if catfile.IsNotFound(err) { + return nil + } + + return err + } + + treeEntries, err := lstree.ListEntries(ctx, repo, git.Revision(revision), &lstree.ListEntriesConfig{ + Recursive: recursive, + RelativePath: path, + }) + if err != nil { + // Design wart: we do not return an error if the request does not + // point to a tree object, but just return nothing. + if errors.Is(err, lstree.ErrNotTreeish) { + return nil + } + + return fmt.Errorf("listing tree entries: %w", err) + } + + entries = make([]*gitalypb.TreeEntry, 0, len(treeEntries)) + for _, entry := range treeEntries { + objectID, err := entry.ObjectID.Bytes() + if err != nil { + return fmt.Errorf("converting tree entry OID: %w", err) + } + + treeEntry, err := newTreeEntry( + revision, + rootTreeInfo.String(), + path, + []byte(entry.Path), + objectID, + entry.Mode, + ) + if err != nil { + return fmt.Errorf("converting tree entry: %w", err) + } + + entries = append(entries, treeEntry) + } + } else { + objectReader, err := s.catfileCache.ObjectReader(stream.Context(), repo) + if err != nil { + return err + } + + objectInfoReader, err := s.catfileCache.ObjectInfoReader(stream.Context(), repo) + if err != nil { + return err + } + + entries, err = treeEntries(ctx, objectReader, objectInfoReader, revision, path, "", recursive) + if err != nil { + return err + } + + if !recursive { + // When we're not doing a recursive request, then we need to populate flat + // paths. A flat path of a tree entry refers to the first subtree of that + // entry which either has at least one blob or more than two subtrees. In + // other terms, it refers to the first "non-empty" subtree such that it's + // easy to skip navigating the intermediate subtrees which wouldn't carry + // any interesting information anyway. + // + // Unfortunately, computing flat paths is _really_ inefficient: for each + // tree entry, we recurse up to 10 levels deep into that subtree. We do so + // by requesting the tree entries via a catfile process, which to the best + // of my knowledge is as good as we can get. Doing this via git-ls-tree(1) + // wouldn't fly: we'd have to spawn a separate process for each of the + // subtrees, which is a lot of overhead. + if err := populateFlatPath(ctx, objectReader, objectInfoReader, entries); err != nil { + return err + } + } } // We sort before we paginate to ensure consistent results with ListLastCommitsForTree - entries, err = sortTrees(entries, sort) + entries, err := sortTrees(entries, sort) if err != nil { return err } @@ -89,12 +182,6 @@ func sendTreeEntries( } } - if !recursive { - if err := populateFlatPath(ctx, objectReader, objectInfoReader, entries); err != nil { - return err - } - } - treeSender := &treeEntriesSender{stream: stream} if cursor != "" { @@ -189,19 +276,9 @@ func (s *server) GetTreeEntries(in *gitalypb.GetTreeEntriesRequest, stream gital repo := s.localrepo(in.GetRepository()) - objectReader, err := s.catfileCache.ObjectReader(stream.Context(), repo) - if err != nil { - return err - } - - objectInfoReader, err := s.catfileCache.ObjectInfoReader(stream.Context(), repo) - if err != nil { - return err - } - revision := string(in.GetRevision()) path := string(in.GetPath()) - return sendTreeEntries(stream, objectReader, objectInfoReader, revision, path, in.Recursive, in.GetSort(), in.GetPaginationParams()) + return s.sendTreeEntries(stream, repo, revision, path, in.Recursive, in.GetSort(), in.GetPaginationParams()) } func paginateTreeEntries(entries []*gitalypb.TreeEntry, p *gitalypb.PaginationParameter) ([]*gitalypb.TreeEntry, string, error) { diff --git a/internal/gitaly/service/commit/tree_entries_test.go b/internal/gitaly/service/commit/tree_entries_test.go index 3e5213309..1c952d68c 100644 --- a/internal/gitaly/service/commit/tree_entries_test.go +++ b/internal/gitaly/service/commit/tree_entries_test.go @@ -1,6 +1,8 @@ package commit import ( + "context" + "errors" "fmt" "io" "os" @@ -11,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v14/internal/helper/text" + "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" "google.golang.org/grpc/codes" @@ -653,3 +656,77 @@ func drainTreeEntriesResponse(c gitalypb.CommitService_GetTreeEntriesClient) err } return err } + +func BenchmarkGetTreeEntries(b *testing.B) { + testhelper.NewFeatureSets([]featureflag.FeatureFlag{ + featureflag.TreeEntriesViaLsTree, + }).Bench(b, benchmarkGetTreeEntries) +} + +func benchmarkGetTreeEntries(b *testing.B, ctx context.Context) { + cfg, client := setupCommitService(b) + + repo, _ := gittest.CloneRepo(b, cfg, cfg.Storages[0], gittest.CloneRepoOpts{ + SourceRepo: "benchmark.git", + }) + + for _, tc := range []struct { + desc string + request *gitalypb.GetTreeEntriesRequest + expectedEntries int + }{ + { + desc: "recursive from root", + request: &gitalypb.GetTreeEntriesRequest{ + Repository: repo, + Revision: []byte("9659e770b131abb4e01d74306819192cd553c258"), + Path: []byte("."), + Recursive: true, + }, + expectedEntries: 61027, + }, + { + desc: "non-recursive from root", + request: &gitalypb.GetTreeEntriesRequest{ + Repository: repo, + Revision: []byte("9659e770b131abb4e01d74306819192cd553c258"), + Path: []byte("."), + Recursive: false, + }, + expectedEntries: 101, + }, + { + desc: "recursive from subdirectory", + request: &gitalypb.GetTreeEntriesRequest{ + Repository: repo, + Revision: []byte("9659e770b131abb4e01d74306819192cd553c258"), + Path: []byte("lib/gitlab"), + Recursive: true, + }, + expectedEntries: 2642, + }, + } { + b.Run(tc.desc, func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + stream, err := client.GetTreeEntries(ctx, tc.request) + require.NoError(b, err) + + entriesReceived := 0 + for { + response, err := stream.Recv() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + require.NoError(b, err) + } + + entriesReceived += len(response.Entries) + } + require.Equal(b, tc.expectedEntries, entriesReceived) + } + }) + } +} diff --git a/internal/gitaly/service/operations/submodules_test.go b/internal/gitaly/service/operations/submodules_test.go index 8b2051043..c9dea3574 100644 --- a/internal/gitaly/service/operations/submodules_test.go +++ b/internal/gitaly/service/operations/submodules_test.go @@ -97,7 +97,7 @@ func TestSuccessfulUserUpdateSubmoduleRequest(t *testing.T) { parsedEntry, err := parser.NextEntry() require.NoError(t, err) require.Equal(t, testCase.submodule, parsedEntry.Path) - require.Equal(t, testCase.commitSha, parsedEntry.Oid) + require.Equal(t, testCase.commitSha, parsedEntry.ObjectID.String()) }) } } diff --git a/internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go b/internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go new file mode 100644 index 000000000..8491e4c6a --- /dev/null +++ b/internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go @@ -0,0 +1,5 @@ +package featureflag + +// TreeEntriesViaLsTree switches over retrieval of tree entries from using git-cat-file(1) to +// instead use git-ls-tree(1). +var TreeEntriesViaLsTree = NewFeatureFlag("tree_entries_via_ls_tree", false) diff --git a/internal/testhelper/featureset.go b/internal/testhelper/featureset.go index 3d5214388..cb89a36af 100644 --- a/internal/testhelper/featureset.go +++ b/internal/testhelper/featureset.go @@ -104,3 +104,19 @@ func (s FeatureSets) Run(t *testing.T, test func(t *testing.T, ctx context.Conte }) } } + +// Bench executes the given benchmarking function for each of the FeatureSets. The passed in +// context has the feature flags set accordingly. +func (s FeatureSets) Bench(b *testing.B, test func(b *testing.B, ctx context.Context)) { + b.Helper() + + for _, featureSet := range s { + b.Run(featureSet.Desc(), func(b *testing.B) { + ctx, cancel := Context() + defer cancel() + ctx = featureSet.Disable(ctx) + + test(b, ctx) + }) + } +} |