Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2021-11-16 15:48:06 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2021-11-16 15:48:06 +0300
commit724e844c814b91bb4a98f7eb3607756b7504657d (patch)
tree87fdcab33ffcec1fdf7d640ad15a1fba094276c9 /internal
parent8f04e23b9f004d70635aaeecd30d837a584572f8 (diff)
parent61c5912f7d9dcb2068b04ab1594281e8d45d8975 (diff)
Merge branch 'pks-commit-tree-entries' into 'master'
commit: Refactor recursive `GetTreeEntries()` for efficiency Closes #3888 See merge request gitlab-org/gitaly!4052
Diffstat (limited to 'internal')
-rw-r--r--internal/git/gittest/tree.go4
-rw-r--r--internal/git/lstree/entries.go38
-rw-r--r--internal/git/lstree/last_commits.go101
-rw-r--r--internal/git/lstree/list_entries.go90
-rw-r--r--internal/git/lstree/list_entries_test.go158
-rw-r--r--internal/git/lstree/parser.go88
-rw-r--r--internal/git/lstree/parser_test.go (renamed from internal/git/lstree/last_commits_test.go)64
-rw-r--r--internal/git/lstree/testhelper_test.go11
-rw-r--r--internal/gitaly/service/commit/tree_entries.go125
-rw-r--r--internal/gitaly/service/commit/tree_entries_test.go77
-rw-r--r--internal/gitaly/service/operations/submodules_test.go2
-rw-r--r--internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go5
-rw-r--r--internal/testhelper/featureset.go16
13 files changed, 619 insertions, 160 deletions
diff --git a/internal/git/gittest/tree.go b/internal/git/gittest/tree.go
index 757f58a77..8895a414b 100644
--- a/internal/git/gittest/tree.go
+++ b/internal/git/gittest/tree.go
@@ -75,14 +75,14 @@ func RequireTree(t testing.TB, cfg config.Cfg, repoPath, treeish string, expecte
func WriteTree(t testing.TB, cfg config.Cfg, repoPath string, entries []TreeEntry) git.ObjectID {
t.Helper()
- require.NotEmpty(t, entries)
-
var tree bytes.Buffer
for _, entry := range entries {
var entryType string
switch entry.Mode {
case "100644":
entryType = "blob"
+ case "100755":
+ entryType = "blob"
case "040000":
entryType = "tree"
default:
diff --git a/internal/git/lstree/entries.go b/internal/git/lstree/entries.go
new file mode 100644
index 000000000..4a729fec0
--- /dev/null
+++ b/internal/git/lstree/entries.go
@@ -0,0 +1,38 @@
+package lstree
+
+import "gitlab.com/gitlab-org/gitaly/v14/internal/git"
+
+// ObjectType is an Enum for the type of object of
+// the ls-tree entry, which can be can be tree, blob or commit
+type ObjectType int
+
+// Entry represents a single ls-tree entry
+type Entry struct {
+ Mode []byte
+ Type ObjectType
+ ObjectID git.ObjectID
+ Path string
+}
+
+// Entries holds every ls-tree Entry
+type Entries []Entry
+
+// Enum values for ObjectType
+const (
+ Tree ObjectType = iota
+ Blob
+ Submodule
+)
+
+func (e Entries) Len() int {
+ return len(e)
+}
+
+func (e Entries) Swap(i, j int) {
+ e[i], e[j] = e[j], e[i]
+}
+
+// Less sorts entries by type in the order [*tree *blobs *submodules]
+func (e Entries) Less(i, j int) bool {
+ return e[i].Type < e[j].Type
+}
diff --git a/internal/git/lstree/last_commits.go b/internal/git/lstree/last_commits.go
deleted file mode 100644
index def66b5b1..000000000
--- a/internal/git/lstree/last_commits.go
+++ /dev/null
@@ -1,101 +0,0 @@
-package lstree
-
-import (
- "bufio"
- "bytes"
- "errors"
- "io"
-)
-
-// ObjectType is an Enum for the type of object of
-// the ls-tree entry, which can be can be tree, blob or commit
-type ObjectType int
-
-// Entry represents a single ls-tree entry
-type Entry struct {
- Mode []byte
- Type ObjectType
- Oid string
- Path string
-}
-
-// Entries holds every ls-tree Entry
-type Entries []Entry
-
-// Parser holds the necessary state for parsing the ls-tree output
-type Parser struct {
- reader *bufio.Reader
-}
-
-// Enum values for ObjectType
-const (
- Tree ObjectType = iota
- Blob
- Submodule
-)
-
-// ErrParse is returned when the parse of an entry was unsuccessful
-var ErrParse = errors.New("failed to parse git ls-tree response")
-
-func (e Entries) Len() int {
- return len(e)
-}
-
-func (e Entries) Swap(i, j int) {
- e[i], e[j] = e[j], e[i]
-}
-
-// Less sorts entries by type in the order [*tree *blobs *submodules]
-func (e Entries) Less(i, j int) bool {
- return e[i].Type < e[j].Type
-}
-
-// NewParser returns a new Parser
-func NewParser(src io.Reader) *Parser {
- return &Parser{
- reader: bufio.NewReader(src),
- }
-}
-
-// NextEntry reads from git ls-tree --z --full-name command
-// parses the tree entry and returns a *Entry.
-func (p *Parser) NextEntry() (*Entry, error) {
- data, err := p.reader.ReadBytes(0x00)
- if err != nil {
- return nil, err
- }
-
- // We expect each `data` to be <mode> SP <type> SP <object> TAB <path>\0.
- split := bytes.SplitN(data, []byte(" "), 3)
- if len(split) != 3 {
- return nil, ErrParse
- }
-
- objectAndFile := bytes.SplitN(split[len(split)-1], []byte("\t"), 2)
- if len(objectAndFile) != 2 {
- return nil, ErrParse
- }
-
- objectType, err := toEnum(string(split[1]))
- if err != nil {
- return nil, err
- }
-
- // We know that the last byte in 'path' will be a zero byte.
- path := string(bytes.TrimRight(objectAndFile[1], "\x00"))
-
- return &Entry{Mode: split[0], Type: objectType, Oid: string(objectAndFile[0]), Path: path}, nil
-}
-
-func toEnum(s string) (ObjectType, error) {
- switch s {
- case "tree":
- return Tree, nil
- case "blob":
- return Blob, nil
- case "commit":
- return Submodule, nil
- default:
- return -1, ErrParse
- }
-}
diff --git a/internal/git/lstree/list_entries.go b/internal/git/lstree/list_entries.go
new file mode 100644
index 000000000..ab27d6f5d
--- /dev/null
+++ b/internal/git/lstree/list_entries.go
@@ -0,0 +1,90 @@
+package lstree
+
+import (
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "strings"
+
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
+)
+
+var (
+ // ErrNotExist indicates that the requested tree does not exist, either because the revision
+ // is invalid or because the path is not valid.
+ ErrNotExist = errors.New("invalid object name")
+ // ErrNotTreeish indicates that the requested revision or path does not resolve to a tree
+ // object.
+ ErrNotTreeish = errors.New("not treeish")
+)
+
+// ListEntriesConfig is configuration that can be passed to ListEntries.
+type ListEntriesConfig struct {
+ // Recursive indicates whether the listing shall be recursive or not.
+ Recursive bool
+ // RelativePath is the relative path at which listing of entries should be started.
+ RelativePath string
+}
+
+// ListEntries lists tree entries for the given treeish. By default, this will do a non-recursive
+// listing starting from the root of the given treeish. This behaviour can be changed by passing a
+// config.
+func ListEntries(
+ ctx context.Context,
+ repo *localrepo.Repo,
+ treeish git.Revision,
+ cfg *ListEntriesConfig,
+) ([]*Entry, error) {
+ if cfg == nil {
+ cfg = &ListEntriesConfig{}
+ }
+
+ flags := []git.Option{git.Flag{Name: "-z"}}
+ if cfg.Recursive {
+ flags = append(flags,
+ git.Flag{Name: "-r"},
+ git.Flag{Name: "-t"},
+ )
+ }
+
+ var stderr bytes.Buffer
+ cmd, err := repo.Exec(ctx, git.SubCmd{
+ Name: "ls-tree",
+ Args: []string{fmt.Sprintf("%s:%s", treeish, cfg.RelativePath)},
+ Flags: flags,
+ }, git.WithStderr(&stderr))
+ if err != nil {
+ return nil, fmt.Errorf("spawning git-ls-tree: %w", err)
+ }
+
+ parser := NewParser(cmd)
+ var entries []*Entry
+ for {
+ entry, err := parser.NextEntry()
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ break
+ }
+
+ return nil, fmt.Errorf("parsing tree entry: %w", err)
+ }
+
+ entries = append(entries, entry)
+ }
+
+ if err := cmd.Wait(); err != nil {
+ errorMessage := stderr.String()
+ if strings.HasPrefix(errorMessage, "fatal: not a tree object") {
+ return nil, ErrNotTreeish
+ } else if strings.HasPrefix(errorMessage, "fatal: Not a valid object name") {
+ return nil, ErrNotExist
+ }
+
+ return nil, fmt.Errorf("waiting for git-ls-tree: %w, stderr: %q", err, errorMessage)
+ }
+
+ return entries, nil
+}
diff --git a/internal/git/lstree/list_entries_test.go b/internal/git/lstree/list_entries_test.go
new file mode 100644
index 000000000..ae376ace0
--- /dev/null
+++ b/internal/git/lstree/list_entries_test.go
@@ -0,0 +1,158 @@
+package lstree
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg"
+)
+
+func TestListEntries(t *testing.T) {
+ cfg := testcfg.Build(t)
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("blob contents"))
+ emptyTreeID := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{})
+ treeWithBlob := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {OID: blobID, Mode: "100644", Path: "nonexecutable"},
+ {OID: blobID, Mode: "100755", Path: "executable"},
+ })
+ treeWithSubtree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {OID: emptyTreeID, Mode: "040000", Path: "subdir"},
+ })
+ treeWithNestedSubtrees := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {OID: treeWithSubtree, Mode: "040000", Path: "nested-subdir"},
+ })
+ treeWithSubtreeAndBlob := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {OID: treeWithSubtree, Mode: "040000", Path: "subdir"},
+ {OID: blobID, Mode: "100644", Path: "blob"},
+ })
+ treeWithSubtreeContainingBlob := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {OID: treeWithSubtreeAndBlob, Mode: "040000", Path: "subdir"},
+ })
+
+ for _, tc := range []struct {
+ desc string
+ treeish git.Revision
+ cfg *ListEntriesConfig
+ expectedResults []*Entry
+ expectedErr error
+ }{
+ {
+ desc: "empty tree",
+ treeish: emptyTreeID.Revision(),
+ },
+ {
+ desc: "tree with blob",
+ treeish: treeWithBlob.Revision(),
+ expectedResults: []*Entry{
+ {Mode: []byte("100755"), Type: Blob, ObjectID: blobID, Path: "executable"},
+ {Mode: []byte("100644"), Type: Blob, ObjectID: blobID, Path: "nonexecutable"},
+ },
+ },
+ {
+ desc: "tree with subtree",
+ treeish: treeWithSubtree.Revision(),
+ expectedResults: []*Entry{
+ {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir"},
+ },
+ },
+ {
+ desc: "nested trees",
+ treeish: treeWithNestedSubtrees.Revision(),
+ expectedResults: []*Entry{
+ {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "nested-subdir"},
+ },
+ },
+ {
+ desc: "recursive nested trees",
+ treeish: treeWithNestedSubtrees.Revision(),
+ cfg: &ListEntriesConfig{
+ Recursive: true,
+ },
+ expectedResults: []*Entry{
+ {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "nested-subdir"},
+ {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "nested-subdir/subdir"},
+ },
+ },
+ {
+ desc: "nested subtree",
+ treeish: treeWithNestedSubtrees.Revision(),
+ cfg: &ListEntriesConfig{
+ RelativePath: "nested-subdir",
+ },
+ expectedResults: []*Entry{
+ {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir"},
+ },
+ },
+ {
+ desc: "nested recursive subtree",
+ treeish: treeWithSubtreeContainingBlob.Revision(),
+ cfg: &ListEntriesConfig{
+ RelativePath: "subdir",
+ Recursive: true,
+ },
+ expectedResults: []*Entry{
+ {Mode: []byte("100644"), Type: Blob, ObjectID: blobID, Path: "blob"},
+ {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "subdir"},
+ {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir/subdir"},
+ },
+ },
+ {
+ desc: "recursive nested trees and blobs",
+ treeish: treeWithSubtreeAndBlob.Revision(),
+ cfg: &ListEntriesConfig{
+ Recursive: true,
+ },
+ expectedResults: []*Entry{
+ {Mode: []byte("100644"), Type: Blob, ObjectID: blobID, Path: "blob"},
+ {Mode: []byte("040000"), Type: Tree, ObjectID: treeWithSubtree, Path: "subdir"},
+ {Mode: []byte("040000"), Type: Tree, ObjectID: emptyTreeID, Path: "subdir/subdir"},
+ },
+ },
+ {
+ desc: "listing blob fails",
+ treeish: blobID.Revision(),
+ // We get a NotExist error here because it's invalid to suffix an object ID
+ // which resolves to a blob with a colon (":") given that it's not possible
+ // to resolve a subpath.
+ expectedErr: ErrNotExist,
+ },
+ {
+ desc: "valid revision with invalid path",
+ treeish: treeWithSubtree.Revision(),
+ cfg: &ListEntriesConfig{
+ RelativePath: "does-not-exist",
+ },
+ expectedErr: ErrNotExist,
+ },
+ {
+ desc: "valid revision with path pointing to blob",
+ treeish: treeWithSubtreeAndBlob.Revision(),
+ cfg: &ListEntriesConfig{
+ RelativePath: "blob",
+ },
+ expectedErr: ErrNotTreeish,
+ },
+ {
+ desc: "listing nonexistent object fails",
+ treeish: "does-not-exist",
+ expectedErr: ErrNotExist,
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ results, err := ListEntries(ctx, repo, tc.treeish, tc.cfg)
+ require.Equal(t, tc.expectedResults, results)
+ require.Equal(t, tc.expectedErr, err)
+ })
+ }
+}
diff --git a/internal/git/lstree/parser.go b/internal/git/lstree/parser.go
new file mode 100644
index 000000000..13fe2de66
--- /dev/null
+++ b/internal/git/lstree/parser.go
@@ -0,0 +1,88 @@
+package lstree
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git"
+)
+
+// ErrParse is returned when the parse of an entry was unsuccessful
+var ErrParse = errors.New("failed to parse git ls-tree response")
+
+// Parser holds the necessary state for parsing the ls-tree output
+type Parser struct {
+ reader *bufio.Reader
+}
+
+// NewParser returns a new Parser
+func NewParser(src io.Reader) *Parser {
+ return &Parser{
+ reader: bufio.NewReader(src),
+ }
+}
+
+// NextEntry reads a tree entry as it would be written by `git ls-tree -z`.
+func (p *Parser) NextEntry() (*Entry, error) {
+ // Each tree entry is expected to have a format of `<mode> SP <type> SP <objectid> TAB <path> NUL`.
+
+ treeEntryMode, err := p.reader.ReadBytes(' ')
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ return nil, io.EOF
+ }
+
+ return nil, fmt.Errorf("reading mode: %w", err)
+ }
+ treeEntryMode = treeEntryMode[:len(treeEntryMode)-1]
+
+ treeEntryType, err := p.reader.ReadBytes(' ')
+ if err != nil {
+ return nil, fmt.Errorf("reading type: %w", err)
+ }
+ treeEntryType = treeEntryType[:len(treeEntryType)-1]
+
+ treeEntryID, err := p.reader.ReadBytes('\t')
+ if err != nil {
+ return nil, fmt.Errorf("reading OID: %w", err)
+ }
+ treeEntryID = treeEntryID[:len(treeEntryID)-1]
+
+ treeEntryPath, err := p.reader.ReadBytes(0x00)
+ if err != nil {
+ return nil, fmt.Errorf("reading path: %w", err)
+ }
+ treeEntryPath = treeEntryPath[:len(treeEntryPath)-1]
+
+ objectType, err := toEnum(string(treeEntryType))
+ if err != nil {
+ return nil, err
+ }
+
+ objectID, err := git.NewObjectIDFromHex(string(treeEntryID))
+ if err != nil {
+ return nil, err
+ }
+
+ return &Entry{
+ Mode: treeEntryMode,
+ Type: objectType,
+ ObjectID: objectID,
+ Path: string(treeEntryPath),
+ }, nil
+}
+
+func toEnum(s string) (ObjectType, error) {
+ switch s {
+ case "tree":
+ return Tree, nil
+ case "blob":
+ return Blob, nil
+ case "commit":
+ return Submodule, nil
+ default:
+ return -1, ErrParse
+ }
+}
diff --git a/internal/git/lstree/last_commits_test.go b/internal/git/lstree/parser_test.go
index d12e97247..f1a4dfab8 100644
--- a/internal/git/lstree/last_commits_test.go
+++ b/internal/git/lstree/parser_test.go
@@ -19,28 +19,28 @@ func TestParser(t *testing.T) {
filename: "testdata/z-lstree.txt",
entries: Entries{
{
- Mode: []byte("100644"),
- Type: Blob,
- Oid: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82",
- Path: ".gitignore",
+ Mode: []byte("100644"),
+ Type: Blob,
+ ObjectID: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82",
+ Path: ".gitignore",
},
{
- Mode: []byte("100644"),
- Type: Blob,
- Oid: "0792c58905eff3432b721f8c4a64363d8e28d9ae",
- Path: ".gitmodules",
+ Mode: []byte("100644"),
+ Type: Blob,
+ ObjectID: "0792c58905eff3432b721f8c4a64363d8e28d9ae",
+ Path: ".gitmodules",
},
{
- Mode: []byte("040000"),
- Type: Tree,
- Oid: "3c122d2b7830eca25235131070602575cf8b41a1",
- Path: "encoding",
+ Mode: []byte("040000"),
+ Type: Tree,
+ ObjectID: "3c122d2b7830eca25235131070602575cf8b41a1",
+ Path: "encoding",
},
{
- Mode: []byte("160000"),
- Type: Submodule,
- Oid: "79bceae69cb5750d6567b223597999bfa91cb3b9",
- Path: "gitlab-shell",
+ Mode: []byte("160000"),
+ Type: Submodule,
+ ObjectID: "79bceae69cb5750d6567b223597999bfa91cb3b9",
+ Path: "gitlab-shell",
},
},
},
@@ -49,28 +49,28 @@ func TestParser(t *testing.T) {
filename: "testdata/z-lstree-irregular.txt",
entries: Entries{
{
- Mode: []byte("100644"),
- Type: Blob,
- Oid: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82",
- Path: ".gitignore",
+ Mode: []byte("100644"),
+ Type: Blob,
+ ObjectID: "dfaa3f97ca337e20154a98ac9d0be76ddd1fcc82",
+ Path: ".gitignore",
},
{
- Mode: []byte("100644"),
- Type: Blob,
- Oid: "0792c58905eff3432b721f8c4a64363d8e28d9ae",
- Path: ".gitmodules",
+ Mode: []byte("100644"),
+ Type: Blob,
+ ObjectID: "0792c58905eff3432b721f8c4a64363d8e28d9ae",
+ Path: ".gitmodules",
},
{
- Mode: []byte("040000"),
- Type: Tree,
- Oid: "3c122d2b7830eca25235131070602575cf8b41a1",
- Path: "some encoding",
+ Mode: []byte("040000"),
+ Type: Tree,
+ ObjectID: "3c122d2b7830eca25235131070602575cf8b41a1",
+ Path: "some encoding",
},
{
- Mode: []byte("160000"),
- Type: Submodule,
- Oid: "79bceae69cb5750d6567b223597999bfa91cb3b9",
- Path: "gitlab-shell",
+ Mode: []byte("160000"),
+ Type: Submodule,
+ ObjectID: "79bceae69cb5750d6567b223597999bfa91cb3b9",
+ Path: "gitlab-shell",
},
},
},
diff --git a/internal/git/lstree/testhelper_test.go b/internal/git/lstree/testhelper_test.go
new file mode 100644
index 000000000..484c397e6
--- /dev/null
+++ b/internal/git/lstree/testhelper_test.go
@@ -0,0 +1,11 @@
+package lstree
+
+import (
+ "testing"
+
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
+)
+
+func TestMain(m *testing.M) {
+ testhelper.Run(m)
+}
diff --git a/internal/gitaly/service/commit/tree_entries.go b/internal/gitaly/service/commit/tree_entries.go
index 73e13e9c6..98eb0b7f9 100644
--- a/internal/gitaly/service/commit/tree_entries.go
+++ b/internal/gitaly/service/commit/tree_entries.go
@@ -2,6 +2,7 @@ package commit
import (
"context"
+ "errors"
"fmt"
"sort"
@@ -9,8 +10,10 @@ import (
log "github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/v14/internal/git"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/catfile"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/lstree"
"gitlab.com/gitlab-org/gitaly/v14/internal/helper/chunk"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@@ -59,10 +62,9 @@ func populateFlatPath(
return nil
}
-func sendTreeEntries(
+func (s *server) sendTreeEntries(
stream gitalypb.CommitService_GetTreeEntriesServer,
- objectReader catfile.ObjectReader,
- objectInfoReader catfile.ObjectInfoReader,
+ repo *localrepo.Repo,
revision, path string,
recursive bool,
sort gitalypb.GetTreeEntriesRequest_SortBy,
@@ -70,13 +72,104 @@ func sendTreeEntries(
) error {
ctx := stream.Context()
- entries, err := treeEntries(ctx, objectReader, objectInfoReader, revision, path, "", recursive)
- if err != nil {
- return err
+ var entries []*gitalypb.TreeEntry
+
+ // When we want to do a recursive listing, then it's a _lot_ more efficient to let
+ // git-ls-tree(1) handle this for us. In theory, we'd also want to do this for the
+ // non-recursive case. But in practice, we must populate a so-called "flat path" when doing
+ // a non-recursive listing, where the flat path of a directory entry points to the first
+ // subdirectory which has more than a single entry.
+ //
+ // Answering this query efficiently is not possible with Git's tooling, and solving it via
+ // git-ls-tree(1) is worse than using a long-lived catfile process. We thus fall back to
+ // using catfile readers to answer these non-recursive queries.
+ if recursive && featureflag.TreeEntriesViaLsTree.IsEnabled(ctx) {
+ if path == "." {
+ path = ""
+ }
+
+ rootTreeInfo, err := repo.ResolveRevision(ctx, git.Revision(revision+"^{tree}"))
+ if err != nil {
+ if catfile.IsNotFound(err) {
+ return nil
+ }
+
+ return err
+ }
+
+ treeEntries, err := lstree.ListEntries(ctx, repo, git.Revision(revision), &lstree.ListEntriesConfig{
+ Recursive: recursive,
+ RelativePath: path,
+ })
+ if err != nil {
+ // Design wart: we do not return an error if the request does not
+ // point to a tree object, but just return nothing.
+ if errors.Is(err, lstree.ErrNotTreeish) {
+ return nil
+ }
+
+ return fmt.Errorf("listing tree entries: %w", err)
+ }
+
+ entries = make([]*gitalypb.TreeEntry, 0, len(treeEntries))
+ for _, entry := range treeEntries {
+ objectID, err := entry.ObjectID.Bytes()
+ if err != nil {
+ return fmt.Errorf("converting tree entry OID: %w", err)
+ }
+
+ treeEntry, err := newTreeEntry(
+ revision,
+ rootTreeInfo.String(),
+ path,
+ []byte(entry.Path),
+ objectID,
+ entry.Mode,
+ )
+ if err != nil {
+ return fmt.Errorf("converting tree entry: %w", err)
+ }
+
+ entries = append(entries, treeEntry)
+ }
+ } else {
+ objectReader, err := s.catfileCache.ObjectReader(stream.Context(), repo)
+ if err != nil {
+ return err
+ }
+
+ objectInfoReader, err := s.catfileCache.ObjectInfoReader(stream.Context(), repo)
+ if err != nil {
+ return err
+ }
+
+ entries, err = treeEntries(ctx, objectReader, objectInfoReader, revision, path, "", recursive)
+ if err != nil {
+ return err
+ }
+
+ if !recursive {
+ // When we're not doing a recursive request, then we need to populate flat
+ // paths. A flat path of a tree entry refers to the first subtree of that
+ // entry which either has at least one blob or more than two subtrees. In
+ // other terms, it refers to the first "non-empty" subtree such that it's
+ // easy to skip navigating the intermediate subtrees which wouldn't carry
+ // any interesting information anyway.
+ //
+ // Unfortunately, computing flat paths is _really_ inefficient: for each
+ // tree entry, we recurse up to 10 levels deep into that subtree. We do so
+ // by requesting the tree entries via a catfile process, which to the best
+ // of my knowledge is as good as we can get. Doing this via git-ls-tree(1)
+ // wouldn't fly: we'd have to spawn a separate process for each of the
+ // subtrees, which is a lot of overhead.
+ if err := populateFlatPath(ctx, objectReader, objectInfoReader, entries); err != nil {
+ return err
+ }
+ }
}
// We sort before we paginate to ensure consistent results with ListLastCommitsForTree
- entries, err = sortTrees(entries, sort)
+ entries, err := sortTrees(entries, sort)
if err != nil {
return err
}
@@ -89,12 +182,6 @@ func sendTreeEntries(
}
}
- if !recursive {
- if err := populateFlatPath(ctx, objectReader, objectInfoReader, entries); err != nil {
- return err
- }
- }
-
treeSender := &treeEntriesSender{stream: stream}
if cursor != "" {
@@ -189,19 +276,9 @@ func (s *server) GetTreeEntries(in *gitalypb.GetTreeEntriesRequest, stream gital
repo := s.localrepo(in.GetRepository())
- objectReader, err := s.catfileCache.ObjectReader(stream.Context(), repo)
- if err != nil {
- return err
- }
-
- objectInfoReader, err := s.catfileCache.ObjectInfoReader(stream.Context(), repo)
- if err != nil {
- return err
- }
-
revision := string(in.GetRevision())
path := string(in.GetPath())
- return sendTreeEntries(stream, objectReader, objectInfoReader, revision, path, in.Recursive, in.GetSort(), in.GetPaginationParams())
+ return s.sendTreeEntries(stream, repo, revision, path, in.Recursive, in.GetSort(), in.GetPaginationParams())
}
func paginateTreeEntries(entries []*gitalypb.TreeEntry, p *gitalypb.PaginationParameter) ([]*gitalypb.TreeEntry, string, error) {
diff --git a/internal/gitaly/service/commit/tree_entries_test.go b/internal/gitaly/service/commit/tree_entries_test.go
index 3e5213309..1c952d68c 100644
--- a/internal/gitaly/service/commit/tree_entries_test.go
+++ b/internal/gitaly/service/commit/tree_entries_test.go
@@ -1,6 +1,8 @@
package commit
import (
+ "context"
+ "errors"
"fmt"
"io"
"os"
@@ -11,6 +13,7 @@ import (
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v14/internal/helper/text"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
"google.golang.org/grpc/codes"
@@ -653,3 +656,77 @@ func drainTreeEntriesResponse(c gitalypb.CommitService_GetTreeEntriesClient) err
}
return err
}
+
+func BenchmarkGetTreeEntries(b *testing.B) {
+ testhelper.NewFeatureSets([]featureflag.FeatureFlag{
+ featureflag.TreeEntriesViaLsTree,
+ }).Bench(b, benchmarkGetTreeEntries)
+}
+
+func benchmarkGetTreeEntries(b *testing.B, ctx context.Context) {
+ cfg, client := setupCommitService(b)
+
+ repo, _ := gittest.CloneRepo(b, cfg, cfg.Storages[0], gittest.CloneRepoOpts{
+ SourceRepo: "benchmark.git",
+ })
+
+ for _, tc := range []struct {
+ desc string
+ request *gitalypb.GetTreeEntriesRequest
+ expectedEntries int
+ }{
+ {
+ desc: "recursive from root",
+ request: &gitalypb.GetTreeEntriesRequest{
+ Repository: repo,
+ Revision: []byte("9659e770b131abb4e01d74306819192cd553c258"),
+ Path: []byte("."),
+ Recursive: true,
+ },
+ expectedEntries: 61027,
+ },
+ {
+ desc: "non-recursive from root",
+ request: &gitalypb.GetTreeEntriesRequest{
+ Repository: repo,
+ Revision: []byte("9659e770b131abb4e01d74306819192cd553c258"),
+ Path: []byte("."),
+ Recursive: false,
+ },
+ expectedEntries: 101,
+ },
+ {
+ desc: "recursive from subdirectory",
+ request: &gitalypb.GetTreeEntriesRequest{
+ Repository: repo,
+ Revision: []byte("9659e770b131abb4e01d74306819192cd553c258"),
+ Path: []byte("lib/gitlab"),
+ Recursive: true,
+ },
+ expectedEntries: 2642,
+ },
+ } {
+ b.Run(tc.desc, func(b *testing.B) {
+ b.ReportAllocs()
+
+ for i := 0; i < b.N; i++ {
+ stream, err := client.GetTreeEntries(ctx, tc.request)
+ require.NoError(b, err)
+
+ entriesReceived := 0
+ for {
+ response, err := stream.Recv()
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ break
+ }
+ require.NoError(b, err)
+ }
+
+ entriesReceived += len(response.Entries)
+ }
+ require.Equal(b, tc.expectedEntries, entriesReceived)
+ }
+ })
+ }
+}
diff --git a/internal/gitaly/service/operations/submodules_test.go b/internal/gitaly/service/operations/submodules_test.go
index 8b2051043..c9dea3574 100644
--- a/internal/gitaly/service/operations/submodules_test.go
+++ b/internal/gitaly/service/operations/submodules_test.go
@@ -97,7 +97,7 @@ func TestSuccessfulUserUpdateSubmoduleRequest(t *testing.T) {
parsedEntry, err := parser.NextEntry()
require.NoError(t, err)
require.Equal(t, testCase.submodule, parsedEntry.Path)
- require.Equal(t, testCase.commitSha, parsedEntry.Oid)
+ require.Equal(t, testCase.commitSha, parsedEntry.ObjectID.String())
})
}
}
diff --git a/internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go b/internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go
new file mode 100644
index 000000000..8491e4c6a
--- /dev/null
+++ b/internal/metadata/featureflag/ff_tree_entries_via_ls_tree.go
@@ -0,0 +1,5 @@
+package featureflag
+
+// TreeEntriesViaLsTree switches over retrieval of tree entries from using git-cat-file(1) to
+// instead use git-ls-tree(1).
+var TreeEntriesViaLsTree = NewFeatureFlag("tree_entries_via_ls_tree", false)
diff --git a/internal/testhelper/featureset.go b/internal/testhelper/featureset.go
index 3d5214388..cb89a36af 100644
--- a/internal/testhelper/featureset.go
+++ b/internal/testhelper/featureset.go
@@ -104,3 +104,19 @@ func (s FeatureSets) Run(t *testing.T, test func(t *testing.T, ctx context.Conte
})
}
}
+
+// Bench executes the given benchmarking function for each of the FeatureSets. The passed in
+// context has the feature flags set accordingly.
+func (s FeatureSets) Bench(b *testing.B, test func(b *testing.B, ctx context.Context)) {
+ b.Helper()
+
+ for _, featureSet := range s {
+ b.Run(featureSet.Desc(), func(b *testing.B) {
+ ctx, cancel := Context()
+ defer cancel()
+ ctx = featureSet.Disable(ctx)
+
+ test(b, ctx)
+ })
+ }
+}