diff options
author | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2019-04-05 11:35:26 +0300 |
---|---|---|
committer | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2019-04-05 11:35:26 +0300 |
commit | 985f5bc963aa070ceb9f3de20825fd7367050173 (patch) | |
tree | 6e1135cee41c49746859938e30a8c1f33d9562db | |
parent | 49401ee6c8aea5decd94d83d0067242b0da442cf (diff) | |
parent | acea635498d15ecd3138bbf931a9757e70de612e (diff) |
Merge branch 'get-blobs-performance' into 'master'
Improve GetBlobs performance for fetching lots of files
See merge request gitlab-org/gitaly!1165
-rw-r--r-- | changelogs/unreleased/get-blobs-performance.yml | 5 | ||||
-rw-r--r-- | internal/service/blob/get_blobs.go | 4 | ||||
-rw-r--r-- | internal/service/commit/tree_entries_helper.go | 58 | ||||
-rw-r--r-- | internal/service/commit/tree_entry.go | 2 |
4 files changed, 50 insertions, 19 deletions
```diff
diff --git a/changelogs/unreleased/get-blobs-performance.yml b/changelogs/unreleased/get-blobs-performance.yml
new file mode 100644
index 000000000..68e152952
--- /dev/null
+++ b/changelogs/unreleased/get-blobs-performance.yml
@@ -0,0 +1,5 @@
+---
+title: Improve GetBlobs performance for fetching lots of files
+merge_request: 1165
+author:
+type: performance
diff --git a/internal/service/blob/get_blobs.go b/internal/service/blob/get_blobs.go
index 4bb70ee5f..f0fad5a81 100644
--- a/internal/service/blob/get_blobs.go
+++ b/internal/service/blob/get_blobs.go
@@ -13,11 +13,13 @@ import (
 )
 
 func sendGetBlobsResponse(req *gitalypb.GetBlobsRequest, stream gitalypb.BlobService_GetBlobsServer, c *catfile.Batch) error {
+	tef := commit.NewTreeEntryFinder(c)
+
 	for _, revisionPath := range req.RevisionPaths {
 		revision := revisionPath.Revision
 		path := revisionPath.Path
 
-		treeEntry, err := commit.TreeEntryForRevisionAndPath(c, revision, string(path))
+		treeEntry, err := tef.FindByRevisionAndPath(revision, string(path))
 		if err != nil {
 			return err
 		}
diff --git a/internal/service/commit/tree_entries_helper.go b/internal/service/commit/tree_entries_helper.go
index 369b1b2a1..c2fed8773 100644
--- a/internal/service/commit/tree_entries_helper.go
+++ b/internal/service/commit/tree_entries_helper.go
@@ -12,6 +12,47 @@ import (
 	"gitlab.com/gitlab-org/gitaly/internal/git/catfile"
 )
 
+type revisionPath struct{ revision, path string }
+
+// TreeEntryFinder is a struct for searching through a tree with caching.
+type TreeEntryFinder struct {
+	c         *catfile.Batch
+	treeCache map[revisionPath][]*gitalypb.TreeEntry
+}
+
+// NewTreeEntryFinder initializes a TreeEntryFinder with an empty tree cache.
+func NewTreeEntryFinder(c *catfile.Batch) *TreeEntryFinder {
+	return &TreeEntryFinder{
+		c:         c,
+		treeCache: make(map[revisionPath][]*gitalypb.TreeEntry),
+	}
+}
+
+// FindByRevisionAndPath returns a TreeEntry struct for the object present at the revision/path pair.
+func (tef *TreeEntryFinder) FindByRevisionAndPath(revision, path string) (*gitalypb.TreeEntry, error) {
+	dir := pathPkg.Dir(path)
+	cacheKey := revisionPath{revision: revision, path: dir}
+	entries, ok := tef.treeCache[cacheKey]
+
+	if !ok {
+		var err error
+		entries, err = treeEntries(tef.c, revision, dir, "", false)
+		if err != nil {
+			return nil, err
+		}
+
+		tef.treeCache[cacheKey] = entries
+	}
+
+	for _, entry := range entries {
+		if string(entry.Path) == path {
+			return entry, nil
+		}
+	}
+
+	return nil, nil
+}
+
 const oidSize = 20
 
 func extractEntryInfoFromTreeData(treeData *bytes.Buffer, commitOid, rootOid, rootPath string, treeInfo *catfile.ObjectInfo) ([]*gitalypb.TreeEntry, error) {
@@ -122,20 +163,3 @@ func treeEntries(c *catfile.Batch, revision, path string, rootOid string, recurs
 
 	return orderedEntries, nil
 }
-
-// TreeEntryForRevisionAndPath returns a TreeEntry struct for the object present at the revision/path pair.
-func TreeEntryForRevisionAndPath(c *catfile.Batch, revision, path string) (*gitalypb.TreeEntry, error) {
-	entries, err := treeEntries(c, revision, pathPkg.Dir(path), "", false)
-	if err != nil {
-		return nil, err
-	}
-
-	for _, entry := range entries {
-		if string(entry.Path) == path {
-			entry.RootOid = "" // Not sure why we do this
-			return entry, nil
-		}
-	}
-
-	return nil, nil
-}
diff --git a/internal/service/commit/tree_entry.go b/internal/service/commit/tree_entry.go
index 0b054803d..f00c4cf44 100644
--- a/internal/service/commit/tree_entry.go
+++ b/internal/service/commit/tree_entry.go
@@ -14,7 +14,7 @@ import (
 )
 
 func sendTreeEntry(stream gitalypb.CommitService_TreeEntryServer, c *catfile.Batch, revision, path string, limit int64) error {
-	treeEntry, err := TreeEntryForRevisionAndPath(c, revision, path)
+	treeEntry, err := NewTreeEntryFinder(c).FindByRevisionAndPath(revision, path)
 	if err != nil {
 		return err
 	}
```