Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-06-21 11:37:29 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-06-21 15:16:12 +0300
commit67fcf0ffe920e6f7f17f0dbe00a0bdb12566b9f3 (patch)
treea7cab8b61ce5c1aa51c4681ae42a399c6cd05919
parentd0ce2a54d0a399d05691ba2b84f4f4203b0e4eb9 (diff)
localrepo: Speed up calculating size for repo with excluded alternatespks-repo-size-fix-alternate-refs-perf-regression
When calculating a repository's size, we optionally allow the caller to exclude the size of any object pools the repository is connected to. This causes us to add `--not --alternate-refs` to the git-rev-list(1) command, which will thus exclude all objects from disk usage calculation that are reachable by the alternate. As it turns out though, we're hitting a performance edge case: we ask git-rev-list(1) to use bitmaps to calculate the size, but in the case of a pooled repository only the object pool itself will have a bitmap. This means that by definition, the bitmap can only contain objects that we wish to exclude from the disk calculations anyway. All objects that are not reachable by the pool are thus known to not be contained in any bitmap. Because of this using bitmaps is extremely inefficient as shown by the following benchmark, which is performed in `gitlab-org/gitlab`: Benchmark 1: git rev-list --all --objects --disk-usage Time (mean ± σ): 13.290 s ± 0.085 s [User: 13.023 s, System: 0.255 s] Range (min … max): 13.160 s … 13.355 s 5 runs Benchmark 2: git rev-list --all --objects --disk-usage --use-bitmap-index Time (mean ± σ): 3.588 s ± 0.016 s [User: 3.326 s, System: 0.259 s] Range (min … max): 3.576 s … 3.616 s 5 runs Benchmark 3: git rev-list --not --alternate-refs --not --all --objects --disk-usage Time (mean ± σ): 6.828 s ± 0.056 s [User: 6.601 s, System: 0.363 s] Range (min … max): 6.761 s … 6.897 s 5 runs Benchmark 4: git rev-list --not --alternate-refs --not --all --objects --disk-usage --use-bitmap-index Time (mean ± σ): 68.105 s ± 0.383 s [User: 67.471 s, System: 0.744 s] Range (min … max): 67.663 s … 68.509 s 5 runs Summary 'git rev-list --all --objects --disk-usage --use-bitmap-index' ran 1.90 ± 0.02 times faster than 'git rev-list --not --alternate-refs --not --all --objects --disk-usage' 3.70 ± 0.03 times faster than 'git rev-list --all --objects --disk-usage' 18.98 ± 0.14 times faster than 'git rev-list --not --alternate-refs --not --all --objects --disk-usage --use-bitmap-index' As you can see in benchmark #1 and #2, bitmaps speed up disk usage calculations when not using alternate references. But the use of bitmaps severely degrades performance by almost a factor of 10 as soon as we use them in combination with `--alternate-refs` as shown in #4. On the other hand, when we disable the use of bitmaps with alternate refs we are only about twice as slow as compared to not iterating over alternate refs. Interestingly, we never hit this issue in production until recently. This is because of a configuration issue we have had in production: we unconditionally set `core.alternateRefsCommand=exit 0 #`, which causes us to skip over any alternate refs even when explicitly asking for them via `--alternate-refs`. This is definitely unintentional as it causes us to not honor the case where the client asks for shared objects to be excluded from the size calculations. With a recent change though we fixed this issue and started to correctly iterate over alterante refs again, but that resulted in a 20-fold increase in latency for the `RepositorySize()` RPC. So we're currently living in a world where `RepostiorySize()` is either broken, or where it has significant issues with performance. Mitigate the performance hit by not using bitmaps when the client asks tor alternate references to be excluded only in case the repository has an object pool. As shown by the benchmark, this should result in a 10x speedup compared to using bitmaps for repositories with many refs. Changelog: fixed
-rw-r--r--internal/git/localrepo/repo.go45
-rw-r--r--internal/git/localrepo/repo_test.go66
2 files changed, 87 insertions, 24 deletions
diff --git a/internal/git/localrepo/repo.go b/internal/git/localrepo/repo.go
index 14bf8ad10..6d6ac6da3 100644
--- a/internal/git/localrepo/repo.go
+++ b/internal/git/localrepo/repo.go
@@ -3,7 +3,10 @@ package localrepo
import (
"bytes"
"context"
+ "errors"
"fmt"
+ "io/fs"
+ "os"
"strconv"
"strings"
"testing"
@@ -129,7 +132,6 @@ func (repo *Repo) Size(ctx context.Context, opts ...RepoSizeOption) (int64, erro
var stdout bytes.Buffer
var cfg repoSizeConfig
-
for _, opt := range opts {
opt(&cfg)
}
@@ -143,17 +145,46 @@ func (repo *Repo) Size(ctx context.Context, opts ...RepoSizeOption) (int64, erro
}
if cfg.ExcludeAlternates {
- options = append(options,
- git.Flag{Name: "--not"},
- git.Flag{Name: "--alternate-refs"},
- git.Flag{Name: "--not"},
- )
+ alternatesPath, err := repo.InfoAlternatesPath()
+ if err != nil {
+ return 0, fmt.Errorf("getting alternates path: %w", err)
+ }
+
+ // when excluding alternatives we need to be careful with using the bitmap index. If
+ // the repository is indeed linked to an alternative object directory, then we know
+ // that only the linked-to object directory will have bitmaps. Consequentially, this
+ // bitmap will only ever cover objects that are part of the alternate repository and
+ // can thus by definition not contain any objects that are only part of the repo
+ // that is linking to it. Unfortunately, this case causes us to run into an edge
+ // case in Git where the command takes significantly longer to compute the disk size
+ // when using bitmaps compared to when not using bitmaps.
+ //
+ // To work around this case we thus don't use a bitmap index in case we find that
+ // the repository has an alternates file.
+ if _, err := os.Stat(alternatesPath); err != nil && errors.Is(err, fs.ErrNotExist) {
+ // The alternates file does not exist. We can thus use the bitmap index and
+ // don't have to specify `--not --alternate-refs` given that there aren't
+ // any anyway.
+ options = append(options, git.Flag{Name: "--use-bitmap-index"})
+ } else {
+ // We either have a bitmap index or we have run into any error that is not
+ // `fs.ErrNotExist`. In that case we don't use a bitmap index, but will
+ // exclude objects reachable from alternate refs.
+ options = append(options,
+ git.Flag{Name: "--not"},
+ git.Flag{Name: "--alternate-refs"},
+ git.Flag{Name: "--not"},
+ )
+ }
+ } else {
+ // If we don't exclude objects reachable from alternate refs we can always enable
+ // use of the bitmap index.
+ options = append(options, git.Flag{Name: "--use-bitmap-index"})
}
options = append(options,
git.Flag{Name: "--all"},
git.Flag{Name: "--objects"},
- git.Flag{Name: "--use-bitmap-index"},
git.Flag{Name: "--disk-usage"},
)
diff --git a/internal/git/localrepo/repo_test.go b/internal/git/localrepo/repo_test.go
index c513139bc..50eb94ff5 100644
--- a/internal/git/localrepo/repo_test.go
+++ b/internal/git/localrepo/repo_test.go
@@ -8,12 +8,12 @@ import (
"strings"
"testing"
- "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
@@ -44,16 +44,28 @@ func TestRepo(t *testing.T) {
}
func TestSize(t *testing.T) {
+ t.Parallel()
+
cfg := testcfg.Build(t)
- gitCmdFactory := gittest.NewCommandFactory(t, cfg)
catfileCache := catfile.NewCache(cfg)
t.Cleanup(catfileCache.Stop)
+ ctx := testhelper.Context(t)
+
+ commandArgFile := filepath.Join(testhelper.TempDir(t), "args")
+ interceptingFactory := gittest.NewInterceptingCommandFactory(ctx, t, cfg, func(execEnv git.ExecutionEnvironment) string {
+ return fmt.Sprintf(`#!/bin/bash
+ echo "$@" >%q
+ exec %q "$@"
+ `, commandArgFile, execEnv.BinaryPath)
+ })
+
testCases := []struct {
- desc string
- setup func(t *testing.T) *gitalypb.Repository
- opts []RepoSizeOption
- expectedSize int64
+ desc string
+ setup func(t *testing.T) *gitalypb.Repository
+ opts []RepoSizeOption
+ expectedSize int64
+ expectedUseBitmap bool
}{
{
desc: "empty repository",
@@ -62,6 +74,7 @@ func TestSize(t *testing.T) {
repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0])
return repoProto
},
+ expectedUseBitmap: true,
},
{
desc: "referenced commit",
@@ -78,7 +91,8 @@ func TestSize(t *testing.T) {
return repoProto
},
- expectedSize: 203,
+ expectedSize: 203,
+ expectedUseBitmap: true,
},
{
desc: "unreferenced commit",
@@ -94,7 +108,8 @@ func TestSize(t *testing.T) {
return repoProto
},
- expectedSize: 0,
+ expectedSize: 0,
+ expectedUseBitmap: true,
},
{
desc: "modification to blob without repack",
@@ -119,7 +134,8 @@ func TestSize(t *testing.T) {
return repoProto
},
- expectedSize: 439,
+ expectedSize: 439,
+ expectedUseBitmap: true,
},
{
desc: "modification to blob after repack",
@@ -146,7 +162,8 @@ func TestSize(t *testing.T) {
return repoProto
},
- expectedSize: 398,
+ expectedSize: 398,
+ expectedUseBitmap: true,
},
{
desc: "excluded single ref",
@@ -174,7 +191,8 @@ func TestSize(t *testing.T) {
opts: []RepoSizeOption{
WithExcludeRefs("refs/heads/exclude-me"),
},
- expectedSize: 217,
+ expectedSize: 217,
+ expectedUseBitmap: true,
},
{
desc: "excluded everything",
@@ -194,7 +212,8 @@ func TestSize(t *testing.T) {
opts: []RepoSizeOption{
WithExcludeRefs("refs/heads/*"),
},
- expectedSize: 0,
+ expectedSize: 0,
+ expectedUseBitmap: true,
},
{
desc: "repo with alternate",
@@ -223,6 +242,9 @@ func TestSize(t *testing.T) {
// While both repositories have the same contents, we should still return
// the actual repository's size because we don't exclude the alternate.
expectedSize: 207,
+ // Even though we have an alternate, we should still use bitmap indices
+ // given that we don't use `--not --alternate-refs`.
+ expectedUseBitmap: true,
},
{
desc: "exclude alternate with identical conetnts",
@@ -253,7 +275,8 @@ func TestSize(t *testing.T) {
opts: []RepoSizeOption{
WithoutAlternates(),
},
- expectedSize: 0,
+ expectedSize: 0,
+ expectedUseBitmap: false,
},
{
desc: "exclude alternate with additional contents",
@@ -294,19 +317,28 @@ func TestSize(t *testing.T) {
opts: []RepoSizeOption{
WithoutAlternates(),
},
- expectedSize: 224,
+ expectedSize: 224,
+ expectedUseBitmap: false,
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
+ require.NoError(t, os.RemoveAll(commandArgFile))
+
repoProto := tc.setup(t)
- repo := New(config.NewLocator(cfg), gitCmdFactory, catfileCache, repoProto)
+ repo := New(config.NewLocator(cfg), interceptingFactory, catfileCache, repoProto)
- ctx := testhelper.Context(t)
size, err := repo.Size(ctx, tc.opts...)
require.NoError(t, err)
- assert.Equal(t, tc.expectedSize, size)
+ require.Equal(t, tc.expectedSize, size)
+
+ commandArgs := text.ChompBytes(testhelper.MustReadFile(t, commandArgFile))
+ if tc.expectedUseBitmap {
+ require.Contains(t, commandArgs, "--use-bitmap-index")
+ } else {
+ require.NotContains(t, commandArgs, "--use-bitmap-index")
+ }
})
}
}