diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-03-15 15:32:01 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-03-20 08:12:39 +0300 |
commit | bd921065b363c21a70d3e96fabdb95c4964d59a9 (patch) | |
tree | 65e8de39f49ed369730f8579cf66a145692ad8e0 | |
parent | 1d3d1eb3f1e32c0cb8356e9ffb0cdab984f1a1f5 (diff) |
repository: Drop size calculations via git-cat-file(1)
We have three different ways to calculate the repository size:
- The plain old way of using du(1) on the repository. This is fast,
but also counts in repository metadata, unreachable objects and
duplicate objects.
- The implementation based on git-rev-list(1). It allows to only
count in objects that are reachable via specific references and is
overall the most flexible approach in how we calculate the size.
The downside of this is that it is really slow compared to du(h).
- The implementation based on git-cat-file(1). This has been added
as a compromise between the other two options so that we can at
least account for duplicate objects. It's faster than using
git-rev-list(1), but still slower than du(1).
While the latter two options have been implemented quite a while ago
now, we still haven't managed to roll them out due to performance
reasons. By now it is clear though that we can either choose between
the correct and flexible approach, or the fast and dirty approach. Which
means that in the long run, our target architecture is going to be the
approach based on git-rev-list(1).
This kind of leaves the third implementation based on git-cat-file(1) on
the chopping block. It was implemented after we've seen how slow the
git-rev-list(1) based approach is as a middle ground where we can at
least count out duplicate objects while being reasonably fast. But it
did not deliver on that promise as it still wasn't fast enough without
also changing the overall architecture of how repository sizes are
calculated. So this approach is disfavoured nowadays.
Remove the git-cat-file(1) based implementation. We are not going to use
it anyway.
Changelog: removed
-rw-r--r-- | internal/gitaly/service/repository/size.go | 89 | ||||
-rw-r--r-- | internal/gitaly/service/repository/size_test.go | 42 | ||||
-rw-r--r-- | internal/metadata/featureflag/ff_catfile_for_repo_size.go | 10 |
3 files changed, 2 insertions, 139 deletions
diff --git a/internal/gitaly/service/repository/size.go b/internal/gitaly/service/repository/size.go index 632563540..dfe1b2fdd 100644 --- a/internal/gitaly/service/repository/size.go +++ b/internal/gitaly/service/repository/size.go @@ -14,14 +14,8 @@ import ( "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" "gitlab.com/gitlab-org/gitaly/v15/internal/command" "gitlab.com/gitlab-org/gitaly/v15/internal/git" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/gitpipe" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/objectpool" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service" - "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/storage" - "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction" "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -61,9 +55,8 @@ func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySize logger := ctxlogrus.Extract(ctx).WithField("repo_size_du_bytes", sizeKiB*1024) - var newSizeBytes int64 if featureflag.RevlistForRepoSize.IsEnabled(ctx) { - newSizeBytes, err = calculateSizeWithRevlist(ctx, repo) + newSizeBytes, err := calculateSizeWithRevlist(ctx, repo) if err != nil { return nil, fmt.Errorf("calculating repository size with git-rev-list: %w", err) } @@ -73,91 +66,11 @@ func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySize if featureflag.UseNewRepoSize.IsEnabled(ctx) { sizeKiB = newSizeBytes / 1024 } - } else if featureflag.CatfileRepoSize.IsEnabled(ctx) { - newSizeBytes, err = calculateSizeWithCatfile( - ctx, - repo, - s.locator, - s.gitCmdFactory, - s.catfileCache, - s.txManager, - s.housekeepingManager, - ) - if err != nil { - return nil, fmt.Errorf("calculating repository size with git-cat-file: %w", err) - } - - logger.WithField("repo_size_catfile_bytes", newSizeBytes).Info("repository size calculated") - - if featureflag.UseNewRepoSize.IsEnabled(ctx) { - sizeKiB = newSizeBytes / 1024 - } } return &gitalypb.RepositorySizeResponse{Size: sizeKiB}, nil } -// calculateSizeWithCatfile calculates the repository size using git-cat-file. -// In the case the repository belongs to a pool, it will subract the total -// size of the pool repository objects from its total size. One limitation of -// this approach is that we don't distinguish whether an object in the pool -// repository belongs to the fork repository, so in fact we may end up with a -// smaller total size and theoretically could go negative. -func calculateSizeWithCatfile( - ctx context.Context, - repo *localrepo.Repo, - locator storage.Locator, - gitCmdFactory git.CommandFactory, - catfileCache catfile.Cache, - txManager transaction.Manager, - housekeepingManager housekeeping.Manager, -) (int64, error) { - var size int64 - - catfileInfoIterator := gitpipe.CatfileInfoAllObjects( - ctx, - repo, - gitpipe.WithDiskUsageSize(), - ) - - for catfileInfoIterator.Next() { - size += catfileInfoIterator.Result().ObjectSize() - } - - if err := catfileInfoIterator.Err(); err != nil { - return 0, err - } - - var poolSize int64 - - if pool, err := objectpool.FromRepo( - locator, - gitCmdFactory, - catfileCache, - txManager, - housekeepingManager, - repo, - ); err == nil && pool != nil { - catfileInfoIterator = gitpipe.CatfileInfoAllObjects( - ctx, - pool.Repo, - gitpipe.WithDiskUsageSize(), - ) - - for catfileInfoIterator.Next() { - poolSize += catfileInfoIterator.Result().ObjectSize() - } - - if err := catfileInfoIterator.Err(); err != nil { - return 0, err - } - } - - size -= poolSize - // return the size in bytes - return size, nil -} - func calculateSizeWithRevlist(ctx context.Context, repo *localrepo.Repo) (int64, error) { var excludes []string for refPrefix := range git.InternalRefPrefixes { diff --git a/internal/gitaly/service/repository/size_test.go b/internal/gitaly/service/repository/size_test.go index 67bb0cd73..ae57e433d 100644 --- a/internal/gitaly/service/repository/size_test.go +++ b/internal/gitaly/service/repository/size_test.go @@ -36,7 +36,6 @@ func TestRepositorySize_SuccessfulRequest(t *testing.T) { featureSet := testhelper.NewFeatureSets( featureflag.RepositorySizeViaWalk, featureflag.RevlistForRepoSize, - featureflag.CatfileRepoSize, featureflag.UseNewRepoSize, ) @@ -95,9 +94,7 @@ func testSuccessfulRepositorySizeRequestPoolMember(t *testing.T, ctx context.Con response, err = repoClient.RepositorySize(ctx, sizeRequest) require.NoError(t, err) - if featureflag.UseNewRepoSize.IsEnabled(ctx) && - (featureflag.RevlistForRepoSize.IsEnabled(ctx) || - featureflag.CatfileRepoSize.IsEnabled(ctx)) { + if featureflag.UseNewRepoSize.IsEnabled(ctx) && featureflag.RevlistForRepoSize.IsEnabled(ctx) { assert.Equal(t, int64(0), response.GetSize()) } else { assert.Less(t, response.GetSize(), sizeBeforePool) @@ -169,29 +166,6 @@ func testSuccessfulRepositorySizeRequest(t *testing.T, ctx context.Context) { "excluded refs do not contribute to the repository size", ) } - case featureflag.CatfileRepoSize.IsEnabled(ctx): - for _, entry := range hook.AllEntries() { - _, ok := entry.Data["repo_size_catfile_bytes"] - if ok { - entries = append(entries, entry) - } - } - - require.Len(t, entries, 2) - catfileSizeInLog, ok := entries[1].Data["repo_size_catfile_bytes"] - require.True(t, ok) - duSizeInLog, ok := entries[1].Data["repo_size_du_bytes"] - require.True(t, ok) - - require.Equal(t, "repository size calculated", entries[1].Message) - - require.Less(t, catfileSizeInLog, duSizeInLog) - - if featureflag.UseNewRepoSize.IsEnabled(ctx) { - // Because we divide by 1024 to get kibibytes, small - // differences might not appear in the final size. - assert.LessOrEqual(t, response.Size, responseAfterRefs.Size) - } default: assert.Less(t, response.Size, responseAfterRefs.Size) } @@ -243,7 +217,6 @@ func BenchmarkRepositorySize(b *testing.B) { ctx := testhelper.Context(b) ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, false) ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, false) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, false) return ctx }, }, @@ -253,7 +226,6 @@ func BenchmarkRepositorySize(b *testing.B) { ctx := testhelper.Context(b) ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, true) ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, false) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, false) return ctx }, }, @@ -264,18 +236,6 @@ func BenchmarkRepositorySize(b *testing.B) { ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, false) ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, true) ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.UseNewRepoSize, true) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, false) - return ctx - }, - }, - { - desc: "cat-file", - setupContext: func(b *testing.B) context.Context { - ctx := testhelper.Context(b) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, false) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, false) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.UseNewRepoSize, true) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, true) return ctx }, }, diff --git a/internal/metadata/featureflag/ff_catfile_for_repo_size.go b/internal/metadata/featureflag/ff_catfile_for_repo_size.go deleted file mode 100644 index d3f409818..000000000 --- a/internal/metadata/featureflag/ff_catfile_for_repo_size.go +++ /dev/null @@ -1,10 +0,0 @@ -package featureflag - -// CatfileRepoSize will enable the rate limiter to reject requests beyond a configured -// rate. -var CatfileRepoSize = NewFeatureFlag( - "catfile_repo_size", - "v15.3.0", - "https://gitlab.com/gitlab-org/gitaly/-/issues/4421", - false, -) |