Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2023-03-15 15:32:01 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2023-03-20 08:12:39 +0300
commitbd921065b363c21a70d3e96fabdb95c4964d59a9 (patch)
tree65e8de39f49ed369730f8579cf66a145692ad8e0
parent1d3d1eb3f1e32c0cb8356e9ffb0cdab984f1a1f5 (diff)
repository: Drop size calculations via git-cat-file(1)
We have three different ways to calculate the repository size: - The plain old way of using du(1) on the repository. This is fast, but also counts in repository metadata, unreachable objects and duplicate objects. - The implementation based on git-rev-list(1). It allows to only count in objects that are reachable via specific references and is overall the most flexible approach in how we calculate the size. The downside of this is that it is really slow compared to du(h). - The implementation based on git-cat-file(1). This has been added as a compromise between the other two options so that we can at least account for duplicate objects. It's faster than using git-rev-list(1), but still slower than du(1). While the latter two options have been implemented quite a while ago now, we still haven't managed to roll them out due to performance reasons. By now it is clear though that we can either choose between the correct and flexible approach, or the fast and dirty approach. Which means that in the long run, our target architecture is going to be the approach based on git-rev-list(1). This kind of leaves the third implementation based on git-cat-file(1) on the chopping block. It was implemented after we've seen how slow the git-rev-list(1) based approach is as a middle ground where we can at least count out duplicate objects while being reasonably fast. But it did not deliver on that promise as it still wasn't fast enough without also changing the overall architecture of how repository sizes are calculated. So this approach is disfavoured nowadays. Remove the git-cat-file(1) based implementation. We are not going to use it anyway. Changelog: removed
-rw-r--r--internal/gitaly/service/repository/size.go89
-rw-r--r--internal/gitaly/service/repository/size_test.go42
-rw-r--r--internal/metadata/featureflag/ff_catfile_for_repo_size.go10
3 files changed, 2 insertions, 139 deletions
diff --git a/internal/gitaly/service/repository/size.go b/internal/gitaly/service/repository/size.go
index 632563540..dfe1b2fdd 100644
--- a/internal/gitaly/service/repository/size.go
+++ b/internal/gitaly/service/repository/size.go
@@ -14,14 +14,8 @@ import (
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
"gitlab.com/gitlab-org/gitaly/v15/internal/command"
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/gitpipe"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/objectpool"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service"
- "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/storage"
- "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction"
"gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/structerr"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
@@ -61,9 +55,8 @@ func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySize
logger := ctxlogrus.Extract(ctx).WithField("repo_size_du_bytes", sizeKiB*1024)
- var newSizeBytes int64
if featureflag.RevlistForRepoSize.IsEnabled(ctx) {
- newSizeBytes, err = calculateSizeWithRevlist(ctx, repo)
+ newSizeBytes, err := calculateSizeWithRevlist(ctx, repo)
if err != nil {
return nil, fmt.Errorf("calculating repository size with git-rev-list: %w", err)
}
@@ -73,91 +66,11 @@ func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySize
if featureflag.UseNewRepoSize.IsEnabled(ctx) {
sizeKiB = newSizeBytes / 1024
}
- } else if featureflag.CatfileRepoSize.IsEnabled(ctx) {
- newSizeBytes, err = calculateSizeWithCatfile(
- ctx,
- repo,
- s.locator,
- s.gitCmdFactory,
- s.catfileCache,
- s.txManager,
- s.housekeepingManager,
- )
- if err != nil {
- return nil, fmt.Errorf("calculating repository size with git-cat-file: %w", err)
- }
-
- logger.WithField("repo_size_catfile_bytes", newSizeBytes).Info("repository size calculated")
-
- if featureflag.UseNewRepoSize.IsEnabled(ctx) {
- sizeKiB = newSizeBytes / 1024
- }
}
return &gitalypb.RepositorySizeResponse{Size: sizeKiB}, nil
}
-// calculateSizeWithCatfile calculates the repository size using git-cat-file.
-// In the case the repository belongs to a pool, it will subract the total
-// size of the pool repository objects from its total size. One limitation of
-// this approach is that we don't distinguish whether an object in the pool
-// repository belongs to the fork repository, so in fact we may end up with a
-// smaller total size and theoretically could go negative.
-func calculateSizeWithCatfile(
- ctx context.Context,
- repo *localrepo.Repo,
- locator storage.Locator,
- gitCmdFactory git.CommandFactory,
- catfileCache catfile.Cache,
- txManager transaction.Manager,
- housekeepingManager housekeeping.Manager,
-) (int64, error) {
- var size int64
-
- catfileInfoIterator := gitpipe.CatfileInfoAllObjects(
- ctx,
- repo,
- gitpipe.WithDiskUsageSize(),
- )
-
- for catfileInfoIterator.Next() {
- size += catfileInfoIterator.Result().ObjectSize()
- }
-
- if err := catfileInfoIterator.Err(); err != nil {
- return 0, err
- }
-
- var poolSize int64
-
- if pool, err := objectpool.FromRepo(
- locator,
- gitCmdFactory,
- catfileCache,
- txManager,
- housekeepingManager,
- repo,
- ); err == nil && pool != nil {
- catfileInfoIterator = gitpipe.CatfileInfoAllObjects(
- ctx,
- pool.Repo,
- gitpipe.WithDiskUsageSize(),
- )
-
- for catfileInfoIterator.Next() {
- poolSize += catfileInfoIterator.Result().ObjectSize()
- }
-
- if err := catfileInfoIterator.Err(); err != nil {
- return 0, err
- }
- }
-
- size -= poolSize
- // return the size in bytes
- return size, nil
-}
-
func calculateSizeWithRevlist(ctx context.Context, repo *localrepo.Repo) (int64, error) {
var excludes []string
for refPrefix := range git.InternalRefPrefixes {
diff --git a/internal/gitaly/service/repository/size_test.go b/internal/gitaly/service/repository/size_test.go
index 67bb0cd73..ae57e433d 100644
--- a/internal/gitaly/service/repository/size_test.go
+++ b/internal/gitaly/service/repository/size_test.go
@@ -36,7 +36,6 @@ func TestRepositorySize_SuccessfulRequest(t *testing.T) {
featureSet := testhelper.NewFeatureSets(
featureflag.RepositorySizeViaWalk,
featureflag.RevlistForRepoSize,
- featureflag.CatfileRepoSize,
featureflag.UseNewRepoSize,
)
@@ -95,9 +94,7 @@ func testSuccessfulRepositorySizeRequestPoolMember(t *testing.T, ctx context.Con
response, err = repoClient.RepositorySize(ctx, sizeRequest)
require.NoError(t, err)
- if featureflag.UseNewRepoSize.IsEnabled(ctx) &&
- (featureflag.RevlistForRepoSize.IsEnabled(ctx) ||
- featureflag.CatfileRepoSize.IsEnabled(ctx)) {
+ if featureflag.UseNewRepoSize.IsEnabled(ctx) && featureflag.RevlistForRepoSize.IsEnabled(ctx) {
assert.Equal(t, int64(0), response.GetSize())
} else {
assert.Less(t, response.GetSize(), sizeBeforePool)
@@ -169,29 +166,6 @@ func testSuccessfulRepositorySizeRequest(t *testing.T, ctx context.Context) {
"excluded refs do not contribute to the repository size",
)
}
- case featureflag.CatfileRepoSize.IsEnabled(ctx):
- for _, entry := range hook.AllEntries() {
- _, ok := entry.Data["repo_size_catfile_bytes"]
- if ok {
- entries = append(entries, entry)
- }
- }
-
- require.Len(t, entries, 2)
- catfileSizeInLog, ok := entries[1].Data["repo_size_catfile_bytes"]
- require.True(t, ok)
- duSizeInLog, ok := entries[1].Data["repo_size_du_bytes"]
- require.True(t, ok)
-
- require.Equal(t, "repository size calculated", entries[1].Message)
-
- require.Less(t, catfileSizeInLog, duSizeInLog)
-
- if featureflag.UseNewRepoSize.IsEnabled(ctx) {
- // Because we divide by 1024 to get kibibytes, small
- // differences might not appear in the final size.
- assert.LessOrEqual(t, response.Size, responseAfterRefs.Size)
- }
default:
assert.Less(t, response.Size, responseAfterRefs.Size)
}
@@ -243,7 +217,6 @@ func BenchmarkRepositorySize(b *testing.B) {
ctx := testhelper.Context(b)
ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, false)
ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, false)
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, false)
return ctx
},
},
@@ -253,7 +226,6 @@ func BenchmarkRepositorySize(b *testing.B) {
ctx := testhelper.Context(b)
ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, true)
ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, false)
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, false)
return ctx
},
},
@@ -264,18 +236,6 @@ func BenchmarkRepositorySize(b *testing.B) {
ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, false)
ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, true)
ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.UseNewRepoSize, true)
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, false)
- return ctx
- },
- },
- {
- desc: "cat-file",
- setupContext: func(b *testing.B) context.Context {
- ctx := testhelper.Context(b)
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RepositorySizeViaWalk, false)
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.RevlistForRepoSize, false)
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.UseNewRepoSize, true)
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CatfileRepoSize, true)
return ctx
},
},
diff --git a/internal/metadata/featureflag/ff_catfile_for_repo_size.go b/internal/metadata/featureflag/ff_catfile_for_repo_size.go
deleted file mode 100644
index d3f409818..000000000
--- a/internal/metadata/featureflag/ff_catfile_for_repo_size.go
+++ /dev/null
@@ -1,10 +0,0 @@
-package featureflag
-
-// CatfileRepoSize will enable the rate limiter to reject requests beyond a configured
-// rate.
-var CatfileRepoSize = NewFeatureFlag(
- "catfile_repo_size",
- "v15.3.0",
- "https://gitlab.com/gitlab-org/gitaly/-/issues/4421",
- false,
-)