diff options
author | Andras Horvath <ahorvath@gitlab.com> | 2022-09-06 11:53:44 +0300 |
---|---|---|
committer | Andras Horvath <ahorvath@gitlab.com> | 2022-09-06 11:53:44 +0300 |
commit | 154b2c592dd248e7c8fe84da090e3238ea27b6ff (patch) | |
tree | d05027a19b42250a4a23787550401a7540b6e93a | |
parent | 866ca5a058f371dc3eb44f7fe24dfe3b635f18fc (diff) | |
parent | 6fad1e386afa640f58b729e7eea9bb4d3e7c2ce7 (diff) |
Merge branch 'jc-cat-file-repo-size-logging' into 'master'
Log only mode for new repository calculation methods
Closes #4447
See merge request gitlab-org/gitaly!4848
-rw-r--r-- | internal/git/gitpipe/catfile_info.go | 18 | ||||
-rw-r--r-- | internal/git/gitpipe/catfile_info_test.go | 59 | ||||
-rw-r--r-- | internal/gitaly/service/repository/size.go | 55 | ||||
-rw-r--r-- | internal/gitaly/service/repository/size_test.go | 74 | ||||
-rw-r--r-- | internal/metadata/featureflag/ff_use_new_calculation_for_repo_size.go | 10 |
5 files changed, 183 insertions, 33 deletions
diff --git a/internal/git/gitpipe/catfile_info.go b/internal/git/gitpipe/catfile_info.go index 3aec77cca..07e29525b 100644 --- a/internal/git/gitpipe/catfile_info.go +++ b/internal/git/gitpipe/catfile_info.go @@ -27,6 +27,7 @@ type CatfileInfoResult struct { type catfileInfoConfig struct { skipResult func(*catfile.ObjectInfo) bool + diskUsage bool } // CatfileInfoOption is an option for the CatfileInfo and CatfileInfoAllObjects pipeline steps. @@ -41,6 +42,14 @@ func WithSkipCatfileInfoResult(skipResult func(*catfile.ObjectInfo) bool) Catfil } } +// WithDiskUsageSize will cause the size of the object to be returned to be the +// size it takes up on disk. This value will override the existing size field. +func WithDiskUsageSize() CatfileInfoOption { + return func(cfg *catfileInfoConfig) { + cfg.diskUsage = true + } +} + type catfileInfoRequest struct { objectID git.ObjectID objectName []byte @@ -193,12 +202,19 @@ func CatfileInfoAllObjects( return } + batchCheckOption := git.Flag{Name: "--batch-check"} + + if cfg.diskUsage { + batchCheckOption.Name = batchCheckOption.Name + + fmt.Sprintf("=%%(objectname) %%(objecttype) %%(objectsize:disk)") + } + var stderr bytes.Buffer cmd, err := repo.Exec(ctx, git.SubCmd{ Name: "cat-file", Flags: []git.Option{ + batchCheckOption, git.Flag{Name: "--batch-all-objects"}, - git.Flag{Name: "--batch-check"}, git.Flag{Name: "--buffer"}, git.Flag{Name: "--unordered"}, }, diff --git a/internal/git/gitpipe/catfile_info_test.go b/internal/git/gitpipe/catfile_info_test.go index 94cf6a78d..317c93ba3 100644 --- a/internal/git/gitpipe/catfile_info_test.go +++ b/internal/git/gitpipe/catfile_info_test.go @@ -335,3 +335,62 @@ func TestCatfileInfoAllObjects(t *testing.T) { }, it.Result()) }) } + +func TestCatfileInfo_WithDiskUsageSize(t *testing.T) { + t.Parallel() + + cfg := testcfg.Build(t) + ctx := testhelper.Context(t) + + repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + tree1 := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + { + Path: "foobar", + Mode: "100644", + OID: gittest.WriteBlob(t, cfg, repoPath, bytes.Repeat([]byte("a"), 100)), + }, + }) + initialCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(tree1)) + + tree2 := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + { + Path: "foobar", + Mode: "100644", + // take advantage of compression + OID: gittest.WriteBlob(t, cfg, repoPath, append(bytes.Repeat([]byte("a"), 100), + '\n', + 'b', + )), + }, + }) + gittest.WriteCommit( + t, + cfg, + repoPath, + gittest.WithTree(tree2), + gittest.WithParents(initialCommitID), + gittest.WithBranch("master"), + ) + + gittest.Exec(t, cfg, "-C", repoPath, "gc") + + itWithoutDiskSize := CatfileInfoAllObjects(ctx, repo) + itWithDiskSize := CatfileInfoAllObjects(ctx, repo, WithDiskUsageSize()) + + var totalWithoutDiskSize, totalWithDiskSize int64 + for itWithoutDiskSize.Next() { + totalWithoutDiskSize += itWithoutDiskSize.Result().ObjectSize() + } + require.NoError(t, itWithoutDiskSize.Err()) + + for itWithDiskSize.Next() { + totalWithDiskSize += itWithDiskSize.Result().ObjectSize() + } + require.NoError(t, itWithDiskSize.Err()) + + require.Less(t, totalWithDiskSize, totalWithoutDiskSize) +} diff --git a/internal/gitaly/service/repository/size.go b/internal/gitaly/service/repository/size.go index 6b3002c15..df03e2300 100644 --- a/internal/gitaly/service/repository/size.go +++ b/internal/gitaly/service/repository/size.go @@ -21,30 +21,42 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" ) +// RepositorySize returns the size of the specified repository in kibibytes. +// By default, this calculation is performed using the disk usage command. +// +// Optionally the feature flags `revlist_for_repo_size` or `catfile_repo_size` +// can be enabled to log an alternative calculation of the repository size. +// The original size derived from disk usage is still returned. +// +// In conjunction with the other flags the `use_new_repository_size` feature +// flag can be enabled to return the alternative repository size calculation +// instead of the size derived from the disk usage command. func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySizeRequest) (*gitalypb.RepositorySizeResponse, error) { repo := s.localrepo(in.GetRepository()) - var size int64 - var err error path, err := repo.Path() if err != nil { return nil, err } - duSize := getPathSize(ctx, path) + sizeKiB := getPathSize(ctx, path) + + logger := ctxlogrus.Extract(ctx).WithField("repo_size_du_bytes", sizeKiB*1024) + var newSizeBytes int64 if featureflag.RevlistForRepoSize.IsEnabled(ctx) { - size, err = calculateSizeWithRevlist(ctx, repo) + newSizeBytes, err = calculateSizeWithRevlist(ctx, repo) if err != nil { - return nil, fmt.Errorf("calculating repository size with git-rev-list: %w,", err) + return nil, fmt.Errorf("calculating repository size with git-rev-list: %w", err) } - ctxlogrus.Extract(ctx). - WithField("repo_size_revlist", size). - WithField("repo_size_du", duSize). - Info("repository size calculated") + logger.WithField("repo_size_revlist_bytes", newSizeBytes).Info("repository size calculated") + + if featureflag.UseNewRepoSize.IsEnabled(ctx) { + sizeKiB = newSizeBytes / 1024 + } } else if featureflag.CatfileRepoSize.IsEnabled(ctx) { - size, err = calculateSizeWithCatfile( + newSizeBytes, err = calculateSizeWithCatfile( ctx, repo, s.locator, @@ -57,15 +69,14 @@ func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySize return nil, fmt.Errorf("calculating repository size with git-cat-file: %w", err) } - ctxlogrus.Extract(ctx). - WithField("repo_size_catfile", size). - WithField("repo_size_du", duSize). - Info("repository size calculated") - } else { - size = duSize + logger.WithField("repo_size_catfile_bytes", newSizeBytes).Info("repository size calculated") + + if featureflag.UseNewRepoSize.IsEnabled(ctx) { + sizeKiB = newSizeBytes / 1024 + } } - return &gitalypb.RepositorySizeResponse{Size: size}, nil + return &gitalypb.RepositorySizeResponse{Size: sizeKiB}, nil } // calculateSizeWithCatfile calculates the repository size using git-cat-file. @@ -88,6 +99,7 @@ func calculateSizeWithCatfile( catfileInfoIterator := gitpipe.CatfileInfoAllObjects( ctx, repo, + gitpipe.WithDiskUsageSize(), ) for catfileInfoIterator.Next() { @@ -111,6 +123,7 @@ func calculateSizeWithCatfile( catfileInfoIterator = gitpipe.CatfileInfoAllObjects( ctx, pool.Repo, + gitpipe.WithDiskUsageSize(), ) for catfileInfoIterator.Next() { @@ -123,8 +136,8 @@ func calculateSizeWithCatfile( } size -= poolSize - // return the size in kb - return size / 1024, nil + // return the size in bytes + return size, nil } func calculateSizeWithRevlist(ctx context.Context, repo *localrepo.Repo) (int64, error) { @@ -142,8 +155,8 @@ func calculateSizeWithRevlist(ctx context.Context, repo *localrepo.Repo) (int64, return 0, err } - // return the size in kb to remain consistent - return (size / 1024), nil + // return the size in bytes + return size, nil } func (s *server) GetObjectDirectorySize(ctx context.Context, in *gitalypb.GetObjectDirectorySizeRequest) (*gitalypb.GetObjectDirectorySizeResponse, error) { diff --git a/internal/gitaly/service/repository/size_test.go b/internal/gitaly/service/repository/size_test.go index 98f129bea..0abb67c41 100644 --- a/internal/gitaly/service/repository/size_test.go +++ b/internal/gitaly/service/repository/size_test.go @@ -7,6 +7,8 @@ import ( "context" "testing" + "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git" @@ -18,6 +20,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testserver" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" "google.golang.org/grpc/codes" "google.golang.org/protobuf/proto" @@ -33,6 +36,7 @@ func TestRepositorySize_SuccessfulRequest(t *testing.T) { featureSet := testhelper.NewFeatureSets( featureflag.RevlistForRepoSize, featureflag.CatfileRepoSize, + featureflag.UseNewRepoSize, ) featureSet.Run(t, testSuccessfulRepositorySizeRequest) @@ -97,8 +101,9 @@ func testSuccessfulRepositorySizeRequestPoolMember(t *testing.T, ctx context.Con response, err = repoClient.RepositorySize(ctx, sizeRequest) require.NoError(t, err) - if featureflag.RevlistForRepoSize.IsEnabled(ctx) || - featureflag.CatfileRepoSize.IsEnabled(ctx) { + if featureflag.UseNewRepoSize.IsEnabled(ctx) && + (featureflag.RevlistForRepoSize.IsEnabled(ctx) || + featureflag.CatfileRepoSize.IsEnabled(ctx)) { assert.Equal(t, int64(0), response.GetSize()) } else { assert.Less(t, response.GetSize(), sizeBeforePool) @@ -106,7 +111,8 @@ func testSuccessfulRepositorySizeRequestPoolMember(t *testing.T, ctx context.Con } func testSuccessfulRepositorySizeRequest(t *testing.T, ctx context.Context) { - cfg, repo, repoPath, client := setupRepositoryService(ctx, t) + logger, hook := test.NewNullLogger() + cfg, repo, repoPath, client := setupRepositoryService(ctx, t, testserver.WithLogger(logger)) request := &gitalypb.RepositorySizeRequest{Repository: repo} response, err := client.RepositorySize(ctx, request) @@ -136,14 +142,60 @@ func testSuccessfulRepositorySizeRequest(t *testing.T, ctx context.Context) { responseAfterRefs, err := client.RepositorySize(ctx, request) require.NoError(t, err) - if featureflag.RevlistForRepoSize.IsEnabled(ctx) { - assert.Equal( - t, - response.Size, - responseAfterRefs.Size, - "excluded refs do not contribute to the repository size", - ) - } else { + // We expect two log entries with "repository size calculated" + // since we called RepositorySize twice, but we are really interested + // in the results of the second log. + var entries []*logrus.Entry + switch { + case featureflag.RevlistForRepoSize.IsEnabled(ctx): + for _, entry := range hook.AllEntries() { + _, ok := entry.Data["repo_size_revlist_bytes"] + if ok { + entries = append(entries, entry) + } + } + + require.Len(t, entries, 2) + revlistSizeInLog, ok := entries[1].Data["repo_size_revlist_bytes"] + require.True(t, ok) + require.Equal(t, "repository size calculated", entries[1].Message) + + duSizeInLog, ok := entries[1].Data["repo_size_du_bytes"] + require.True(t, ok) + + require.Less(t, revlistSizeInLog, duSizeInLog) + if featureflag.UseNewRepoSize.IsEnabled(ctx) { + assert.Equal( + t, + response.Size, + responseAfterRefs.Size, + "excluded refs do not contribute to the repository size", + ) + } + case featureflag.CatfileRepoSize.IsEnabled(ctx): + for _, entry := range hook.AllEntries() { + _, ok := entry.Data["repo_size_catfile_bytes"] + if ok { + entries = append(entries, entry) + } + } + + require.Len(t, entries, 2) + catfileSizeInLog, ok := entries[1].Data["repo_size_catfile_bytes"] + require.True(t, ok) + duSizeInLog, ok := entries[1].Data["repo_size_du_bytes"] + require.True(t, ok) + + require.Equal(t, "repository size calculated", entries[1].Message) + + require.Less(t, catfileSizeInLog, duSizeInLog) + + if featureflag.UseNewRepoSize.IsEnabled(ctx) { + // Because we divide by 1024 to get kibibytes, small + // differences might not appear in the final size. + assert.LessOrEqual(t, response.Size, responseAfterRefs.Size) + } + default: assert.Less(t, response.Size, responseAfterRefs.Size) } } diff --git a/internal/metadata/featureflag/ff_use_new_calculation_for_repo_size.go b/internal/metadata/featureflag/ff_use_new_calculation_for_repo_size.go new file mode 100644 index 000000000..960e213c1 --- /dev/null +++ b/internal/metadata/featureflag/ff_use_new_calculation_for_repo_size.go @@ -0,0 +1,10 @@ +package featureflag + +// UseNewRepoSize will send the new repository size values in RepositorySize to +// the client. +var UseNewRepoSize = NewFeatureFlag( + "use_new_repository_size", + "v15.4.0", + "https://gitlab.com/gitlab-org/gitaly/-/issues/4448", + false, +) |