diff options
author | karthik nayak <knayak@gitlab.com> | 2023-04-04 19:47:15 +0300 |
---|---|---|
committer | karthik nayak <knayak@gitlab.com> | 2023-04-04 19:47:15 +0300 |
commit | 3fd0240c8d286bab982fbb5b75dd34eb64e260b5 (patch) | |
tree | 4047c7dd33d2869e893b7762e458df96df2e383d | |
parent | d2ebb7f9a436d1476a2b750e844fe3525ae9dc8b (diff) | |
parent | 771fd6c519b60de29dff9fbef458572031cc636c (diff) |
Merge branch 'pks-localrepo-remove-size-calculations' into 'master'
localrepo: Remove function to calculate repository size
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5608
Merged-by: karthik nayak <knayak@gitlab.com>
Approved-by: karthik nayak <knayak@gitlab.com>
Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r-- | internal/git/localrepo/repo.go | 114 | ||||
-rw-r--r-- | internal/git/localrepo/repo_test.go | 328 |
2 files changed, 0 insertions, 442 deletions
diff --git a/internal/git/localrepo/repo.go b/internal/git/localrepo/repo.go index eca19b173..6892388e6 100644 --- a/internal/git/localrepo/repo.go +++ b/internal/git/localrepo/repo.go @@ -1,14 +1,9 @@ package localrepo import ( - "bytes" "context" - "errors" "fmt" - "io/fs" "os" - "strconv" - "strings" "sync" "testing" @@ -103,115 +98,6 @@ func errorWithStderr(err error, stderr []byte) error { return fmt.Errorf("%w, stderr: %q", err, stderr) } -// repoSizeConfig can be used to pass in different options to -// git rev-list in determining the size of a repository. -type repoSizeConfig struct { - // ExcludeRefs is a list of ref glob patterns to exclude from the size - // calculation. - ExcludeRefs []string - // ExcludeAlternates will exclude objects in the alternates directory - // from being counted towards the total size of the repository. - ExcludeAlternates bool -} - -// RepoSizeOption is an option which can be passed to Size -type RepoSizeOption func(*repoSizeConfig) - -// WithExcludeRefs is an option for Size that excludes certain refs from the size -// calculation. The format must be a glob pattern. -// see https://git-scm.com/docs/git-rev-list#Documentation/git-rev-list.txt---excludeltglob-patterngt -func WithExcludeRefs(excludeRefs ...string) RepoSizeOption { - return func(cfg *repoSizeConfig) { - cfg.ExcludeRefs = excludeRefs - } -} - -// WithoutAlternates will exclude any objects in the alternate objects directory -func WithoutAlternates() RepoSizeOption { - return func(cfg *repoSizeConfig) { - cfg.ExcludeAlternates = true - } -} - -// Size calculates the size of all reachable objects in bytes -func (repo *Repo) Size(ctx context.Context, opts ...RepoSizeOption) (int64, error) { - var stdout bytes.Buffer - - var cfg repoSizeConfig - for _, opt := range opts { - opt(&cfg) - } - - var options []git.Option - for _, refToExclude := range cfg.ExcludeRefs { - options = append( - options, - git.Flag{Name: fmt.Sprintf("--exclude=%s", refToExclude)}, - ) - } - - if cfg.ExcludeAlternates { - alternatesPath, err := repo.InfoAlternatesPath() - if err != nil { - return 0, fmt.Errorf("getting alternates path: %w", err) - } - - // when excluding alternatives we need to be careful with using the bitmap index. If - // the repository is indeed linked to an alternative object directory, then we know - // that only the linked-to object directory will have bitmaps. Consequentially, this - // bitmap will only ever cover objects that are part of the alternate repository and - // can thus by definition not contain any objects that are only part of the repo - // that is linking to it. Unfortunately, this case causes us to run into an edge - // case in Git where the command takes significantly longer to compute the disk size - // when using bitmaps compared to when not using bitmaps. - // - // To work around this case we thus don't use a bitmap index in case we find that - // the repository has an alternates file. - if _, err := os.Stat(alternatesPath); err != nil && errors.Is(err, fs.ErrNotExist) { - // The alternates file does not exist. We can thus use the bitmap index and - // don't have to specify `--not --alternate-refs` given that there aren't - // any anyway. - options = append(options, git.Flag{Name: "--use-bitmap-index"}) - } else { - // We either have a bitmap index or we have run into any error that is not - // `fs.ErrNotExist`. In that case we don't use a bitmap index, but will - // exclude objects reachable from alternate refs. - options = append(options, - git.Flag{Name: "--not"}, - git.Flag{Name: "--alternate-refs"}, - git.Flag{Name: "--not"}, - ) - } - } else { - // If we don't exclude objects reachable from alternate refs we can always enable - // use of the bitmap index. - options = append(options, git.Flag{Name: "--use-bitmap-index"}) - } - - options = append(options, - git.Flag{Name: "--all"}, - git.Flag{Name: "--objects"}, - git.Flag{Name: "--disk-usage"}, - ) - - if err := repo.ExecAndWait(ctx, - git.Command{ - Name: "rev-list", - Flags: options, - }, - git.WithStdout(&stdout), - ); err != nil { - return -1, err - } - - size, err := strconv.ParseInt(strings.TrimSuffix(stdout.String(), "\n"), 10, 64) - if err != nil { - return -1, err - } - - return size, nil -} - // StorageTempDir returns the temporary dir for the storage where the repo is on. // When this directory does not exist yet, it's being created. func (repo *Repo) StorageTempDir() (string, error) { diff --git a/internal/git/localrepo/repo_test.go b/internal/git/localrepo/repo_test.go index 87ebb5267..38f612799 100644 --- a/internal/git/localrepo/repo_test.go +++ b/internal/git/localrepo/repo_test.go @@ -3,9 +3,7 @@ package localrepo import ( "context" "fmt" - "os" "path/filepath" - "strings" "testing" "github.com/stretchr/testify/require" @@ -13,11 +11,8 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" - "gitlab.com/gitlab-org/gitaly/v15/internal/helper/perm" - "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" - "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" ) func TestRepo(t *testing.T) { @@ -37,329 +32,6 @@ func TestRepo(t *testing.T) { }) } -func TestSize(t *testing.T) { - t.Parallel() - - cfg := testcfg.Build(t) - catfileCache := catfile.NewCache(cfg) - t.Cleanup(catfileCache.Stop) - - ctx := testhelper.Context(t) - - commandArgFile := filepath.Join(testhelper.TempDir(t), "args") - interceptingFactory := gittest.NewInterceptingCommandFactory(t, ctx, cfg, func(execEnv git.ExecutionEnvironment) string { - return fmt.Sprintf(`#!/usr/bin/env bash - echo "$@" >%q - exec %q "$@" - `, commandArgFile, execEnv.BinaryPath) - }) - - hashDependentSize := func(sha1Size, sha256Size int64) int64 { - if gittest.ObjectHashIsSHA256() { - return sha256Size - } - return sha1Size - } - - testCases := []struct { - desc string - setup func(t *testing.T) *gitalypb.Repository - opts []RepoSizeOption - expectedSize int64 - expectedUseBitmap bool - }{ - { - desc: "empty repository", - expectedSize: 0, - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - return repoProto - }, - expectedUseBitmap: true, - }, - { - desc: "referenced commit", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - gittest.WriteCommit(t, cfg, repoPath, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "file", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - gittest.WithBranch("main"), - ) - - return repoProto - }, - expectedSize: hashDependentSize(203, 230), - expectedUseBitmap: true, - }, - { - desc: "unreferenced commit", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - gittest.WriteCommit(t, cfg, repoPath, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "file", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - ) - - return repoProto - }, - expectedSize: 0, - expectedUseBitmap: true, - }, - { - desc: "modification to blob without repack", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - rootCommitID := gittest.WriteCommit(t, cfg, repoPath, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "file", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - ) - - gittest.WriteCommit(t, cfg, repoPath, - gittest.WithParents(rootCommitID), - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "file", Mode: "100644", Content: strings.Repeat("a", 1001)}, - ), - gittest.WithMessage("modification"), - gittest.WithBranch("main"), - ) - - return repoProto - }, - expectedSize: hashDependentSize(439, 510), - expectedUseBitmap: true, - }, - { - desc: "modification to blob after repack", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - rootCommitID := gittest.WriteCommit(t, cfg, repoPath, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "file", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - ) - - gittest.WriteCommit(t, cfg, repoPath, - gittest.WithParents(rootCommitID), - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "file", Mode: "100644", Content: strings.Repeat("a", 1001)}, - ), - gittest.WithMessage("modification"), - gittest.WithBranch("main"), - ) - - gittest.Exec(t, cfg, "-C", repoPath, "repack", "-a", "-d") - - return repoProto - }, - expectedSize: hashDependentSize(398, 465), - expectedUseBitmap: true, - }, - { - desc: "excluded single ref", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - gittest.WriteCommit(t, cfg, repoPath, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "1kbblob", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - gittest.WithBranch("exclude-me"), - ) - - gittest.WriteCommit(t, cfg, repoPath, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "1kbblob", Mode: "100644", Content: strings.Repeat("x", 2000)}, - ), - gittest.WithBranch("include-me"), - ) - - return repoProto - }, - opts: []RepoSizeOption{ - WithExcludeRefs("refs/heads/exclude-me"), - }, - expectedSize: hashDependentSize(217, 245), - expectedUseBitmap: true, - }, - { - desc: "excluded everything", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - gittest.WriteCommit(t, cfg, repoPath, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "1kbblob", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - gittest.WithBranch("exclude-me"), - ) - - return repoProto - }, - opts: []RepoSizeOption{ - WithExcludeRefs("refs/heads/*"), - }, - expectedSize: 0, - expectedUseBitmap: true, - }, - { - desc: "repo with alternate", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - _, poolPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - require.NoError(t, os.WriteFile( - filepath.Join(repoPath, "objects", "info", "alternates"), - []byte(filepath.Join(poolPath, "objects")), - perm.PublicFile, - )) - - for _, path := range []string{repoPath, poolPath} { - gittest.WriteCommit(t, cfg, path, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "1kbblob", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - gittest.WithBranch("main"), - ) - } - - return repoProto - }, - // While both repositories have the same contents, we should still return - // the actual repository's size because we don't exclude the alternate. - expectedSize: hashDependentSize(207, 234), - // Even though we have an alternate, we should still use bitmap indices - // given that we don't use `--not --alternate-refs`. - expectedUseBitmap: true, - }, - { - desc: "exclude alternate with identical contents", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - _, poolPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - require.NoError(t, os.WriteFile( - filepath.Join(repoPath, "objects", "info", "alternates"), - []byte(filepath.Join(poolPath, "objects")), - perm.PublicFile, - )) - - // We write the same object into both repositories, so we should - // exclude it from our size calculations. - for _, path := range []string{repoPath, poolPath} { - gittest.WriteCommit(t, cfg, path, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "1kbblob", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - gittest.WithBranch("main"), - ) - } - - return repoProto - }, - opts: []RepoSizeOption{ - WithoutAlternates(), - }, - expectedSize: 0, - expectedUseBitmap: false, - }, - { - desc: "exclude alternate with additional contents", - setup: func(t *testing.T) *gitalypb.Repository { - repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - _, poolPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - require.NoError(t, os.WriteFile( - filepath.Join(repoPath, "objects", "info", "alternates"), - []byte(filepath.Join(poolPath, "objects")), - perm.PublicFile, - )) - - for i, path := range []string{repoPath, poolPath} { - // We first write one blob into the repo that is the same - // across both repositories. - rootCommitID := gittest.WriteCommit(t, cfg, path, - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "1kbblob", Mode: "100644", Content: strings.Repeat("a", 1000)}, - ), - ) - - // But this time we also write a second commit into each of - // the repositories that is not the same to simulate history - // that has diverged. - gittest.WriteCommit(t, cfg, path, - gittest.WithParents(rootCommitID), - gittest.WithTreeEntries( - gittest.TreeEntry{Path: "1kbblob", Mode: "100644", Content: fmt.Sprintf("%d", i)}, - ), - gittest.WithBranch("main"), - ) - } - - return repoProto - }, - opts: []RepoSizeOption{ - WithoutAlternates(), - }, - expectedSize: hashDependentSize(224, 268), - expectedUseBitmap: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { - require.NoError(t, os.RemoveAll(commandArgFile)) - - repoProto := tc.setup(t) - repo := New(config.NewLocator(cfg), interceptingFactory, catfileCache, repoProto) - - size, err := repo.Size(ctx, tc.opts...) - require.NoError(t, err) - require.Equal(t, tc.expectedSize, size) - - commandArgs := text.ChompBytes(testhelper.MustReadFile(t, commandArgFile)) - if tc.expectedUseBitmap { - require.Contains(t, commandArgs, "--use-bitmap-index") - } else { - require.NotContains(t, commandArgs, "--use-bitmap-index") - } - }) - } -} - func TestRepo_StorageTempDir(t *testing.T) { ctx := testhelper.Context(t) cfg := testcfg.Build(t) |