Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-02-11 17:07:37 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-02-16 09:59:59 +0300
commitceeed993e3c363dbc372a0ae0cff5ec98e6effe4 (patch)
tree4ccdb4e50a6e6a36dfdb0f2ab978a195c6c0bfa0
parent8f6670062530e3d5f0aba581bdb3c9ff9a543fa4 (diff)
repository: Move optimization of repositories into housekeeping package
We want to allow for OptimizeRepository to be called without invoking an RPC in order to allow us to call it in more contexts. Now that all its dependent parts are part of the housekeeping package this commit moves the logic of OptimizeRepository in there, too.
-rw-r--r--internal/git/gittest/repo.go7
-rw-r--r--internal/git/housekeeping/objects.go6
-rw-r--r--internal/git/housekeeping/optimize_repository.go391
-rw-r--r--internal/git/housekeeping/optimize_repository_test.go559
-rw-r--r--internal/gitaly/service/repository/optimize.go390
-rw-r--r--internal/gitaly/service/repository/optimize_test.go558
6 files changed, 963 insertions, 948 deletions
diff --git a/internal/git/gittest/repo.go b/internal/git/gittest/repo.go
index b250042c1..10167fb92 100644
--- a/internal/git/gittest/repo.go
+++ b/internal/git/gittest/repo.go
@@ -174,6 +174,8 @@ type InitRepoOpts struct {
// WithWorktree determines whether the resulting Git repository should have a worktree or
// not.
WithWorktree bool
+ // WithRelativePath determines the relative path of this repository.
+ WithRelativePath string
}
// InitRepo creates a new empty repository in the given storage. You can either pass no or exactly
@@ -186,7 +188,10 @@ func InitRepo(t testing.TB, cfg config.Cfg, storage config.Storage, opts ...Init
opt = opts[0]
}
- relativePath := NewRepositoryName(t, !opt.WithWorktree)
+ relativePath := opt.WithRelativePath
+ if relativePath == "" {
+ relativePath = NewRepositoryName(t, !opt.WithWorktree)
+ }
repoPath := filepath.Join(storage.Path, relativePath)
args := []string{"init"}
diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go
index 3258dab76..afa52b291 100644
--- a/internal/git/housekeeping/objects.go
+++ b/internal/git/housekeeping/objects.go
@@ -11,6 +11,12 @@ import (
"gitlab.com/gitlab-org/gitaly/v14/internal/git/stats"
)
+const (
+ // looseObjectLimit is the limit of loose objects we accept both when doing incremental
+ // repacks and when pruning objects.
+ looseObjectLimit = 1024
+)
+
// RepackObjectsConfig is configuration for RepackObjects.
type RepackObjectsConfig struct {
// FullRepack determines whether all reachable objects should be repacked into a single
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go
new file mode 100644
index 000000000..7fdb76f8a
--- /dev/null
+++ b/internal/git/housekeeping/optimize_repository.go
@@ -0,0 +1,391 @@
+package housekeeping
+
+import (
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io/fs"
+ "math"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/stats"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/transaction"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/helper"
+)
+
+// OptimizeRepository performs optimizations on the repository. Whether optimizations are performed
+// or not depends on a set of heuristics.
+func OptimizeRepository(ctx context.Context, repo *localrepo.Repo, txManager transaction.Manager) error {
+ optimizations := struct {
+ PackedObjects bool `json:"packed_objects"`
+ PrunedObjects bool `json:"pruned_objects"`
+ PackedRefs bool `json:"packed_refs"`
+ }{}
+ defer func() {
+ ctxlogrus.Extract(ctx).WithField("optimizations", optimizations).Info("optimized repository")
+ }()
+
+ if err := Perform(ctx, repo, txManager); err != nil {
+ return fmt.Errorf("could not execute housekeeping: %w", err)
+ }
+
+ if err := CleanupWorktrees(ctx, repo); err != nil {
+ return fmt.Errorf("could not clean up worktrees: %w", err)
+ }
+
+ didRepack, err := repackIfNeeded(ctx, repo)
+ if err != nil {
+ return fmt.Errorf("could not repack: %w", err)
+ }
+ optimizations.PackedObjects = didRepack
+
+ didPrune, err := pruneIfNeeded(ctx, repo)
+ if err != nil {
+ return fmt.Errorf("could not prune: %w", err)
+ }
+ optimizations.PrunedObjects = didPrune
+
+ didPackRefs, err := packRefsIfNeeded(ctx, repo)
+ if err != nil {
+ return fmt.Errorf("could not pack refs: %w", err)
+ }
+ optimizations.PackedRefs = didPackRefs
+
+ return nil
+}
+
+// repackIfNeeded uses a set of heuristics to determine whether the repository needs a
+// full repack and, if so, repacks it.
+func repackIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
+ repackNeeded, cfg, err := needsRepacking(repo)
+ if err != nil {
+ return false, fmt.Errorf("determining whether repo needs repack: %w", err)
+ }
+
+ if !repackNeeded {
+ return false, nil
+ }
+
+ if err := RepackObjects(ctx, repo, cfg); err != nil {
+ return false, err
+ }
+
+ return true, nil
+}
+
+func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
+ repoPath, err := repo.Path()
+ if err != nil {
+ return false, RepackObjectsConfig{}, fmt.Errorf("getting repository path: %w", err)
+ }
+
+ altFile, err := repo.InfoAlternatesPath()
+ if err != nil {
+ return false, RepackObjectsConfig{}, helper.ErrInternal(err)
+ }
+
+ hasAlternate := true
+ if _, err := os.Stat(altFile); os.IsNotExist(err) {
+ hasAlternate = false
+ }
+
+ hasBitmap, err := stats.HasBitmap(repoPath)
+ if err != nil {
+ return false, RepackObjectsConfig{}, fmt.Errorf("checking for bitmap: %w", err)
+ }
+
+ // Bitmaps are used to efficiently determine transitive reachability of objects from a
+ // set of commits. They are an essential part of the puzzle required to serve fetches
+ // efficiently, as we'd otherwise need to traverse the object graph every time to find
+ // which objects we have to send. We thus repack the repository with bitmaps enabled in
+ // case they're missing.
+ //
+ // There is one exception: repositories which are connected to an object pool must not have
+ // a bitmap on their own. We do not yet use multi-pack indices, and in that case Git can
+ // only use one bitmap. We already generate this bitmap in the pool, so members of it
+ // shouldn't have another bitmap on their own.
+ if !hasBitmap && !hasAlternate {
+ return true, RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: true,
+ }, nil
+ }
+
+ missingBloomFilters, err := stats.IsMissingBloomFilters(repoPath)
+ if err != nil {
+ return false, RepackObjectsConfig{}, fmt.Errorf("checking for bloom filters: %w", err)
+ }
+
+ // Bloom filters are part of the commit-graph and allow us to efficiently determine which
+ // paths have been modified in a given commit without having to look into the object
+ // database. In the past we didn't compute bloom filters at all, so we want to rewrite the
+ // whole commit-graph to generate them.
+ //
+ // Note that we'll eventually want to move out commit-graph generation from repacking. When
+ // that happens we should update the commit-graph either if it's missing, when bloom filters
+ // are missing or when packfiles have been updated.
+ if missingBloomFilters {
+ return true, RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: !hasAlternate,
+ }, nil
+ }
+
+ largestPackfileSize, packfileCount, err := packfileSizeAndCount(repo)
+ if err != nil {
+ return false, RepackObjectsConfig{}, fmt.Errorf("checking largest packfile size: %w", err)
+ }
+
+ // Whenever we do an incremental repack we create a new packfile, and as a result Git may
+ // have to look into every one of the packfiles to find objects. This is less efficient the
+ // more packfiles we have, but we cannot repack the whole repository every time either given
+ // that this may take a lot of time.
+ //
+ // Instead, we determine whether the repository has "too many" packfiles. "Too many" is
+ // relative though: for small repositories it's fine to do full repacks regularly, but for
+ // large repositories we need to be more careful. We thus use a heuristic of "repository
+ // largeness": we take the biggest packfile that exists, and then the maximum allowed number
+ // of packfiles is `log(largestpackfile_size_in_mb) / log(1.3)`. This gives the following
+ // allowed number of packfiles:
+ //
+ // - No packfile: 5 packfiles. This is a special case.
+ // - 10MB packfile: 8 packfiles.
+ // - 100MB packfile: 17 packfiles.
+ // - 500MB packfile: 23 packfiles.
+ // - 1GB packfile: 26 packfiles.
+ // - 5GB packfile: 32 packfiles.
+ // - 10GB packfile: 35 packfiles.
+ // - 100GB packfile: 43 packfiles.
+ //
+ // The goal is to have a comparatively quick ramp-up of allowed packfiles as the repository
+ // size grows, but then slow down such that we're effectively capped and don't end up with
+ // an excessive amount of packfiles.
+ //
+ // This is a heuristic and thus imperfect by necessity. We may tune it as we gain experience
+ // with the way it behaves.
+ if int64(math.Max(5, math.Log(float64(largestPackfileSize))/math.Log(1.3))) < packfileCount {
+ return true, RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: !hasAlternate,
+ }, nil
+ }
+
+ looseObjectCount, err := estimateLooseObjectCount(repo)
+ if err != nil {
+ return false, RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err)
+ }
+
+ // Most Git commands do not write packfiles directly, but instead write loose objects into
+ // the object database. So while we now know that there aren't too many packfiles, we still
+ // need to check whether we have too many objects.
+ //
+ // In this case it doesn't make a lot of sense to scale incremental repacks with the repo's
+ // size: we only pack loose objects, so the time to pack them doesn't scale with repository
+ // size but with the number of loose objects we have. git-gc(1) uses a threshold of 6700
+ // loose objects to start an incremental repack, but one needs to keep in mind that Git
+ // typically has defaults which are better suited for the client-side instead of the
+ // server-side in most commands.
+ //
+ // In our case we typically want to ensure that our repositories are much better packed than
+ // it is necessary on the client side. We thus take a much stricter limit of 1024 objects.
+ if looseObjectCount > looseObjectLimit {
+ return true, RepackObjectsConfig{
+ FullRepack: false,
+ WriteBitmap: false,
+ }, nil
+ }
+
+ return false, RepackObjectsConfig{}, nil
+}
+
+func packfileSizeAndCount(repo *localrepo.Repo) (int64, int64, error) {
+ repoPath, err := repo.Path()
+ if err != nil {
+ return 0, 0, fmt.Errorf("getting repository path: %w", err)
+ }
+
+ entries, err := os.ReadDir(filepath.Join(repoPath, "objects/pack"))
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ return 0, 0, nil
+ }
+
+ return 0, 0, err
+ }
+
+ largestSize := int64(0)
+ count := int64(0)
+
+ for _, entry := range entries {
+ if !strings.HasSuffix(entry.Name(), ".pack") {
+ continue
+ }
+
+ entryInfo, err := entry.Info()
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ continue
+ }
+
+ return 0, 0, fmt.Errorf("getting packfile info: %w", err)
+ }
+
+ if entryInfo.Size() > largestSize {
+ largestSize = entryInfo.Size()
+ }
+
+ count++
+ }
+
+ return largestSize / 1024 / 1024, count, nil
+}
+
+// estimateLooseObjectCount estimates the number of loose objects in the repository. Due to the
+// object name being derived via a cryptographic hash we know that in the general case, objects are
+// evenly distributed across their sharding directories. We can thus estimate the number of loose
+// objects by opening a single sharding directory and counting its entries.
+func estimateLooseObjectCount(repo *localrepo.Repo) (int64, error) {
+ repoPath, err := repo.Path()
+ if err != nil {
+ return 0, fmt.Errorf("getting repository path: %w", err)
+ }
+
+ // We use the same sharding directory as git-gc(1) does for its estimation.
+ entries, err := os.ReadDir(filepath.Join(repoPath, "objects/17"))
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ return 0, nil
+ }
+
+ return 0, fmt.Errorf("reading loose object shard: %w", err)
+ }
+
+ looseObjects := int64(0)
+ for _, entry := range entries {
+ if strings.LastIndexAny(entry.Name(), "0123456789abcdef") != len(entry.Name())-1 {
+ continue
+ }
+
+ looseObjects++
+ }
+
+ // Scale up found loose objects by the number of sharding directories.
+ return looseObjects * 256, nil
+}
+
+// pruneIfNeeded removes objects from the repository which are either unreachable or which are
+// already part of a packfile. We use a grace period of two weeks.
+func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
+ // Pool repositories must never prune any objects, or otherwise we may corrupt members of
+ // that pool if they still refer to that object.
+ if strings.HasPrefix(repo.GetRelativePath(), "@pools") {
+ return false, nil
+ }
+
+ looseObjectCount, err := estimateLooseObjectCount(repo)
+ if err != nil {
+ return false, fmt.Errorf("estimating loose object count: %w", err)
+ }
+
+ // We again use the same limit here as we do when doing an incremental repack. This is done
+ // intentionally: if we determine that there's too many loose objects and try to repack, but
+ // all of those loose objects are in fact unreachable, then we'd still have the same number
+ // of unreachable objects after the incremental repack. We'd thus try to repack every single
+ // time.
+ //
+ // Using the same limit here doesn't quite fix this case: the unreachable objects would only
+ // be pruned after a grace period of two weeks. But at least we know that we will eventually
+ // prune those unreachable objects, at which point we won't try to do another incremental
+ // repack.
+ if looseObjectCount <= looseObjectLimit {
+ return false, nil
+ }
+
+ if err := repo.ExecAndWait(ctx, git.SubCmd{
+ Name: "prune",
+ }); err != nil {
+ return false, fmt.Errorf("pruning objects: %w", err)
+ }
+
+ return true, nil
+}
+
+func packRefsIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
+ repoPath, err := repo.Path()
+ if err != nil {
+ return false, fmt.Errorf("getting repository path: %w", err)
+ }
+ refsPath := filepath.Join(repoPath, "refs")
+
+ looseRefs := int64(0)
+ if err := filepath.WalkDir(refsPath, func(path string, entry fs.DirEntry, err error) error {
+ if err != nil {
+ return err
+ }
+
+ if !entry.IsDir() {
+ looseRefs++
+ }
+
+ return nil
+ }); err != nil {
+ return false, fmt.Errorf("counting loose refs: %w", err)
+ }
+
+ // If there aren't any loose refs then there is nothing we need to do.
+ if looseRefs == 0 {
+ return false, nil
+ }
+
+ packedRefsSize := int64(0)
+ if stat, err := os.Stat(filepath.Join(repoPath, "packed-refs")); err != nil {
+ if !errors.Is(err, os.ErrNotExist) {
+ return false, fmt.Errorf("getting packed-refs size: %w", err)
+ }
+ } else {
+ packedRefsSize = stat.Size()
+ }
+
+ // Packing loose references into the packed-refs file scales with the number of references
+ // we're about to write. We thus decide whether we repack refs by weighing the current size
+ // of the packed-refs file against the number of loose references. This is done such that we
+ // do not repack too often on repositories with a huge number of references, where we can
+ // expect a lot of churn in the number of references.
+ //
+ // As a heuristic, we repack if the number of loose references in the repository exceeds
+ // `log(packed_refs_size_in_bytes/100)/log(1.15)`, which scales as follows (number of refs
+ // is estimated with 100 bytes per reference):
+ //
+ // - 1kB ~ 10 packed refs: 16 refs
+ // - 10kB ~ 100 packed refs: 33 refs
+ // - 100kB ~ 1k packed refs: 49 refs
+ // - 1MB ~ 10k packed refs: 66 refs
+ // - 10MB ~ 100k packed refs: 82 refs
+ // - 100MB ~ 1m packed refs: 99 refs
+ //
+ // We thus allow roughly 16 additional loose refs per factor of ten of packed refs.
+ //
+ // This heuristic may likely need tweaking in the future, but should serve as a good first
+ // iteration.
+ if int64(math.Max(16, math.Log(float64(packedRefsSize)/100)/math.Log(1.15))) > looseRefs {
+ return false, nil
+ }
+
+ var stderr bytes.Buffer
+ if err := repo.ExecAndWait(ctx, git.SubCmd{
+ Name: "pack-refs",
+ Flags: []git.Option{
+ git.Flag{Name: "--all"},
+ },
+ }, git.WithStderr(&stderr)); err != nil {
+ return false, fmt.Errorf("packing refs: %w, stderr: %q", err, stderr.String())
+ }
+
+ return true, nil
+}
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
new file mode 100644
index 000000000..9182f8880
--- /dev/null
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -0,0 +1,559 @@
+package housekeeping
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/stats"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg"
+ "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
+)
+
+type infiniteReader struct{}
+
+func (r infiniteReader) Read(b []byte) (int, error) {
+ for i := range b {
+ b[i] = '\000'
+ }
+ return len(b), nil
+}
+
+func TestNeedsRepacking(t *testing.T) {
+ t.Parallel()
+
+ cfg := testcfg.Build(t)
+
+ for _, tc := range []struct {
+ desc string
+ setup func(t *testing.T) *gitalypb.Repository
+ expectedErr error
+ expectedNeeded bool
+ expectedConfig RepackObjectsConfig
+ }{
+ {
+ desc: "empty repo",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ return repoProto
+ },
+ // This is a bug: if the repo is empty then we wouldn't ever generate a
+ // packfile, but we determine a repack is needed because it's missing a
+ // bitmap. It's a rather benign bug though: git-repack(1) will exit
+ // immediately because it knows that there's nothing to repack.
+ expectedNeeded: true,
+ expectedConfig: RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: true,
+ },
+ },
+ {
+ desc: "missing bitmap",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repoProto, _ := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ return repoProto
+ },
+ expectedNeeded: true,
+ expectedConfig: RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: true,
+ },
+ },
+ {
+ desc: "missing bitmap with alternate",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+
+ // Create the alternates file. If it exists, then we shouldn't try
+ // to generate a bitmap.
+ require.NoError(t, os.WriteFile(filepath.Join(repoPath, "objects", "info", "alternates"), nil, 0o755))
+
+ return repoProto
+ },
+ expectedNeeded: true,
+ expectedConfig: RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: false,
+ },
+ },
+ {
+ desc: "missing commit-graph",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "--write-bitmap-index")
+
+ return repoProto
+ },
+ expectedNeeded: true,
+ expectedConfig: RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: true,
+ },
+ },
+ {
+ desc: "commit-graph without bloom filters",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write")
+
+ return repoProto
+ },
+ expectedNeeded: true,
+ expectedConfig: RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: true,
+ },
+ },
+ {
+ desc: "no repack needed",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--changed-paths", "--split")
+
+ return repoProto
+ },
+ expectedNeeded: false,
+ },
+ } {
+ tc := tc
+ t.Run(tc.desc, func(t *testing.T) {
+ t.Parallel()
+
+ repoProto := tc.setup(t)
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ repackNeeded, repackCfg, err := needsRepacking(repo)
+ require.Equal(t, tc.expectedErr, err)
+ require.Equal(t, tc.expectedNeeded, repackNeeded)
+ require.Equal(t, tc.expectedConfig, repackCfg)
+ })
+ }
+
+ const megaByte = 1024 * 1024
+
+ for _, tc := range []struct {
+ packfileSize int64
+ requiredPackfiles int
+ }{
+ {
+ packfileSize: 1,
+ requiredPackfiles: 5,
+ },
+ {
+ packfileSize: 5 * megaByte,
+ requiredPackfiles: 6,
+ },
+ {
+ packfileSize: 10 * megaByte,
+ requiredPackfiles: 8,
+ },
+ {
+ packfileSize: 50 * megaByte,
+ requiredPackfiles: 14,
+ },
+ {
+ packfileSize: 100 * megaByte,
+ requiredPackfiles: 17,
+ },
+ {
+ packfileSize: 500 * megaByte,
+ requiredPackfiles: 23,
+ },
+ {
+ packfileSize: 1000 * megaByte,
+ requiredPackfiles: 26,
+ },
+ // Let's not go any further than this, we're thrashing the temporary directory.
+ } {
+ t.Run(fmt.Sprintf("packfile with %d bytes", tc.packfileSize), func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+ packDir := filepath.Join(repoPath, "objects", "pack")
+
+ // Emulate the existence of a bitmap and a commit-graph with bloom filters.
+ // We explicitly don't want to generate them via Git commands as they would
+ // require us to already have objects in the repository, and we want to be
+ // in full control over all objects and packfiles in the repo.
+ require.NoError(t, os.WriteFile(filepath.Join(packDir, "something.bitmap"), nil, 0o644))
+ commitGraphChainPath := filepath.Join(repoPath, stats.CommitGraphChainRelPath)
+ require.NoError(t, os.MkdirAll(filepath.Dir(commitGraphChainPath), 0o755))
+ require.NoError(t, os.WriteFile(commitGraphChainPath, nil, 0o644))
+
+ // We first create a single big packfile which is used to determine the
+ // boundary of when we repack.
+ bigPackfile, err := os.OpenFile(filepath.Join(packDir, "big.pack"), os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644)
+ require.NoError(t, err)
+ defer testhelper.MustClose(t, bigPackfile)
+ _, err = io.Copy(bigPackfile, io.LimitReader(infiniteReader{}, tc.packfileSize))
+ require.NoError(t, err)
+
+ // And then we create one less packfile than we need to hit the boundary.
+ // This is done to assert that we indeed don't repack before hitting the
+ // boundary.
+ for i := 0; i < tc.requiredPackfiles-1; i++ {
+ additionalPackfile, err := os.Create(filepath.Join(packDir, fmt.Sprintf("%d.pack", i)))
+ require.NoError(t, err)
+ testhelper.MustClose(t, additionalPackfile)
+ }
+
+ repackNeeded, _, err := needsRepacking(repo)
+ require.NoError(t, err)
+ require.False(t, repackNeeded)
+
+ // Now we create the additional packfile that causes us to hit the boundary.
+ // We should thus see that we want to repack now.
+ lastPackfile, err := os.Create(filepath.Join(packDir, "last.pack"))
+ require.NoError(t, err)
+ testhelper.MustClose(t, lastPackfile)
+
+ repackNeeded, repackCfg, err := needsRepacking(repo)
+ require.NoError(t, err)
+ require.True(t, repackNeeded)
+ require.Equal(t, RepackObjectsConfig{
+ FullRepack: true,
+ WriteBitmap: true,
+ }, repackCfg)
+ })
+ }
+
+ for _, tc := range []struct {
+ desc string
+ looseObjects []string
+ expectedRepack bool
+ }{
+ {
+ desc: "no objects",
+ looseObjects: nil,
+ expectedRepack: false,
+ },
+ {
+ desc: "object not in 17 shard",
+ looseObjects: []string{
+ filepath.Join("ab/12345"),
+ },
+ expectedRepack: false,
+ },
+ {
+ desc: "object in 17 shard",
+ looseObjects: []string{
+ filepath.Join("17/12345"),
+ },
+ expectedRepack: false,
+ },
+ {
+ desc: "objects in different shards",
+ looseObjects: []string{
+ filepath.Join("ab/12345"),
+ filepath.Join("cd/12345"),
+ filepath.Join("12/12345"),
+ filepath.Join("17/12345"),
+ },
+ expectedRepack: false,
+ },
+ {
+ desc: "boundary",
+ looseObjects: []string{
+ filepath.Join("17/1"),
+ filepath.Join("17/2"),
+ filepath.Join("17/3"),
+ filepath.Join("17/4"),
+ },
+ expectedRepack: false,
+ },
+ {
+ desc: "exceeding boundary should cause repack",
+ looseObjects: []string{
+ filepath.Join("17/1"),
+ filepath.Join("17/2"),
+ filepath.Join("17/3"),
+ filepath.Join("17/4"),
+ filepath.Join("17/5"),
+ },
+ expectedRepack: true,
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ // Emulate the existence of a bitmap and a commit-graph with bloom filters.
+ // We explicitly don't want to generate them via Git commands as they would
+ // require us to already have objects in the repository, and we want to be
+ // in full control over all objects and packfiles in the repo.
+ require.NoError(t, os.WriteFile(filepath.Join(repoPath, "objects", "pack", "something.bitmap"), nil, 0o644))
+ commitGraphChainPath := filepath.Join(repoPath, stats.CommitGraphChainRelPath)
+ require.NoError(t, os.MkdirAll(filepath.Dir(commitGraphChainPath), 0o755))
+ require.NoError(t, os.WriteFile(commitGraphChainPath, nil, 0o644))
+
+ for _, looseObjectPath := range tc.looseObjects {
+ looseObjectPath := filepath.Join(repoPath, "objects", looseObjectPath)
+ require.NoError(t, os.MkdirAll(filepath.Dir(looseObjectPath), 0o755))
+
+ looseObjectFile, err := os.Create(looseObjectPath)
+ require.NoError(t, err)
+ testhelper.MustClose(t, looseObjectFile)
+ }
+
+ repackNeeded, repackCfg, err := needsRepacking(repo)
+ require.NoError(t, err)
+ require.Equal(t, tc.expectedRepack, repackNeeded)
+ if tc.expectedRepack {
+ require.Equal(t, RepackObjectsConfig{
+ FullRepack: false,
+ WriteBitmap: false,
+ }, repackCfg)
+ }
+ })
+ }
+}
+
+func TestPruneIfNeeded(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ for _, tc := range []struct {
+ desc string
+ isPool bool
+ looseObjects []string
+ expectedPrune bool
+ }{
+ {
+ desc: "no objects",
+ looseObjects: nil,
+ expectedPrune: false,
+ },
+ {
+ desc: "object not in 17 shard",
+ looseObjects: []string{
+ filepath.Join("ab/12345"),
+ },
+ expectedPrune: false,
+ },
+ {
+ desc: "object in 17 shard",
+ looseObjects: []string{
+ filepath.Join("17/12345"),
+ },
+ expectedPrune: false,
+ },
+ {
+ desc: "objects in different shards",
+ looseObjects: []string{
+ filepath.Join("ab/12345"),
+ filepath.Join("cd/12345"),
+ filepath.Join("12/12345"),
+ filepath.Join("17/12345"),
+ },
+ expectedPrune: false,
+ },
+ {
+ desc: "boundary",
+ looseObjects: []string{
+ filepath.Join("17/1"),
+ filepath.Join("17/2"),
+ filepath.Join("17/3"),
+ filepath.Join("17/4"),
+ },
+ expectedPrune: false,
+ },
+ {
+ desc: "exceeding boundary should cause prune",
+ looseObjects: []string{
+ filepath.Join("17/1"),
+ filepath.Join("17/2"),
+ filepath.Join("17/3"),
+ filepath.Join("17/4"),
+ filepath.Join("17/5"),
+ },
+ expectedPrune: true,
+ },
+ {
+ desc: "exceeding boundary on pool",
+ isPool: true,
+ looseObjects: []string{
+ filepath.Join("17/1"),
+ filepath.Join("17/2"),
+ filepath.Join("17/3"),
+ filepath.Join("17/4"),
+ filepath.Join("17/5"),
+ },
+ expectedPrune: false,
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ relativePath := gittest.NewRepositoryName(t, true)
+ if tc.isPool {
+ relativePath = gittest.NewObjectPoolName(t)
+ }
+
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0], gittest.InitRepoOpts{
+ WithRelativePath: relativePath,
+ })
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ for _, looseObjectPath := range tc.looseObjects {
+ looseObjectPath := filepath.Join(repoPath, "objects", looseObjectPath)
+ require.NoError(t, os.MkdirAll(filepath.Dir(looseObjectPath), 0o755))
+
+ looseObjectFile, err := os.Create(looseObjectPath)
+ require.NoError(t, err)
+ testhelper.MustClose(t, looseObjectFile)
+ }
+
+ didPrune, err := pruneIfNeeded(ctx, repo)
+ require.Equal(t, tc.expectedPrune, didPrune)
+ require.NoError(t, err)
+ })
+ }
+}
+
+func TestPackRefsIfNeeded(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ const kiloByte = 1024
+
+ for _, tc := range []struct {
+ packedRefsSize int64
+ requiredRefs int
+ }{
+ {
+ packedRefsSize: 1,
+ requiredRefs: 16,
+ },
+ {
+ packedRefsSize: 1 * kiloByte,
+ requiredRefs: 16,
+ },
+ {
+ packedRefsSize: 10 * kiloByte,
+ requiredRefs: 33,
+ },
+ {
+ packedRefsSize: 100 * kiloByte,
+ requiredRefs: 49,
+ },
+ {
+ packedRefsSize: 1000 * kiloByte,
+ requiredRefs: 66,
+ },
+ {
+ packedRefsSize: 10000 * kiloByte,
+ requiredRefs: 82,
+ },
+ {
+ packedRefsSize: 100000 * kiloByte,
+ requiredRefs: 99,
+ },
+ } {
+ t.Run(fmt.Sprintf("packed-refs with %d bytes", tc.packedRefsSize), func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ // Write an empty commit such that we can create valid refs.
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents())
+ looseRefContent := []byte(commitID.String() + "\n")
+
+ // We first create a single big packed-refs file which is used to determine the
+ // boundary of when we repack. We need to write a valid packed-refs file or
+ // otherwise git-pack-refs(1) would choke later on, so we just write the
+ // file such that every line is a separate ref of exactly 128 bytes in
+ // length (a divisor of 1024), referring to the commit we created above.
+ packedRefs, err := os.OpenFile(filepath.Join(repoPath, "packed-refs"), os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644)
+ require.NoError(t, err)
+ defer testhelper.MustClose(t, packedRefs)
+ for i := int64(0); i < tc.packedRefsSize/128; i++ {
+ packedRefLine := fmt.Sprintf("%s refs/something/this-line-is-padded-to-exactly-128-bytes-%030d\n", commitID.String(), i)
+ require.Len(t, packedRefLine, 128)
+ _, err := packedRefs.WriteString(packedRefLine)
+ require.NoError(t, err)
+ }
+ require.NoError(t, packedRefs.Sync())
+
+ // And then we create one less loose ref than we need to hit the boundary.
+ // This is done to assert that we indeed don't repack before hitting the
+ // boundary.
+ for i := 0; i < tc.requiredRefs-1; i++ {
+ looseRefPath := filepath.Join(repoPath, "refs", "heads", fmt.Sprintf("branch-%d", i))
+ require.NoError(t, os.WriteFile(looseRefPath, looseRefContent, 0o644))
+ }
+
+ didRepack, err := packRefsIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.False(t, didRepack)
+
+ // Now we create the additional loose ref that causes us to hit the
+ // boundary. We should thus see that we want to repack now.
+ looseRefPath := filepath.Join(repoPath, "refs", "heads", "last-branch")
+ require.NoError(t, os.WriteFile(looseRefPath, looseRefContent, 0o644))
+
+ didRepack, err = packRefsIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.True(t, didRepack)
+ })
+ }
+}
+
+func TestEstimateLooseObjectCount(t *testing.T) {
+ t.Parallel()
+
+ cfg := testcfg.Build(t)
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ t.Run("empty repository", func(t *testing.T) {
+ looseObjects, err := estimateLooseObjectCount(repo)
+ require.NoError(t, err)
+ require.Zero(t, looseObjects)
+ })
+
+ t.Run("object in different shard", func(t *testing.T) {
+ differentShard := filepath.Join(repoPath, "objects", "a0")
+ require.NoError(t, os.MkdirAll(differentShard, 0o755))
+
+ object, err := os.Create(filepath.Join(differentShard, "123456"))
+ require.NoError(t, err)
+ testhelper.MustClose(t, object)
+
+ looseObjects, err := estimateLooseObjectCount(repo)
+ require.NoError(t, err)
+ require.Zero(t, looseObjects)
+ })
+
+ t.Run("object in estimation shard", func(t *testing.T) {
+ estimationShard := filepath.Join(repoPath, "objects", "17")
+ require.NoError(t, os.MkdirAll(estimationShard, 0o755))
+
+ object, err := os.Create(filepath.Join(estimationShard, "123456"))
+ require.NoError(t, err)
+ testhelper.MustClose(t, object)
+
+ looseObjects, err := estimateLooseObjectCount(repo)
+ require.NoError(t, err)
+ require.Equal(t, int64(256), looseObjects)
+
+ // Create a second object in there.
+ object, err = os.Create(filepath.Join(estimationShard, "654321"))
+ require.NoError(t, err)
+ testhelper.MustClose(t, object)
+
+ looseObjects, err = estimateLooseObjectCount(repo)
+ require.NoError(t, err)
+ require.Equal(t, int64(512), looseObjects)
+ })
+}
diff --git a/internal/gitaly/service/repository/optimize.go b/internal/gitaly/service/repository/optimize.go
index 64707b6f9..cea96b6de 100644
--- a/internal/gitaly/service/repository/optimize.go
+++ b/internal/gitaly/service/repository/optimize.go
@@ -1,32 +1,13 @@
package repository
import (
- "bytes"
"context"
- "errors"
- "fmt"
- "io/fs"
- "math"
- "os"
- "path/filepath"
- "strings"
- "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
- "gitlab.com/gitlab-org/gitaly/v14/internal/git"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/housekeeping"
- "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
- "gitlab.com/gitlab-org/gitaly/v14/internal/git/stats"
- "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/transaction"
"gitlab.com/gitlab-org/gitaly/v14/internal/helper"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
)
-const (
- // looseObjectLimit is the limit of loose objects we accept both when doing incremental
- // repacks and when pruning objects.
- looseObjectLimit = 1024
-)
-
func (s *server) OptimizeRepository(ctx context.Context, in *gitalypb.OptimizeRepositoryRequest) (*gitalypb.OptimizeRepositoryResponse, error) {
if err := s.validateOptimizeRepositoryRequest(in); err != nil {
return nil, err
@@ -34,7 +15,7 @@ func (s *server) OptimizeRepository(ctx context.Context, in *gitalypb.OptimizeRe
repo := s.localrepo(in.GetRepository())
- if err := optimizeRepository(ctx, repo, s.txManager); err != nil {
+ if err := housekeeping.OptimizeRepository(ctx, repo, s.txManager); err != nil {
return nil, helper.ErrInternal(err)
}
@@ -53,372 +34,3 @@ func (s *server) validateOptimizeRepositoryRequest(in *gitalypb.OptimizeReposito
return nil
}
-
-func optimizeRepository(ctx context.Context, repo *localrepo.Repo, txManager transaction.Manager) error {
- optimizations := struct {
- PackedObjects bool `json:"packed_objects"`
- PrunedObjects bool `json:"pruned_objects"`
- PackedRefs bool `json:"packed_refs"`
- }{}
- defer func() {
- ctxlogrus.Extract(ctx).WithField("optimizations", optimizations).Info("optimized repository")
- }()
-
- if err := housekeeping.Perform(ctx, repo, txManager); err != nil {
- return fmt.Errorf("could not execute houskeeping: %w", err)
- }
-
- if err := housekeeping.CleanupWorktrees(ctx, repo); err != nil {
- return fmt.Errorf("could not clean up worktrees: %w", err)
- }
-
- didRepack, err := repackIfNeeded(ctx, repo)
- if err != nil {
- return fmt.Errorf("could not repack: %w", err)
- }
- optimizations.PackedObjects = didRepack
-
- didPrune, err := pruneIfNeeded(ctx, repo)
- if err != nil {
- return fmt.Errorf("could not prune: %w", err)
- }
- optimizations.PrunedObjects = didPrune
-
- didPackRefs, err := packRefsIfNeeded(ctx, repo)
- if err != nil {
- return fmt.Errorf("could not pack refs: %w", err)
- }
- optimizations.PackedRefs = didPackRefs
-
- return nil
-}
-
-// repackIfNeeded uses a set of heuristics to determine whether the repository needs a
-// full repack and, if so, repacks it.
-func repackIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
- repackNeeded, cfg, err := needsRepacking(repo)
- if err != nil {
- return false, fmt.Errorf("determining whether repo needs repack: %w", err)
- }
-
- if !repackNeeded {
- return false, nil
- }
-
- if err := housekeeping.RepackObjects(ctx, repo, cfg); err != nil {
- return false, err
- }
-
- return true, nil
-}
-
-func needsRepacking(repo *localrepo.Repo) (bool, housekeeping.RepackObjectsConfig, error) {
- repoPath, err := repo.Path()
- if err != nil {
- return false, housekeeping.RepackObjectsConfig{}, fmt.Errorf("getting repository path: %w", err)
- }
-
- altFile, err := repo.InfoAlternatesPath()
- if err != nil {
- return false, housekeeping.RepackObjectsConfig{}, helper.ErrInternal(err)
- }
-
- hasAlternate := true
- if _, err := os.Stat(altFile); os.IsNotExist(err) {
- hasAlternate = false
- }
-
- hasBitmap, err := stats.HasBitmap(repoPath)
- if err != nil {
- return false, housekeeping.RepackObjectsConfig{}, fmt.Errorf("checking for bitmap: %w", err)
- }
-
- // Bitmaps are used to efficiently determine transitive reachability of objects from a
- // set of commits. They are an essential part of the puzzle required to serve fetches
- // efficiently, as we'd otherwise need to traverse the object graph every time to find
- // which objects we have to send. We thus repack the repository with bitmaps enabled in
- // case they're missing.
- //
- // There is one exception: repositories which are connected to an object pool must not have
- // a bitmap on their own. We do not yet use multi-pack indices, and in that case Git can
- // only use one bitmap. We already generate this bitmap in the pool, so member of it
- // shouldn't have another bitmap on their own.
- if !hasBitmap && !hasAlternate {
- return true, housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- }, nil
- }
-
- missingBloomFilters, err := stats.IsMissingBloomFilters(repoPath)
- if err != nil {
- return false, housekeeping.RepackObjectsConfig{}, fmt.Errorf("checking for bloom filters: %w", err)
- }
-
- // Bloom filters are part of the commit-graph and allow us to efficiently determine which
- // paths have been modified in a given commit without having to look into the object
- // database. In the past we didn't compute bloom filters at all, so we want to rewrite the
- // whole commit-graph to generate them.
- //
- // Note that we'll eventually want to move out commit-graph generation from repacking. When
- // that happens we should update the commit-graph either if it's missing, when bloom filters
- // are missing or when packfiles have been updated.
- if missingBloomFilters {
- return true, housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: !hasAlternate,
- }, nil
- }
-
- largestPackfileSize, packfileCount, err := packfileSizeAndCount(repo)
- if err != nil {
- return false, housekeeping.RepackObjectsConfig{}, fmt.Errorf("checking largest packfile size: %w", err)
- }
-
- // Whenever we do an incremental repack we create a new packfile, and as a result Git may
- // have to look into every one of the packfiles to find objects. This is less efficient the
- // more packfiles we have, but we cannot repack the whole repository every time either given
- // that this may take a lot of time.
- //
- // Instead, we determine whether the repository has "too many" packfiles. "Too many" is
- // relative though: for small repositories it's fine to do full repacks regularly, but for
- // large repositories we need to be more careful. We thus use a heuristic of "repository
- // largeness": we take the biggest packfile that exists, and then the maximum allowed number
- // of packfiles is `log(largestpackfile_size_in_mb) / log(1.3)`. This gives the following
- // allowed number of packfiles:
- //
- // - No packfile: 5 packfile. This is a special case.
- // - 10MB packfile: 8 packfiles.
- // - 100MB packfile: 17 packfiles.
- // - 500MB packfile: 23 packfiles.
- // - 1GB packfile: 26 packfiles.
- // - 5GB packfile: 32 packfiles.
- // - 10GB packfile: 35 packfiles.
- // - 100GB packfile: 43 packfiles.
- //
- // The goal is to have a comparatively quick ramp-up of allowed packfiles as the repository
- // size grows, but then slow down such that we're effectively capped and don't end up with
- // an excessive amount of packfiles.
- //
- // This is a heuristic and thus imperfect by necessity. We may tune it as we gain experience
- // with the way it behaves.
- if int64(math.Max(5, math.Log(float64(largestPackfileSize))/math.Log(1.3))) < packfileCount {
- return true, housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: !hasAlternate,
- }, nil
- }
-
- looseObjectCount, err := estimateLooseObjectCount(repo)
- if err != nil {
- return false, housekeeping.RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err)
- }
-
- // Most Git commands do not write packfiles directly, but instead write loose objects into
- // the object database. So while we now know that there ain't too many packfiles, we still
- // need to check whether we have too many objects.
- //
- // In this case it doesn't make a lot of sense to scale incremental repacks with the repo's
- // size: we only pack loose objects, so the time to pack them doesn't scale with repository
- // size but with the number of loose objects we have. git-gc(1) uses a threshold of 6700
- // loose objects to start an incremental repack, but one needs to keep in mind that Git
- // typically has defaults which are better suited for the client-side instead of the
- // server-side in most commands.
- //
- // In our case we typically want to ensure that our repositories are much better packed than
- // it is necessary on the client side. We thus take a much stricter limit of 1024 objects.
- if looseObjectCount > looseObjectLimit {
- return true, housekeeping.RepackObjectsConfig{
- FullRepack: false,
- WriteBitmap: false,
- }, nil
- }
-
- return false, housekeeping.RepackObjectsConfig{}, nil
-}
-
-func packfileSizeAndCount(repo *localrepo.Repo) (int64, int64, error) {
- repoPath, err := repo.Path()
- if err != nil {
- return 0, 0, fmt.Errorf("getting repository path: %w", err)
- }
-
- entries, err := os.ReadDir(filepath.Join(repoPath, "objects/pack"))
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- return 0, 0, nil
- }
-
- return 0, 0, err
- }
-
- largestSize := int64(0)
- count := int64(0)
-
- for _, entry := range entries {
- if !strings.HasSuffix(entry.Name(), ".pack") {
- continue
- }
-
- entryInfo, err := entry.Info()
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- continue
- }
-
- return 0, 0, fmt.Errorf("getting packfile info: %w", err)
- }
-
- if entryInfo.Size() > largestSize {
- largestSize = entryInfo.Size()
- }
-
- count++
- }
-
- return largestSize / 1024 / 1024, count, nil
-}
-
-// estimateLooseObjectCount estimates the number of loose objects in the repository. Due to the
-// object name being derived via a cryptographic hash we know that in the general case, objects are
-// evenly distributed across their sharding directories. We can thus estimate the number of loose
-// objects by opening a single sharding directory and counting its entries.
-func estimateLooseObjectCount(repo *localrepo.Repo) (int64, error) {
- repoPath, err := repo.Path()
- if err != nil {
- return 0, fmt.Errorf("getting repository path: %w", err)
- }
-
- // We use the same sharding directory as git-gc(1) does for its estimation.
- entries, err := os.ReadDir(filepath.Join(repoPath, "objects/17"))
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- return 0, nil
- }
-
- return 0, fmt.Errorf("reading loose object shard: %w", err)
- }
-
- looseObjects := int64(0)
- for _, entry := range entries {
- if strings.LastIndexAny(entry.Name(), "0123456789abcdef") != len(entry.Name())-1 {
- continue
- }
-
- looseObjects++
- }
-
- // Scale up found loose objects by the number of sharding directories.
- return looseObjects * 256, nil
-}
-
-// pruneIfNeeded removes objects from the repository which are either unreachable or which are
-// already part of a packfile. We use a grace period of two weeks.
-func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
- // Pool repositories must never prune any objects, or otherwise we may corrupt members of
- // that pool if they still refer to that object.
- if strings.HasPrefix(repo.GetRelativePath(), "@pools") {
- return false, nil
- }
-
- looseObjectCount, err := estimateLooseObjectCount(repo)
- if err != nil {
- return false, fmt.Errorf("estimating loose object count: %w", err)
- }
-
- // We again use the same limit here as we do when doing an incremental repack. This is done
- // intentionally: if we determine that there's too many loose objects and try to repack, but
- // all of those loose objects are in fact unreachable, then we'd still have the same number
- // of unreachable objects after the incremental repack. We'd thus try to repack every single
- // time.
- //
- // Using the same limit here doesn't quite fix this case: the unreachable objects would only
- // be pruned after a grace period of two weeks. But at least we know that we will eventually
- // prune up those unreachable objects, at which point we won't try to do another incremental
- // repack.
- if looseObjectCount <= looseObjectLimit {
- return false, nil
- }
-
- if err := repo.ExecAndWait(ctx, git.SubCmd{
- Name: "prune",
- }); err != nil {
- return false, fmt.Errorf("pruning objects: %w", err)
- }
-
- return true, nil
-}
-
-func packRefsIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
- repoPath, err := repo.Path()
- if err != nil {
- return false, fmt.Errorf("getting repository path: %w", err)
- }
- refsPath := filepath.Join(repoPath, "refs")
-
- looseRefs := int64(0)
- if err := filepath.WalkDir(refsPath, func(path string, entry fs.DirEntry, err error) error {
- if err != nil {
- return err
- }
-
- if !entry.IsDir() {
- looseRefs++
- }
-
- return nil
- }); err != nil {
- return false, fmt.Errorf("counting loose refs: %w", err)
- }
-
- // If there aren't any loose refs then there is nothing we need to do.
- if looseRefs == 0 {
- return false, nil
- }
-
- packedRefsSize := int64(0)
- if stat, err := os.Stat(filepath.Join(repoPath, "packed-refs")); err != nil {
- if !errors.Is(err, os.ErrNotExist) {
- return false, fmt.Errorf("getting packed-refs size: %w", err)
- }
- } else {
- packedRefsSize = stat.Size()
- }
-
- // Packing loose references into the packed-refs file scales with the number of references
- // we're about to write. We thus decide whether we repack refs by weighing the current size
- // of the packed-refs file against the number of loose references. This is done such that we
- // do not repack too often on repositories with a huge number of references, where we can
- // expect a lot of churn in the number of references.
- //
- // As a heuristic, we repack if the number of loose references in the repository exceeds
- // `log(packed_refs_size_in_bytes/100)/log(1.15)`, which scales as following (number of refs
- // is estimated with 100 bytes per reference):
- //
- // - 1kB ~ 10 packed refs: 16 refs
- // - 10kB ~ 100 packed refs: 33 refs
- // - 100kB ~ 1k packed refs: 49 refs
- // - 1MB ~ 10k packed refs: 66 refs
- // - 10MB ~ 100k packed refs: 82 refs
- // - 100MB ~ 1m packed refs: 99 refs
- //
- // We thus allow roughly 16 additional loose refs per factor of ten of packed refs.
- //
- // This heuristic may likely need tweaking in the future, but should serve as a good first
- // iteration.
- if int64(math.Max(16, math.Log(float64(packedRefsSize)/100)/math.Log(1.15))) > looseRefs {
- return false, nil
- }
-
- var stderr bytes.Buffer
- if err := repo.ExecAndWait(ctx, git.SubCmd{
- Name: "pack-refs",
- Flags: []git.Option{
- git.Flag{Name: "--all"},
- },
- }, git.WithStderr(&stderr)); err != nil {
- return false, fmt.Errorf("packing refs: %w, stderr: %q", err, stderr.String())
- }
-
- return true, nil
-}
diff --git a/internal/gitaly/service/repository/optimize_test.go b/internal/gitaly/service/repository/optimize_test.go
index 361ae2a92..c35ffa924 100644
--- a/internal/gitaly/service/repository/optimize_test.go
+++ b/internal/gitaly/service/repository/optimize_test.go
@@ -2,8 +2,6 @@ package repository
import (
"bytes"
- "fmt"
- "io"
"os"
"path/filepath"
"testing"
@@ -12,8 +10,6 @@ import (
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v14/internal/git"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
- "gitlab.com/gitlab-org/gitaly/v14/internal/git/housekeeping"
- "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/stats"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
@@ -190,557 +186,3 @@ func TestOptimizeRepositoryValidation(t *testing.T) {
_, err := client.OptimizeRepository(ctx, &gitalypb.OptimizeRepositoryRequest{Repository: repo})
require.NoError(t, err)
}
-
-type infiniteReader struct{}
-
-func (r infiniteReader) Read(b []byte) (int, error) {
- for i := range b {
- b[i] = '\000'
- }
- return len(b), nil
-}
-
-func TestNeedsRepacking(t *testing.T) {
- t.Parallel()
-
- ctx := testhelper.Context(t)
- cfg, _ := setupRepositoryServiceWithoutRepo(t)
-
- for _, tc := range []struct {
- desc string
- setup func(t *testing.T) *gitalypb.Repository
- expectedErr error
- expectedNeeded bool
- expectedConfig housekeeping.RepackObjectsConfig
- }{
- {
- desc: "empty repo",
- setup: func(t *testing.T) *gitalypb.Repository {
- repoProto, _ := gittest.CreateRepository(ctx, t, cfg)
- return repoProto
- },
- // This is a bug: if the repo is empty then we wouldn't ever generate a
- // packfile, but we determine a repack is needed because it's missing a
- // bitmap. It's a rather benign bug though: git-repack(1) will exit
- // immediately because it knows that there's nothing to repack.
- expectedNeeded: true,
- expectedConfig: housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- },
- },
- {
- desc: "missing bitmap",
- setup: func(t *testing.T) *gitalypb.Repository {
- repoProto, _ := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
- Seed: gittest.SeedGitLabTest,
- })
- return repoProto
- },
- expectedNeeded: true,
- expectedConfig: housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- },
- },
- {
- desc: "missing bitmap with alternate",
- setup: func(t *testing.T) *gitalypb.Repository {
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
- Seed: gittest.SeedGitLabTest,
- })
-
- // Create the alternates file. If it exists, then we shouldn't try
- // to generate a bitmap.
- require.NoError(t, os.WriteFile(filepath.Join(repoPath, "objects", "info", "alternates"), nil, 0o755))
-
- return repoProto
- },
- expectedNeeded: true,
- expectedConfig: housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: false,
- },
- },
- {
- desc: "missing commit-graph",
- setup: func(t *testing.T) *gitalypb.Repository {
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
- Seed: gittest.SeedGitLabTest,
- })
-
- gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "--write-bitmap-index")
-
- return repoProto
- },
- expectedNeeded: true,
- expectedConfig: housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- },
- },
- {
- desc: "commit-graph without bloom filters",
- setup: func(t *testing.T) *gitalypb.Repository {
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
- Seed: gittest.SeedGitLabTest,
- })
-
- gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "--write-bitmap-index")
- gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write")
-
- return repoProto
- },
- expectedNeeded: true,
- expectedConfig: housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- },
- },
- {
- desc: "no repack needed",
- setup: func(t *testing.T) *gitalypb.Repository {
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
- Seed: gittest.SeedGitLabTest,
- })
-
- gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "--write-bitmap-index")
- gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--changed-paths", "--split")
-
- return repoProto
- },
- expectedNeeded: false,
- },
- } {
- tc := tc
- t.Run(tc.desc, func(t *testing.T) {
- t.Parallel()
-
- repoProto := tc.setup(t)
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
-
- repackNeeded, repackCfg, err := needsRepacking(repo)
- require.Equal(t, tc.expectedErr, err)
- require.Equal(t, tc.expectedNeeded, repackNeeded)
- require.Equal(t, tc.expectedConfig, repackCfg)
- })
- }
-
- const megaByte = 1024 * 1024
-
- for _, tc := range []struct {
- packfileSize int64
- requiredPackfiles int
- }{
- {
- packfileSize: 1,
- requiredPackfiles: 5,
- },
- {
- packfileSize: 5 * megaByte,
- requiredPackfiles: 6,
- },
- {
- packfileSize: 10 * megaByte,
- requiredPackfiles: 8,
- },
- {
- packfileSize: 50 * megaByte,
- requiredPackfiles: 14,
- },
- {
- packfileSize: 100 * megaByte,
- requiredPackfiles: 17,
- },
- {
- packfileSize: 500 * megaByte,
- requiredPackfiles: 23,
- },
- {
- packfileSize: 1000 * megaByte,
- requiredPackfiles: 26,
- },
- // Let's not go any further than this, we're thrashing the temporary directory.
- } {
- t.Run(fmt.Sprintf("packfile with %d bytes", tc.packfileSize), func(t *testing.T) {
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg)
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
- packDir := filepath.Join(repoPath, "objects", "pack")
-
- // Emulate the existence of a bitmap and a commit-graph with bloom filters.
- // We explicitly don't want to generate them via Git commands as they would
- // require us to already have objects in the repository, and we want to be
- // in full control over all objects and packfiles in the repo.
- require.NoError(t, os.WriteFile(filepath.Join(packDir, "something.bitmap"), nil, 0o644))
- commitGraphChainPath := filepath.Join(repoPath, stats.CommitGraphChainRelPath)
- require.NoError(t, os.MkdirAll(filepath.Dir(commitGraphChainPath), 0o755))
- require.NoError(t, os.WriteFile(commitGraphChainPath, nil, 0o644))
-
- // We first create a single big packfile which is used to determine the
- // boundary of when we repack.
- bigPackfile, err := os.OpenFile(filepath.Join(packDir, "big.pack"), os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644)
- require.NoError(t, err)
- defer testhelper.MustClose(t, bigPackfile)
- _, err = io.Copy(bigPackfile, io.LimitReader(infiniteReader{}, tc.packfileSize))
- require.NoError(t, err)
-
- // And then we create one less packfile than we need to hit the boundary.
- // This is done to assert that we indeed don't repack before hitting the
- // boundary.
- for i := 0; i < tc.requiredPackfiles-1; i++ {
- additionalPackfile, err := os.Create(filepath.Join(packDir, fmt.Sprintf("%d.pack", i)))
- require.NoError(t, err)
- testhelper.MustClose(t, additionalPackfile)
- }
-
- repackNeeded, _, err := needsRepacking(repo)
- require.NoError(t, err)
- require.False(t, repackNeeded)
-
- // Now we create the additional packfile that causes us to hit the boundary.
- // We should thus see that we want to repack now.
- lastPackfile, err := os.Create(filepath.Join(packDir, "last.pack"))
- require.NoError(t, err)
- testhelper.MustClose(t, lastPackfile)
-
- repackNeeded, repackCfg, err := needsRepacking(repo)
- require.NoError(t, err)
- require.True(t, repackNeeded)
- require.Equal(t, housekeeping.RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- }, repackCfg)
- })
- }
-
- for _, tc := range []struct {
- desc string
- looseObjects []string
- expectedRepack bool
- }{
- {
- desc: "no objects",
- looseObjects: nil,
- expectedRepack: false,
- },
- {
- desc: "object not in 17 shard",
- looseObjects: []string{
- filepath.Join("ab/12345"),
- },
- expectedRepack: false,
- },
- {
- desc: "object in 17 shard",
- looseObjects: []string{
- filepath.Join("17/12345"),
- },
- expectedRepack: false,
- },
- {
- desc: "objects in different shards",
- looseObjects: []string{
- filepath.Join("ab/12345"),
- filepath.Join("cd/12345"),
- filepath.Join("12/12345"),
- filepath.Join("17/12345"),
- },
- expectedRepack: false,
- },
- {
- desc: "boundary",
- looseObjects: []string{
- filepath.Join("17/1"),
- filepath.Join("17/2"),
- filepath.Join("17/3"),
- filepath.Join("17/4"),
- },
- expectedRepack: false,
- },
- {
- desc: "exceeding boundary should cause repack",
- looseObjects: []string{
- filepath.Join("17/1"),
- filepath.Join("17/2"),
- filepath.Join("17/3"),
- filepath.Join("17/4"),
- filepath.Join("17/5"),
- },
- expectedRepack: true,
- },
- } {
- t.Run(tc.desc, func(t *testing.T) {
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg)
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
-
- // Emulate the existence of a bitmap and a commit-graph with bloom filters.
- // We explicitly don't want to generate them via Git commands as they would
- // require us to already have objects in the repository, and we want to be
- // in full control over all objects and packfiles in the repo.
- require.NoError(t, os.WriteFile(filepath.Join(repoPath, "objects", "pack", "something.bitmap"), nil, 0o644))
- commitGraphChainPath := filepath.Join(repoPath, stats.CommitGraphChainRelPath)
- require.NoError(t, os.MkdirAll(filepath.Dir(commitGraphChainPath), 0o755))
- require.NoError(t, os.WriteFile(commitGraphChainPath, nil, 0o644))
-
- for _, looseObjectPath := range tc.looseObjects {
- looseObjectPath := filepath.Join(repoPath, "objects", looseObjectPath)
- require.NoError(t, os.MkdirAll(filepath.Dir(looseObjectPath), 0o755))
-
- looseObjectFile, err := os.Create(looseObjectPath)
- require.NoError(t, err)
- testhelper.MustClose(t, looseObjectFile)
- }
-
- repackNeeded, repackCfg, err := needsRepacking(repo)
- require.NoError(t, err)
- require.Equal(t, tc.expectedRepack, repackNeeded)
- if tc.expectedRepack {
- require.Equal(t, housekeeping.RepackObjectsConfig{
- FullRepack: false,
- WriteBitmap: false,
- }, repackCfg)
- }
- })
- }
-}
-
-func TestPruneIfNeeded(t *testing.T) {
- t.Parallel()
-
- ctx := testhelper.Context(t)
- cfg, _ := setupRepositoryServiceWithoutRepo(t)
-
- for _, tc := range []struct {
- desc string
- isPool bool
- looseObjects []string
- expectedPrune bool
- }{
- {
- desc: "no objects",
- looseObjects: nil,
- expectedPrune: false,
- },
- {
- desc: "object not in 17 shard",
- looseObjects: []string{
- filepath.Join("ab/12345"),
- },
- expectedPrune: false,
- },
- {
- desc: "object in 17 shard",
- looseObjects: []string{
- filepath.Join("17/12345"),
- },
- expectedPrune: false,
- },
- {
- desc: "objects in different shards",
- looseObjects: []string{
- filepath.Join("ab/12345"),
- filepath.Join("cd/12345"),
- filepath.Join("12/12345"),
- filepath.Join("17/12345"),
- },
- expectedPrune: false,
- },
- {
- desc: "boundary",
- looseObjects: []string{
- filepath.Join("17/1"),
- filepath.Join("17/2"),
- filepath.Join("17/3"),
- filepath.Join("17/4"),
- },
- expectedPrune: false,
- },
- {
- desc: "exceeding boundary should cause repack",
- looseObjects: []string{
- filepath.Join("17/1"),
- filepath.Join("17/2"),
- filepath.Join("17/3"),
- filepath.Join("17/4"),
- filepath.Join("17/5"),
- },
- expectedPrune: true,
- },
- {
- desc: "exceeding boundary on pool",
- isPool: true,
- looseObjects: []string{
- filepath.Join("17/1"),
- filepath.Join("17/2"),
- filepath.Join("17/3"),
- filepath.Join("17/4"),
- filepath.Join("17/5"),
- },
- expectedPrune: false,
- },
- } {
- t.Run(tc.desc, func(t *testing.T) {
- relativePath := gittest.NewRepositoryName(t, true)
- if tc.isPool {
- relativePath = gittest.NewObjectPoolName(t)
- }
-
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
- RelativePath: relativePath,
- })
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
-
- for _, looseObjectPath := range tc.looseObjects {
- looseObjectPath := filepath.Join(repoPath, "objects", looseObjectPath)
- require.NoError(t, os.MkdirAll(filepath.Dir(looseObjectPath), 0o755))
-
- looseObjectFile, err := os.Create(looseObjectPath)
- require.NoError(t, err)
- testhelper.MustClose(t, looseObjectFile)
- }
-
- didPrune, err := pruneIfNeeded(ctx, repo)
- require.Equal(t, tc.expectedPrune, didPrune)
- require.NoError(t, err)
- })
- }
-}
-
-func TestPackRefsIfNeeded(t *testing.T) {
- t.Parallel()
-
- ctx := testhelper.Context(t)
- cfg, _ := setupRepositoryServiceWithoutRepo(t)
-
- const kiloByte = 1024
-
- for _, tc := range []struct {
- packedRefsSize int64
- requiredRefs int
- }{
- {
- packedRefsSize: 1,
- requiredRefs: 16,
- },
- {
- packedRefsSize: 1 * kiloByte,
- requiredRefs: 16,
- },
- {
- packedRefsSize: 10 * kiloByte,
- requiredRefs: 33,
- },
- {
- packedRefsSize: 100 * kiloByte,
- requiredRefs: 49,
- },
- {
- packedRefsSize: 1000 * kiloByte,
- requiredRefs: 66,
- },
- {
- packedRefsSize: 10000 * kiloByte,
- requiredRefs: 82,
- },
- {
- packedRefsSize: 100000 * kiloByte,
- requiredRefs: 99,
- },
- } {
- t.Run(fmt.Sprintf("packed-refs with %d bytes", tc.packedRefsSize), func(t *testing.T) {
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg)
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
-
- // Write an empty commit such that we can create valid refs.
- commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents())
- looseRefContent := []byte(commitID.String() + "\n")
-
- // We first create a single big packfile which is used to determine the
- // boundary of when we repack. We need to write a valid packed-refs file or
- // otherwise git-pack-refs(1) would choke later on, so we just write the
- // file such that every line is a separate ref of exactly 128 bytes in
- // length (a divisor of 1024), referring to the commit we created above.
- packedRefs, err := os.OpenFile(filepath.Join(repoPath, "packed-refs"), os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644)
- require.NoError(t, err)
- defer testhelper.MustClose(t, packedRefs)
- for i := int64(0); i < tc.packedRefsSize/128; i++ {
- packedRefLine := fmt.Sprintf("%s refs/something/this-line-is-padded-to-exactly-128-bytes-%030d\n", commitID.String(), i)
- require.Len(t, packedRefLine, 128)
- _, err := packedRefs.WriteString(packedRefLine)
- require.NoError(t, err)
- }
- require.NoError(t, packedRefs.Sync())
-
- // And then we create one less loose ref than we need to hit the boundary.
- // This is done to assert that we indeed don't repack before hitting the
- // boundary.
- for i := 0; i < tc.requiredRefs-1; i++ {
- looseRefPath := filepath.Join(repoPath, "refs", "heads", fmt.Sprintf("branch-%d", i))
- require.NoError(t, os.WriteFile(looseRefPath, looseRefContent, 0o644))
- }
-
- didRepack, err := packRefsIfNeeded(ctx, repo)
- require.NoError(t, err)
- require.False(t, didRepack)
-
- // Now we create the additional loose ref that causes us to hit the
- // boundary. We should thus see that we want to repack now.
- looseRefPath := filepath.Join(repoPath, "refs", "heads", "last-branch")
- require.NoError(t, os.WriteFile(looseRefPath, looseRefContent, 0o644))
-
- didRepack, err = packRefsIfNeeded(ctx, repo)
- require.NoError(t, err)
- require.True(t, didRepack)
- })
- }
-}
-
-func TestEstimateLooseObjectCount(t *testing.T) {
- t.Parallel()
-
- ctx := testhelper.Context(t)
- cfg, _ := setupRepositoryServiceWithoutRepo(t)
- repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg)
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
-
- t.Run("empty repository", func(t *testing.T) {
- looseObjects, err := estimateLooseObjectCount(repo)
- require.NoError(t, err)
- require.Zero(t, looseObjects)
- })
-
- t.Run("object in different shard", func(t *testing.T) {
- differentShard := filepath.Join(repoPath, "objects", "a0")
- require.NoError(t, os.MkdirAll(differentShard, 0o755))
-
- object, err := os.Create(filepath.Join(differentShard, "123456"))
- require.NoError(t, err)
- testhelper.MustClose(t, object)
-
- looseObjects, err := estimateLooseObjectCount(repo)
- require.NoError(t, err)
- require.Zero(t, looseObjects)
- })
-
- t.Run("object in estimation shard", func(t *testing.T) {
- estimationShard := filepath.Join(repoPath, "objects", "17")
- require.NoError(t, os.MkdirAll(estimationShard, 0o755))
-
- object, err := os.Create(filepath.Join(estimationShard, "123456"))
- require.NoError(t, err)
- testhelper.MustClose(t, object)
-
- looseObjects, err := estimateLooseObjectCount(repo)
- require.NoError(t, err)
- require.Equal(t, int64(256), looseObjects)
-
- // Create a second object in there.
- object, err = os.Create(filepath.Join(estimationShard, "654321"))
- require.NoError(t, err)
- testhelper.MustClose(t, object)
-
- looseObjects, err = estimateLooseObjectCount(repo)
- require.NoError(t, err)
- require.Equal(t, int64(512), looseObjects)
- })
-}