diff options
author | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-03-29 08:33:34 +0300 |
---|---|---|
committer | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-03-29 08:33:34 +0300 |
commit | 4a144bdd080a7eeca4399d43add443fd7326b227 (patch) | |
tree | 10418486d2bc2382b20196a8cbb27c5f9a657c4c | |
parent | 0c5b7fc851773654a7e2dedf195eb55409967284 (diff) | |
parent | d1c0a77c4b8df1bd075488d89c4c13f62f00f471 (diff) |
Merge branch 'pks-housekeeping-geometric-repacking' into 'master'
git/housekeeping: Implement support for geometric repacking
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5559
Merged-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Approved-by: Will Chandler <wchandler@gitlab.com>
Approved-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Reviewed-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Reviewed-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r-- | internal/git/housekeeping/objects.go | 128 | ||||
-rw-r--r-- | internal/git/housekeeping/objects_test.go | 271 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy.go | 14 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy_test.go | 28 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository.go | 9 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_ext_test.go | 9 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_test.go | 23 | ||||
-rw-r--r-- | internal/git/housekeeping/testhelper_test.go | 2 | ||||
-rw-r--r-- | internal/gitaly/service/repository/prune_unreachable_objects.go | 3 | ||||
-rw-r--r-- | internal/gitaly/service/repository/repack.go | 4 |
10 files changed, 398 insertions, 93 deletions
diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go index 4ac0d1221..7202b2c8b 100644 --- a/internal/git/housekeeping/objects.go +++ b/internal/git/housekeeping/objects.go @@ -24,20 +24,38 @@ const ( rfc2822DateFormat = "Mon Jan 02 2006 15:04:05 -0700" ) +// RepackObjectsStrategy defines how objects shall be repacked. +type RepackObjectsStrategy int + +const ( + // RepackObjectsStrategyIncremental performs an incremental repack by writing all loose + // objects that are currently reachable into a new packfile. + RepackObjectsStrategyIncremental = RepackObjectsStrategy(iota + 1) + // RepackObjectsStrategyFullWithLooseUnreachable performs a full repack by writing all + // reachable objects into a new packfile. Unreachable objects will be exploded into loose + // objects. + RepackObjectsStrategyFullWithLooseUnreachable + // RepackObjectsStrategyFullWithCruft performs a full repack by writing all reachable + // objects into a new packfile. Unreachable objects will be written into a separate cruft + // packfile. + RepackObjectsStrategyFullWithCruft + // RepackObjectsStrategyGeometric performs an geometric repack. This strategy will repack + // packfiles so that the resulting pack structure forms a geometric sequence in the number + // of objects. Loose objects will get soaked up as part of the repack regardless of their + // reachability. + RepackObjectsStrategyGeometric +) + // RepackObjectsConfig is configuration for RepackObjects. type RepackObjectsConfig struct { - // FullRepack determines whether all reachable objects should be repacked into a single - // packfile. This is much less efficient than doing incremental repacks, which only soak up - // all loose objects into a new packfile. - FullRepack bool + // Strategy determines the strategy with which to repack objects. + Strategy RepackObjectsStrategy // WriteBitmap determines whether reachability bitmaps should be written or not. There is no // reason to set this to `false`, except for legacy compatibility reasons with existing RPC // behaviour WriteBitmap bool // WriteMultiPackIndex determines whether a multi-pack index should be written or not. WriteMultiPackIndex bool - // WriteCruftPack determines whether unreachable objects shall be written into cruft packs. - WriteCruftPack bool // CruftExpireBefore determines the cutoff date before which unreachable cruft objects shall // be expired and thus deleted. CruftExpireBefore time.Time @@ -51,30 +69,30 @@ func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsC return err } - if !cfg.FullRepack && !cfg.WriteMultiPackIndex && cfg.WriteBitmap { - return structerr.NewInvalidArgument("cannot write packfile bitmap for an incremental repack") - } - - if !cfg.FullRepack && cfg.WriteCruftPack { - return structerr.NewInvalidArgument("cannot write cruft packs for an incremental repack") - } - - if !cfg.WriteCruftPack && !cfg.CruftExpireBefore.IsZero() { - return structerr.NewInvalidArgument("cannot expire cruft objects when not writing cruft packs") - } - var options []git.Option - if cfg.FullRepack { - options = append(options, + var isFullRepack bool + + switch cfg.Strategy { + case RepackObjectsStrategyIncremental: + options = []git.Option{ + git.Flag{Name: "-d"}, + } + case RepackObjectsStrategyFullWithLooseUnreachable: + options = []git.Option{ + git.Flag{Name: "-A"}, git.Flag{Name: "--pack-kept-objects"}, git.Flag{Name: "-l"}, - ) - - if cfg.WriteCruftPack { - options = append(options, git.Flag{Name: "--cruft"}) - } else { - options = append(options, git.Flag{Name: "-A"}) + git.Flag{Name: "-d"}, + } + isFullRepack = true + case RepackObjectsStrategyFullWithCruft: + options = []git.Option{ + git.Flag{Name: "--cruft"}, + git.Flag{Name: "--pack-kept-objects"}, + git.Flag{Name: "-l"}, + git.Flag{Name: "-d"}, } + isFullRepack = true if !cfg.CruftExpireBefore.IsZero() { options = append(options, git.ValueFlag{ @@ -82,7 +100,55 @@ func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsC Value: cfg.CruftExpireBefore.Format(rfc2822DateFormat), }) } + case RepackObjectsStrategyGeometric: + options = []git.Option{ + // We use a geometric factor `r`, which means that every successively larger + // packfile must have at least `r` times the number of objects. + // + // This factor ultimately determines how many packfiles there can be at a + // maximum in a repository for a given number of objects. The maximum number + // of objects with `n` packfiles and a factor `r` is `(1 - r^n) / (1 - r)`. + // E.g. with a factor of 4 and 10 packfiles, we can have at most 349,525 + // objects, with 16 packfiles we can have 1,431,655,765 objects. Contrary to + // that, having a factor of 2 will translate to 1023 objects at 10 packfiles + // and 65535 objects at 16 packfiles at a maximum. + // + // So what we're effectively choosing here is how often we need to repack + // larger parts of the repository. The higher the factor the more we'll have + // to repack as the packfiles will be larger. On the other hand, having a + // smaller factor means we'll have to repack less objects as the slices we + // need to repack will have less objects. + // + // The end result is a hybrid approach between incremental repacks and full + // repacks: we won't typically repack the full repository, but only a subset + // of packfiles. + // + // For now, we choose a geometric factor of two. Large repositories nowadays + // typically have a few million objects, which would boil down to having at + // most 32 packfiles in the repository. This number is not scientifically + // chosen though any may be changed at a later point in time. + git.ValueFlag{Name: "--geometric", Value: "2"}, + // Make sure to delete loose objects and packfiles that are made obsolete + // by the new packfile. + git.Flag{Name: "-d"}, + // Don't include objects part of an alternate. + git.Flag{Name: "-l"}, + } + default: + return structerr.NewInvalidArgument("invalid strategy %d", cfg.Strategy) + } + if cfg.WriteMultiPackIndex { + options = append(options, git.Flag{Name: "--write-midx"}) + } + if !isFullRepack && !cfg.WriteMultiPackIndex && cfg.WriteBitmap { + return structerr.NewInvalidArgument("cannot write packfile bitmap for an incremental repack") + } + if cfg.Strategy != RepackObjectsStrategyFullWithCruft && !cfg.CruftExpireBefore.IsZero() { + return structerr.NewInvalidArgument("cannot expire cruft objects when not writing cruft packs") + } + + if isFullRepack { // When we have performed a full repack we're updating the "full-repack-timestamp" // file. This is done so that we can tell when we have last performed a full repack // in a repository. This information can be used by our heuristics to effectively @@ -98,15 +164,9 @@ func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsC } } - if cfg.WriteMultiPackIndex { - options = append(options, git.Flag{Name: "--write-midx"}) - } - if err := repo.ExecAndWait(ctx, git.Command{ - Name: "repack", - Flags: append([]git.Option{ - git.Flag{Name: "-d"}, - }, options...), + Name: "repack", + Flags: options, }, git.WithConfig(GetRepackGitConfig(ctx, repo, cfg.WriteBitmap)...)); err != nil { return err } diff --git a/internal/git/housekeeping/objects_test.go b/internal/git/housekeeping/objects_test.go index fd9d96d79..36deaadd6 100644 --- a/internal/git/housekeeping/objects_test.go +++ b/internal/git/housekeeping/objects_test.go @@ -1,7 +1,9 @@ package housekeeping import ( + "os" "path/filepath" + "strings" "testing" "time" @@ -9,6 +11,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" + "gitlab.com/gitlab-org/gitaly/v15/internal/helper/perm" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" @@ -41,10 +44,21 @@ func TestRepackObjects(t *testing.T) { expectedErr error }{ { + desc: "default strategy fails", + setup: func(t *testing.T, repoPath string) {}, + repackCfg: RepackObjectsConfig{ + Strategy: 0, + }, + expectedErr: structerr.NewInvalidArgument("invalid strategy 0"), + }, + { desc: "incremental repack packs objects", setup: func(t *testing.T, repoPath string) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, + }, stateBeforeRepack: objectsState{ looseObjects: 2, }, @@ -61,6 +75,9 @@ func TestRepackObjects(t *testing.T) { repack(t, repoPath, "-d") gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("third"), gittest.WithBranch("third")) }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, + }, stateBeforeRepack: objectsState{ packfiles: 2, looseObjects: 1, @@ -75,6 +92,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("first"), gittest.WithBranch("first")) }, repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, WriteBitmap: true, }, stateBeforeRepack: objectsState{ @@ -96,7 +114,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("third"), gittest.WithBranch("third")) }, repackCfg: RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, }, stateBeforeRepack: objectsState{ looseObjects: 1, @@ -117,7 +135,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("third"), gittest.WithBranch("third")) }, repackCfg: RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, WriteBitmap: true, }, stateBeforeRepack: objectsState{ @@ -135,6 +153,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("master")) }, repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, WriteMultiPackIndex: true, }, stateBeforeRepack: objectsState{ @@ -151,6 +170,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("master")) }, repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, WriteMultiPackIndex: true, WriteBitmap: true, }, @@ -174,7 +194,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("third"), gittest.WithBranch("third")) }, repackCfg: RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, WriteMultiPackIndex: true, }, stateBeforeRepack: objectsState{ @@ -197,7 +217,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("third"), gittest.WithBranch("third")) }, repackCfg: RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, WriteBitmap: true, WriteMultiPackIndex: true, }, @@ -219,6 +239,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("second"), gittest.WithBranch("second")) }, repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, WriteBitmap: true, WriteMultiPackIndex: true, }, @@ -251,7 +272,7 @@ func TestRepackObjects(t *testing.T) { repack(t, repoPath, "-d") }, repackCfg: RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, WriteBitmap: true, WriteMultiPackIndex: true, }, @@ -266,22 +287,13 @@ func TestRepackObjects(t *testing.T) { }, }, { - desc: "writing cruft pack requires full repack", - setup: func(t *testing.T, repoPath string) {}, - repackCfg: RepackObjectsConfig{ - WriteCruftPack: true, - }, - expectedErr: structerr.NewInvalidArgument("cannot write cruft packs for an incremental repack"), - }, - { desc: "unreachable objects get moved into cruft pack", setup: func(t *testing.T, repoPath string) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("reachable"), gittest.WithBranch("reachable")) gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("unreachable")) }, repackCfg: RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, }, stateBeforeRepack: objectsState{ looseObjects: 3, @@ -295,6 +307,7 @@ func TestRepackObjects(t *testing.T) { desc: "expiring cruft objects requires writing cruft packs", setup: func(t *testing.T, repoPath string) {}, repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, CruftExpireBefore: time.Now(), }, expectedErr: structerr.NewInvalidArgument("cannot expire cruft objects when not writing cruft packs"), @@ -306,8 +319,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("unreachable")) }, repackCfg: RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: time.Now().Add(time.Hour), }, stateBeforeRepack: objectsState{ @@ -328,8 +340,7 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("unreachable")) }, repackCfg: RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: time.Now().Add(-1 * time.Hour), }, stateBeforeRepack: objectsState{ @@ -350,8 +361,7 @@ func TestRepackObjects(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "repack", "--cruft", "-d", "-n", "--no-write-bitmap-index") }, repackCfg: RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: time.Now().Add(-1 * time.Hour), }, stateBeforeRepack: objectsState{ @@ -371,8 +381,7 @@ func TestRepackObjects(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "repack", "--cruft", "-d", "-n", "--no-write-bitmap-index") }, repackCfg: RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: time.Now().Add(1 * time.Hour), }, stateBeforeRepack: objectsState{ @@ -383,6 +392,215 @@ func TestRepackObjects(t *testing.T) { packfiles: 1, }, }, + { + desc: "geometric repack with reachable object", + setup: func(t *testing.T, repoPath string) { + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main"), gittest.WithMessage("unreachable")) + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + looseObjects: 2, + }, + stateAfterRepack: objectsState{ + packfiles: 1, + }, + }, + { + desc: "geometric repack soaks up unreachable objects", + setup: func(t *testing.T, repoPath string) { + gittest.WriteBlob(t, cfg, repoPath, []byte("unreachable blob")) + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + looseObjects: 1, + }, + stateAfterRepack: objectsState{ + packfiles: 1, + }, + }, + { + desc: "geometric repack leaves cruft pack alone", + setup: func(t *testing.T, repoPath string) { + gittest.WriteBlob(t, cfg, repoPath, []byte("unreachable blob")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "--cruft", "-d", "-n", "--no-write-bitmap-index") + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + packfiles: 1, + cruftPacks: 1, + }, + stateAfterRepack: objectsState{ + packfiles: 1, + cruftPacks: 1, + }, + }, + { + desc: "geometric repack leaves keep pack alone", + setup: func(t *testing.T, repoPath string) { + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "-n", "--no-write-bitmap-index") + + packPath, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "pack-*.pack")) + require.NoError(t, err) + require.Len(t, packPath, 1) + + keepPath := strings.TrimSuffix(packPath[0], ".pack") + ".keep" + require.NoError(t, os.WriteFile(keepPath, nil, perm.PrivateFile)) + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + packfiles: 1, + keepPacks: 1, + }, + stateAfterRepack: objectsState{ + packfiles: 1, + keepPacks: 1, + }, + }, + { + desc: "geometric repack keeps valid geometric sequence", + setup: func(t *testing.T, repoPath string) { + // Write a commit that in total contains 3 objects: 1 commit, 1 tree + // and 1 blob. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "a", Mode: "100644", Content: "a"}, + ), gittest.WithBranch("main")) + + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "-n", "--no-write-bitmap-index") + + // Now we write another blob. As the previous packfile contains 3x + // the amount of objects we should just create a new packfile + // instead of merging them. + gittest.WriteBlob(t, cfg, repoPath, []byte("new blob")) + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + packfiles: 1, + looseObjects: 1, + }, + stateAfterRepack: objectsState{ + packfiles: 2, + }, + }, + { + desc: "geometric repack keeps preexisting pack when new pack violates sequence", + setup: func(t *testing.T, repoPath string) { + // Write a commit that in total contains 3 objects: 1 commit, 1 tree + // and 1 blob. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "a", Mode: "100644", Content: "a"}, + ), gittest.WithBranch("main")) + + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "-n", "--no-write-bitmap-index") + + // Now we write two additional blobs. Even though the newly written + // packfile will cause us to invalidate the geometric sequence, we + // still create it without merging them. This _seems_ to be on + // purpose: packfiles will only be merged when preexisting packs + // validate the sequence. + gittest.WriteBlob(t, cfg, repoPath, []byte("one")) + gittest.WriteBlob(t, cfg, repoPath, []byte("two")) + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + packfiles: 1, + looseObjects: 2, + }, + stateAfterRepack: objectsState{ + packfiles: 2, + }, + }, + { + desc: "geometric repack repacks when geometric sequence is invalidated", + setup: func(t *testing.T, repoPath string) { + // Write a commit that in total contains 3 objects: 1 commit, 1 tree + // and 1 blob. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "a", Mode: "100644", Content: "a"}, + ), gittest.WithBranch("main")) + + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad", "-n", "--no-write-bitmap-index") + + // Write a second set of objects that is reachable so that we can + // create a second packfile easily. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "1", Mode: "100644", Content: "1"}, + ), gittest.WithBranch("main")) + // Do an incremental repack now. We thus have two packfiles that + // invalidate the geometric sequence. + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-d", "-n", "--no-write-bitmap-index") + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + packfiles: 2, + }, + stateAfterRepack: objectsState{ + packfiles: 1, + }, + }, + { + desc: "geometric repack consolidates multiple packs into one", + setup: func(t *testing.T, repoPath string) { + // Write a commit that in total contains 3 objects: 1 commit, 1 tree + // and 1 blob. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "a", Mode: "100644", Content: "a"}, + ), gittest.WithBranch("main")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-d", "-n", "--no-write-bitmap-index") + + // Write another commit with three new objects, so that we now have + // two packfiles with three objects each. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "b", Mode: "100644", Content: "b"}, + ), gittest.WithBranch("main")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-d", "-n", "--no-write-bitmap-index") + + // Write a third commit with 12 new objects. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "01", Mode: "100644", Content: "01"}, + gittest.TreeEntry{Path: "02", Mode: "100644", Content: "02"}, + gittest.TreeEntry{Path: "03", Mode: "100644", Content: "03"}, + gittest.TreeEntry{Path: "04", Mode: "100644", Content: "04"}, + gittest.TreeEntry{Path: "05", Mode: "100644", Content: "05"}, + gittest.TreeEntry{Path: "06", Mode: "100644", Content: "06"}, + gittest.TreeEntry{Path: "07", Mode: "100644", Content: "07"}, + gittest.TreeEntry{Path: "08", Mode: "100644", Content: "08"}, + gittest.TreeEntry{Path: "09", Mode: "100644", Content: "09"}, + gittest.TreeEntry{Path: "10", Mode: "100644", Content: "10"}, + ), gittest.WithBranch("main")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-d", "-n", "--no-write-bitmap-index") + + // We now have three packfiles: two with three objects respectively, + // and one with 12 objects. The first two packfiles do not form a + // geometric sequence, but if they are packed together they result + // in a packfile with 6 objects, which would restore the sequence. + // So what we want to see is that we start with three, but end with + // 2 packfiles. + }, + repackCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyGeometric, + }, + stateBeforeRepack: objectsState{ + packfiles: 3, + }, + stateAfterRepack: objectsState{ + packfiles: 2, + }, + }, } { tc := tc @@ -400,9 +618,10 @@ func TestRepackObjects(t *testing.T) { require.Equal(t, tc.expectedErr, RepackObjects(ctx, repo, tc.repackCfg)) requireObjectsState(t, repo, tc.stateAfterRepack) - if tc.repackCfg.FullRepack { + switch tc.repackCfg.Strategy { + case RepackObjectsStrategyFullWithLooseUnreachable, RepackObjectsStrategyFullWithCruft: require.FileExists(t, filepath.Join(repoPath, stats.FullRepackTimestampFilename)) - } else { + default: require.NoFileExists(t, filepath.Join(repoPath, stats.FullRepackTimestampFilename)) } @@ -423,7 +642,7 @@ func TestRepackObjects(t *testing.T) { gittest.TestDeltaIslands(t, cfg, repoPath, repoPath, stats.IsPoolRepository(repoProto), func() error { return RepackObjects(ctx, repo, RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, }) }) }) diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go index c4ac2411a..878669ae0 100644 --- a/internal/git/housekeeping/optimization_strategy.go +++ b/internal/git/housekeeping/optimization_strategy.go @@ -95,7 +95,7 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects(ctx context.Context math.Log(float64(s.info.Packfiles.Size/1024/1024))/math.Log(log))) <= s.info.Packfiles.Count { cfg := RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, WriteBitmap: len(s.info.Alternates) == 0, WriteMultiPackIndex: true, } @@ -108,7 +108,7 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects(ctx context.Context // This is left for another iteration though once we have determined that this is // even necessary. if featureflag.WriteCruftPacks.IsEnabled(ctx) && !s.info.IsObjectPool { - cfg.WriteCruftPack = true + cfg.Strategy = RepackObjectsStrategyFullWithCruft cfg.CruftExpireBefore = s.expireBefore } @@ -130,7 +130,7 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects(ctx context.Context // it is necessary on the client side. We thus take a much stricter limit of 1024 objects. if s.info.LooseObjects.Count > looseObjectLimit { return true, RepackObjectsConfig{ - FullRepack: false, + Strategy: RepackObjectsStrategyIncremental, // Without multi-pack-index we cannot write bitmaps during an incremental // repack. WriteBitmap: len(s.info.Alternates) == 0, @@ -142,7 +142,7 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects(ctx context.Context // multi-pack-index we perform an incremental repack to generate one. if !s.info.Packfiles.MultiPackIndex.Exists { return true, RepackObjectsConfig{ - FullRepack: false, + Strategy: RepackObjectsStrategyIncremental, WriteBitmap: len(s.info.Alternates) == 0, WriteMultiPackIndex: true, } @@ -188,7 +188,7 @@ func (s HeuristicalOptimizationStrategy) ShouldWriteCommitGraph(ctx context.Cont // Same as with pruning: if we are repacking the repository and write cruft // packs with an expiry date then we may end up pruning objects. We thus // need to replace the commit-graph chain in that case. - ReplaceChain: repackCfg.WriteCruftPack && !repackCfg.CruftExpireBefore.IsZero(), + ReplaceChain: repackCfg.Strategy == RepackObjectsStrategyFullWithCruft && !repackCfg.CruftExpireBefore.IsZero(), } } @@ -273,7 +273,7 @@ func NewEagerOptimizationStrategy(info stats.RepositoryInfo) EagerOptimizationSt // not have any alterantes. func (s EagerOptimizationStrategy) ShouldRepackObjects(ctx context.Context) (bool, RepackObjectsConfig) { cfg := RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, WriteBitmap: len(s.info.Alternates) == 0, WriteMultiPackIndex: true, } @@ -281,7 +281,7 @@ func (s EagerOptimizationStrategy) ShouldRepackObjects(ctx context.Context) (boo // Object pools should neither have unreachable objects, nor should we ever try to delete // any if there are some. So we disable cruft packs and expiration of them for them. if featureflag.WriteCruftPacks.IsEnabled(ctx) && !s.info.IsObjectPool { - cfg.WriteCruftPack = true + cfg.Strategy = RepackObjectsStrategyFullWithCruft cfg.CruftExpireBefore = s.expireBefore } diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go index cb2d15c85..5012827d6 100644 --- a/internal/git/housekeeping/optimization_strategy_test.go +++ b/internal/git/housekeeping/optimization_strategy_test.go @@ -45,7 +45,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co }, expectedNeeded: true, expectedConfig: RepackObjectsConfig{ - FullRepack: false, + Strategy: RepackObjectsStrategyIncremental, WriteBitmap: true, WriteMultiPackIndex: true, }, @@ -71,6 +71,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co // exist in pooled repositories. expectedNeeded: true, expectedConfig: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, WriteMultiPackIndex: true, }, }, @@ -88,7 +89,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co }, expectedNeeded: true, expectedConfig: RepackObjectsConfig{ - FullRepack: false, + Strategy: RepackObjectsStrategyIncremental, WriteBitmap: true, WriteMultiPackIndex: true, }, @@ -215,10 +216,14 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co repackNeeded, repackCfg := strategy.ShouldRepackObjects(ctx) require.True(t, repackNeeded) require.Equal(t, RepackObjectsConfig{ - FullRepack: true, + Strategy: func() RepackObjectsStrategy { + if featureflag.WriteCruftPacks.IsEnabled(ctx) && !tc.isPool { + return RepackObjectsStrategyFullWithCruft + } + return RepackObjectsStrategyFullWithLooseUnreachable + }(), WriteBitmap: len(tc.alternates) == 0, WriteMultiPackIndex: true, - WriteCruftPack: featureflag.WriteCruftPacks.IsEnabled(ctx) && !tc.isPool, CruftExpireBefore: expireBefore, }, repackCfg) }) @@ -288,7 +293,12 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co repackNeeded, repackCfg := strategy.ShouldRepackObjects(ctx) require.Equal(t, outerTC.expectedRepack, repackNeeded) require.Equal(t, RepackObjectsConfig{ - FullRepack: false, + Strategy: func() RepackObjectsStrategy { + if repackNeeded { + return RepackObjectsStrategyIncremental + } + return 0 + }(), WriteBitmap: repackNeeded, WriteMultiPackIndex: repackNeeded, }, repackCfg) @@ -693,10 +703,14 @@ func testEagerOptimizationStrategy(t *testing.T, ctx context.Context) { shouldRepackObjects, repackObjectsCfg := tc.strategy.ShouldRepackObjects(ctx) require.True(t, shouldRepackObjects) require.Equal(t, RepackObjectsConfig{ - FullRepack: true, + Strategy: func() RepackObjectsStrategy { + if featureflag.WriteCruftPacks.IsEnabled(ctx) && !tc.strategy.info.IsObjectPool { + return RepackObjectsStrategyFullWithCruft + } + return RepackObjectsStrategyFullWithLooseUnreachable + }(), WriteBitmap: tc.expectWriteBitmap, WriteMultiPackIndex: true, - WriteCruftPack: featureflag.WriteCruftPacks.IsEnabled(ctx) && !tc.strategy.info.IsObjectPool, CruftExpireBefore: expectedExpireBefore, }, repackObjectsCfg) diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go index 181e3fe70..1ef8bdd54 100644 --- a/internal/git/housekeeping/optimize_repository.go +++ b/internal/git/housekeeping/optimize_repository.go @@ -174,10 +174,13 @@ func optimizeRepository( return fmt.Errorf("could not repack: %w", err) } if didRepack { - if repackCfg.FullRepack { - optimizations["packed_objects_full"] = "success" - } else { + switch repackCfg.Strategy { + case RepackObjectsStrategyIncremental: optimizations["packed_objects_incremental"] = "success" + case RepackObjectsStrategyFullWithLooseUnreachable: + optimizations["packed_objects_full"] = "success" + case RepackObjectsStrategyFullWithCruft: + optimizations["packed_objects_cruft"] = "success" } if repackCfg.WriteBitmap { optimizations["written_bitmap"] = "success" diff --git a/internal/git/housekeeping/optimize_repository_ext_test.go b/internal/git/housekeeping/optimize_repository_ext_test.go index c01d7e977..87b25fce2 100644 --- a/internal/git/housekeeping/optimize_repository_ext_test.go +++ b/internal/git/housekeeping/optimize_repository_ext_test.go @@ -204,6 +204,13 @@ func testOptimizeRepositoryObjectPoolMember(t *testing.T, ctx context.Context) { catfileCache := catfile.NewCache(cfg) defer catfileCache.Stop() + fullOrCruft := func() string { + if featureflag.WriteCruftPacks.IsEnabled(ctx) { + return "packed_objects_cruft" + } + return "packed_objects_full" + }() + for _, tc := range []struct { desc string strategyConstructor housekeeping.OptimizationStrategyConstructor @@ -216,7 +223,7 @@ func testOptimizeRepositoryObjectPoolMember(t *testing.T, ctx context.Context) { }, expectedLogEntries: map[string]string{ "packed_refs": "success", - "packed_objects_full": "success", + fullOrCruft: "success", "pruned_objects": "success", "written_commit_graph_full": "success", "written_multi_pack_index": "success", diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go index 19f97a66c..9fbed6210 100644 --- a/internal/git/housekeeping/optimize_repository_test.go +++ b/internal/git/housekeeping/optimize_repository_test.go @@ -69,10 +69,15 @@ func TestRepackIfNeeded(t *testing.T) { didRepack, repackObjectsCfg, err := repackIfNeeded(ctx, repo, mockOptimizationStrategy{ shouldRepackObjects: true, + repackObjectsCfg: RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, + }, }) require.NoError(t, err) require.True(t, didRepack) - require.Equal(t, RepackObjectsConfig{}, repackObjectsCfg) + require.Equal(t, RepackObjectsConfig{ + Strategy: RepackObjectsStrategyIncremental, + }, repackObjectsCfg) requireObjectsState(t, repo, objectsState{ packfiles: 2, @@ -96,13 +101,13 @@ func TestRepackIfNeeded(t *testing.T) { didRepack, repackObjectsCfg, err := repackIfNeeded(ctx, repo, mockOptimizationStrategy{ shouldRepackObjects: true, repackObjectsCfg: RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, }, }) require.NoError(t, err) require.True(t, didRepack) require.Equal(t, RepackObjectsConfig{ - FullRepack: true, + Strategy: RepackObjectsStrategyFullWithLooseUnreachable, }, repackObjectsCfg) requireObjectsState(t, repo, objectsState{ @@ -126,16 +131,14 @@ func TestRepackIfNeeded(t *testing.T) { didRepack, repackObjectsCfg, err := repackIfNeeded(ctx, repo, mockOptimizationStrategy{ shouldRepackObjects: true, repackObjectsCfg: RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: expiryTime, }, }) require.NoError(t, err) require.True(t, didRepack) require.Equal(t, RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: expiryTime, }, repackObjectsCfg) @@ -162,16 +165,14 @@ func TestRepackIfNeeded(t *testing.T) { didRepack, repackObjectsCfg, err := repackIfNeeded(ctx, repo, mockOptimizationStrategy{ shouldRepackObjects: true, repackObjectsCfg: RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: expiryTime, }, }) require.NoError(t, err) require.True(t, didRepack) require.Equal(t, RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: RepackObjectsStrategyFullWithCruft, CruftExpireBefore: expiryTime, }, repackObjectsCfg) diff --git a/internal/git/housekeeping/testhelper_test.go b/internal/git/housekeeping/testhelper_test.go index 4b8e922ad..4e7d1a291 100644 --- a/internal/git/housekeeping/testhelper_test.go +++ b/internal/git/housekeeping/testhelper_test.go @@ -31,6 +31,7 @@ type objectsState struct { looseObjects uint64 packfiles uint64 cruftPacks uint64 + keepPacks uint64 hasBitmap bool hasMultiPackIndex bool hasMultiPackIndexBitmap bool @@ -46,6 +47,7 @@ func requireObjectsState(tb testing.TB, repo *localrepo.Repo, expectedState obje looseObjects: repoInfo.LooseObjects.Count, packfiles: repoInfo.Packfiles.Count, cruftPacks: repoInfo.Packfiles.CruftCount, + keepPacks: repoInfo.Packfiles.KeepCount, hasBitmap: repoInfo.Packfiles.Bitmap.Exists, hasMultiPackIndex: repoInfo.Packfiles.MultiPackIndex.Exists, hasMultiPackIndexBitmap: repoInfo.Packfiles.MultiPackIndexBitmap.Exists, diff --git a/internal/gitaly/service/repository/prune_unreachable_objects.go b/internal/gitaly/service/repository/prune_unreachable_objects.go index 49904e0a6..f19ea799c 100644 --- a/internal/gitaly/service/repository/prune_unreachable_objects.go +++ b/internal/gitaly/service/repository/prune_unreachable_objects.go @@ -57,8 +57,7 @@ func (s *server) PruneUnreachableObjects( // that is to do a full repack. So unfortunately, this is quite expensive. if featureflag.WriteCruftPacks.IsEnabled(ctx) { if err := housekeeping.RepackObjects(ctx, repo, housekeeping.RepackObjectsConfig{ - FullRepack: true, - WriteCruftPack: true, + Strategy: housekeeping.RepackObjectsStrategyFullWithCruft, WriteMultiPackIndex: true, WriteBitmap: len(repoInfo.Alternates) == 0, CruftExpireBefore: expireBefore, diff --git a/internal/gitaly/service/repository/repack.go b/internal/gitaly/service/repository/repack.go index 164902ee7..b82d95da0 100644 --- a/internal/gitaly/service/repository/repack.go +++ b/internal/gitaly/service/repository/repack.go @@ -32,7 +32,7 @@ func (s *server) RepackFull(ctx context.Context, in *gitalypb.RepackFullRequest) repo := s.localrepo(repository) cfg := housekeeping.RepackObjectsConfig{ - FullRepack: true, + Strategy: housekeeping.RepackObjectsStrategyFullWithLooseUnreachable, WriteBitmap: in.GetCreateBitmap(), } @@ -64,7 +64,7 @@ func (s *server) RepackIncremental(ctx context.Context, in *gitalypb.RepackIncre repo := s.localrepo(repository) cfg := housekeeping.RepackObjectsConfig{ - FullRepack: false, + Strategy: housekeeping.RepackObjectsStrategyIncremental, WriteBitmap: false, } |