diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-03-03 14:54:14 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-03-06 09:30:35 +0300 |
commit | 19aec67faa91621e598ecc6bfab6dcf2f9f04a41 (patch) | |
tree | 763781453fce455f5803436bb57c0855c1c06619 | |
parent | a965842fe3cc7212b35f936d59ede06c20868d23 (diff) |
housekeeping: Let strategies control expiration grace period
When pruning objects we have a default grace period of two weeks. This
grace period is dictated by `housekeeping.OptimizeRepository()`, which
has this limit hardcoded. This has two issues:
- `OptimizeRepository()` should not be responsible for the policy of
when exactly we perform certain optimizations. This is the job of
the optimization strategy instead.
- We pass the expiration grace period as approxidate to git-prune(1)
and thus have no fine-grained control of which objects exactly get
pruned. This will become a problem once we introduce cruft packs,
where we will then need to use the same date both when creating
the cruft packs and when pruning objects.
Introduce a new `PruneObjects()` helper function along with a config
struct that allows the caller to set the exact date before which objects
shall be pruned. This allows us to easily move control of that expiry
date into optimization strategies.
-rw-r--r-- | internal/git/housekeeping/objects.go | 36 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy.go | 39 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy_test.go | 51 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository.go | 17 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_test.go | 6 | ||||
-rw-r--r-- | internal/gitaly/service/repository/optimize_test.go | 14 |
6 files changed, 122 insertions, 41 deletions
diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go index 042efb798..33f8d9c11 100644 --- a/internal/git/housekeeping/objects.go +++ b/internal/git/housekeeping/objects.go @@ -2,7 +2,9 @@ package housekeeping import ( "context" + "fmt" "strconv" + "time" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" @@ -15,6 +17,8 @@ const ( // looseObjectLimit is the limit of loose objects we accept both when doing incremental // repacks and when pruning objects. looseObjectLimit = 1024 + // rfc2822DateFormat is the date format that Git typically uses for dates. + rfc2822DateFormat = "Mon Jan 02 2006 15:04:05 -0700" ) // RepackObjectsConfig is configuration for RepackObjects. @@ -87,3 +91,35 @@ func GetRepackGitConfig(ctx context.Context, repo repository.GitRepo, bitmap boo return config } + +// PruneObjectsConfig determines which objects should be pruned in PruneObjects. +type PruneObjectsConfig struct { + // ExpireBefore controls the grace period after which unreachable objects shall be pruned. + // An unreachable object must be older than the given date in order to be considered for + // deletion. + ExpireBefore time.Time +} + +// PruneObjects prunes loose objects from the repository that are already packed or which are +// unreachable and older than the configured expiry date. +func PruneObjects(ctx context.Context, repo *localrepo.Repo, cfg PruneObjectsConfig) error { + if err := repo.ExecAndWait(ctx, git.Command{ + Name: "prune", + Flags: []git.Option{ + // By default, this prunes all unreachable objects regardless of when they + // have last been accessed. This opens us up for races when there are + // concurrent commands which are just at the point of writing objects into + // the repository, but which haven't yet updated any references to make them + // reachable. + // + // To avoid this race, we use a grace window that can be specified by the + // caller so that we only delete objects that are older than this grace + // window. + git.ValueFlag{Name: "--expire", Value: cfg.ExpireBefore.Format(rfc2822DateFormat)}, + }, + }); err != nil { + return fmt.Errorf("executing prune: %w", err) + } + + return nil +} diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go index 0fd5f89f7..ab6a75576 100644 --- a/internal/git/housekeeping/optimization_strategy.go +++ b/internal/git/housekeeping/optimization_strategy.go @@ -3,6 +3,7 @@ package housekeeping import ( "context" "math" + "time" "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" ) @@ -14,8 +15,8 @@ type OptimizationStrategy interface { // how it should be done. ShouldRepackObjects(context.Context) (bool, RepackObjectsConfig) // ShouldPruneObjects determines whether the repository has stale objects that should be - // pruned. - ShouldPruneObjects(context.Context) bool + // pruned and, if so, how it should be done. + ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig) // ShouldRepackReferences determines whether the repository's references need to be // repacked. ShouldRepackReferences(context.Context) bool @@ -27,7 +28,8 @@ type OptimizationStrategy interface { // HeuristicalOptimizationStrategy is an optimization strategy that is based on a set of // heuristics. type HeuristicalOptimizationStrategy struct { - info stats.RepositoryInfo + info stats.RepositoryInfo + expireBefore time.Time } // NewHeuristicalOptimizationStrategy constructs a heuristicalOptimizationStrategy for the given @@ -35,7 +37,8 @@ type HeuristicalOptimizationStrategy struct { // repository can be decided without further disk reads. func NewHeuristicalOptimizationStrategy(info stats.RepositoryInfo) HeuristicalOptimizationStrategy { return HeuristicalOptimizationStrategy{ - info: info, + info: info, + expireBefore: time.Now().Add(stats.StaleObjectsGracePeriod), } } @@ -150,7 +153,7 @@ func (s HeuristicalOptimizationStrategy) ShouldWriteCommitGraph(ctx context.Cont // // To fix this case we will replace the complete commit-chain when we have pruned objects // from the repository. - if s.ShouldPruneObjects(ctx) { + if shouldPrune, _ := s.ShouldPruneObjects(ctx); shouldPrune { return true, WriteCommitGraphConfig{ ReplaceChain: true, } @@ -174,20 +177,22 @@ func (s HeuristicalOptimizationStrategy) ShouldWriteCommitGraph(ctx context.Cont // ShouldPruneObjects determines whether the repository has stale objects that should be pruned. // Object pools are never pruned to not lose data in them, but otherwise we prune when we've found // enough stale objects that might in fact get pruned. -func (s HeuristicalOptimizationStrategy) ShouldPruneObjects(context.Context) bool { +func (s HeuristicalOptimizationStrategy) ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig) { // Pool repositories must never prune any objects, or otherwise we may corrupt members of // that pool if they still refer to that object. if s.info.IsObjectPool { - return false + return false, PruneObjectsConfig{} } // When we have a number of loose objects that is older than two weeks then they have // surpassed the grace period and may thus be pruned. if s.info.LooseObjects.StaleCount <= looseObjectLimit { - return false + return false, PruneObjectsConfig{} } - return true + return true, PruneObjectsConfig{ + ExpireBefore: s.expireBefore, + } } // ShouldRepackReferences determines whether the repository's references need to be repacked based @@ -230,13 +235,15 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackReferences(context.Context) // EagerOptimizationStrategy is a strategy that will eagerly perform optimizations. All of the data // structures will be optimized regardless of whether they already are in an optimal state or not. type EagerOptimizationStrategy struct { - info stats.RepositoryInfo + info stats.RepositoryInfo + expireBefore time.Time } // NewEagerOptimizationStrategy creates a new EagerOptimizationStrategy. func NewEagerOptimizationStrategy(info stats.RepositoryInfo) EagerOptimizationStrategy { return EagerOptimizationStrategy{ - info: info, + info: info, + expireBefore: time.Now().Add(stats.StaleObjectsGracePeriod), } } @@ -261,8 +268,14 @@ func (s EagerOptimizationStrategy) ShouldWriteCommitGraph(context.Context) (bool // ShouldPruneObjects always instructs the caller to prune objects, unless the repository is an // object pool. -func (s EagerOptimizationStrategy) ShouldPruneObjects(context.Context) bool { - return !s.info.IsObjectPool +func (s EagerOptimizationStrategy) ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig) { + if s.info.IsObjectPool { + return false, PruneObjectsConfig{} + } + + return true, PruneObjectsConfig{ + ExpireBefore: s.expireBefore, + } } // ShouldRepackReferences always instructs the caller to repack references. diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go index fe7367fbb..731ba9a5b 100644 --- a/internal/git/housekeeping/optimization_strategy_test.go +++ b/internal/git/housekeeping/optimization_strategy_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "testing" + "time" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" @@ -282,15 +283,19 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) { t.Parallel() ctx := testhelper.Context(t) + expireBefore := time.Now() for _, tc := range []struct { desc string strategy HeuristicalOptimizationStrategy expectedShouldPruneObjects bool + expectedPruneObjectsConfig PruneObjectsConfig }{ { - desc: "empty repository", - strategy: HeuristicalOptimizationStrategy{}, + desc: "empty repository", + strategy: HeuristicalOptimizationStrategy{ + expireBefore: expireBefore, + }, expectedShouldPruneObjects: false, }, { @@ -301,6 +306,7 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) { Count: 10000, }, }, + expireBefore: expireBefore, }, expectedShouldPruneObjects: false, }, @@ -312,6 +318,7 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) { StaleCount: 1000, }, }, + expireBefore: expireBefore, }, expectedShouldPruneObjects: false, }, @@ -323,19 +330,28 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) { StaleCount: 1025, }, }, + expireBefore: expireBefore, }, expectedShouldPruneObjects: true, + expectedPruneObjectsConfig: PruneObjectsConfig{ + ExpireBefore: expireBefore, + }, }, } { t.Run(tc.desc, func(t *testing.T) { t.Run("normal repository", func(t *testing.T) { - require.Equal(t, tc.expectedShouldPruneObjects, tc.strategy.ShouldPruneObjects(ctx)) + shouldPrune, pruneCfg := tc.strategy.ShouldPruneObjects(ctx) + require.Equal(t, tc.expectedShouldPruneObjects, shouldPrune) + require.Equal(t, tc.expectedPruneObjectsConfig, pruneCfg) }) t.Run("object pool", func(t *testing.T) { strategy := tc.strategy strategy.info.IsObjectPool = true - require.False(t, strategy.ShouldPruneObjects(ctx)) + + shouldPrune, pruneCfg := tc.strategy.ShouldPruneObjects(ctx) + require.Equal(t, tc.expectedShouldPruneObjects, shouldPrune) + require.Equal(t, tc.expectedPruneObjectsConfig, pruneCfg) }) }) } @@ -559,16 +575,25 @@ func TestEagerOptimizationStrategy(t *testing.T) { ctx := testhelper.Context(t) + expireBefore := time.Now() + for _, tc := range []struct { desc string strategy EagerOptimizationStrategy expectWriteBitmap bool expectShouldPruneObjects bool + pruneObjectsCfg PruneObjectsConfig }{ { - desc: "no alternate", + desc: "no alternate", + strategy: EagerOptimizationStrategy{ + expireBefore: expireBefore, + }, expectWriteBitmap: true, expectShouldPruneObjects: true, + pruneObjectsCfg: PruneObjectsConfig{ + ExpireBefore: expireBefore, + }, }, { desc: "alternate", @@ -576,9 +601,13 @@ func TestEagerOptimizationStrategy(t *testing.T) { info: stats.RepositoryInfo{ Alternates: []string{"path/to/alternate"}, }, + expireBefore: expireBefore, }, expectWriteBitmap: false, expectShouldPruneObjects: true, + pruneObjectsCfg: PruneObjectsConfig{ + ExpireBefore: expireBefore, + }, }, { desc: "object pool", @@ -586,6 +615,7 @@ func TestEagerOptimizationStrategy(t *testing.T) { info: stats.RepositoryInfo{ IsObjectPool: true, }, + expireBefore: expireBefore, }, expectWriteBitmap: true, expectShouldPruneObjects: false, @@ -597,6 +627,7 @@ func TestEagerOptimizationStrategy(t *testing.T) { IsObjectPool: true, Alternates: []string{"path/to/alternate"}, }, + expireBefore: expireBefore, }, expectWriteBitmap: false, expectShouldPruneObjects: false, @@ -617,7 +648,10 @@ func TestEagerOptimizationStrategy(t *testing.T) { ReplaceChain: true, }, writeCommitGraphCfg) - require.Equal(t, tc.expectShouldPruneObjects, tc.strategy.ShouldPruneObjects(ctx)) + shouldPruneObjects, pruneObjectsCfg := tc.strategy.ShouldPruneObjects(ctx) + require.Equal(t, tc.expectShouldPruneObjects, shouldPruneObjects) + require.Equal(t, tc.pruneObjectsCfg, pruneObjectsCfg) + require.True(t, tc.strategy.ShouldRepackReferences(ctx)) }) } @@ -628,6 +662,7 @@ type mockOptimizationStrategy struct { shouldRepackObjects bool repackObjectsCfg RepackObjectsConfig shouldPruneObjects bool + pruneObjectsCfg PruneObjectsConfig shouldRepackReferences bool shouldWriteCommitGraph bool writeCommitGraphCfg WriteCommitGraphConfig @@ -637,8 +672,8 @@ func (m mockOptimizationStrategy) ShouldRepackObjects(context.Context) (bool, Re return m.shouldRepackObjects, m.repackObjectsCfg } -func (m mockOptimizationStrategy) ShouldPruneObjects(context.Context) bool { - return m.shouldPruneObjects +func (m mockOptimizationStrategy) ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig) { + return m.shouldPruneObjects, m.pruneObjectsCfg } func (m mockOptimizationStrategy) ShouldRepackReferences(context.Context) bool { diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go index bce297315..bc964d8ef 100644 --- a/internal/git/housekeeping/optimize_repository.go +++ b/internal/git/housekeeping/optimize_repository.go @@ -253,22 +253,13 @@ func writeCommitGraphIfNeeded(ctx context.Context, repo *localrepo.Repo, strateg // pruneIfNeeded removes objects from the repository which are either unreachable or which are // already part of a packfile. We use a grace period of two weeks. func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo, strategy OptimizationStrategy) (bool, error) { - if !strategy.ShouldPruneObjects(ctx) { + needed, cfg := strategy.ShouldPruneObjects(ctx) + if !needed { return false, nil } - if err := repo.ExecAndWait(ctx, git.Command{ - Name: "prune", - Flags: []git.Option{ - // By default, this prunes all unreachable objects regardless of when they - // have last been accessed. This opens us up for races when there are - // concurrent commands which are just at the point of writing objects into - // the repository, but which haven't yet updated any references to make them - // reachable. We thus use the same two-week grace period as git-gc(1) does. - git.ValueFlag{Name: "--expire", Value: "two.weeks.ago"}, - }, - }); err != nil { - return false, fmt.Errorf("pruning objects: %w", err) + if err := PruneObjects(ctx, repo, cfg); err != nil { + return true, fmt.Errorf("pruning objects: %w", err) } return true, nil diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go index 8bfaec14e..a10276d7b 100644 --- a/internal/git/housekeeping/optimize_repository_test.go +++ b/internal/git/housekeeping/optimize_repository_test.go @@ -624,6 +624,9 @@ func TestPruneIfNeeded(t *testing.T) { // We shouldn't prune when the strategy determines there aren't enough old objects. didPrune, err := pruneIfNeeded(ctx, repo, mockOptimizationStrategy{ shouldPruneObjects: false, + pruneObjectsCfg: PruneObjectsConfig{ + ExpireBefore: twoWeeksAgo, + }, }) require.NoError(t, err) require.False(t, didPrune) @@ -635,6 +638,9 @@ func TestPruneIfNeeded(t *testing.T) { // But we naturally should prune if told so. didPrune, err = pruneIfNeeded(ctx, repo, mockOptimizationStrategy{ shouldPruneObjects: true, + pruneObjectsCfg: PruneObjectsConfig{ + ExpireBefore: twoWeeksAgo, + }, }) require.NoError(t, err) require.True(t, didPrune) diff --git a/internal/gitaly/service/repository/optimize_test.go b/internal/gitaly/service/repository/optimize_test.go index 4270b38a4..58652041e 100644 --- a/internal/gitaly/service/repository/optimize_test.go +++ b/internal/gitaly/service/repository/optimize_test.go @@ -282,16 +282,16 @@ func TestOptimizeRepository_strategy(t *testing.T) { repoProto, _ := gittest.CreateRepository(t, ctx, cfg) for _, tc := range []struct { - desc string - request *gitalypb.OptimizeRepositoryRequest - expectedStrategy housekeeping.OptimizationStrategy + desc string + request *gitalypb.OptimizeRepositoryRequest + expectedType housekeeping.OptimizationStrategy }{ { desc: "no strategy", request: &gitalypb.OptimizeRepositoryRequest{ Repository: repoProto, }, - expectedStrategy: housekeeping.HeuristicalOptimizationStrategy{}, + expectedType: housekeeping.HeuristicalOptimizationStrategy{}, }, { desc: "heuristical strategy", @@ -299,7 +299,7 @@ func TestOptimizeRepository_strategy(t *testing.T) { Repository: repoProto, Strategy: gitalypb.OptimizeRepositoryRequest_STRATEGY_HEURISTICAL, }, - expectedStrategy: housekeeping.HeuristicalOptimizationStrategy{}, + expectedType: housekeeping.HeuristicalOptimizationStrategy{}, }, { desc: "eager strategy", @@ -307,7 +307,7 @@ func TestOptimizeRepository_strategy(t *testing.T) { Repository: repoProto, Strategy: gitalypb.OptimizeRepositoryRequest_STRATEGY_EAGER, }, - expectedStrategy: housekeeping.EagerOptimizationStrategy{}, + expectedType: housekeeping.EagerOptimizationStrategy{}, }, } { t.Run(tc.desc, func(t *testing.T) { @@ -315,7 +315,7 @@ func TestOptimizeRepository_strategy(t *testing.T) { require.NoError(t, err) testhelper.ProtoEqual(t, &gitalypb.OptimizeRepositoryResponse{}, response) - require.Equal(t, tc.expectedStrategy, <-housekeepingManager.strategyCh) + require.IsType(t, tc.expectedType, <-housekeepingManager.strategyCh) }) } } |