diff options
author | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-04-14 09:02:25 +0300 |
---|---|---|
committer | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-04-14 09:02:25 +0300 |
commit | 3eac8561b37f0c53fd76ffdceaf1285a167529f5 (patch) | |
tree | 2772997f476613e852ed9899aa1ee5e745d176b0 | |
parent | 49a4dfb6f7ca9e3f74dcf0f4d5431a115f1c4d72 (diff) | |
parent | 21d61f7d517a1034ec4de8655a9138a3a3c32103 (diff) |
Merge branch 'pks-housekeeping-adjust-object-expiry-periods' into 'master'
git/housekeeping: Reduce frequency of full repacks
Closes #2774
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5640
Merged-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Approved-by: Justin Tobler <jtobler@gitlab.com>
Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r-- | internal/git/housekeeping/optimization_strategy.go | 51 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy_test.go | 10 | ||||
-rw-r--r-- | internal/git/stats/repository_info.go | 4 |
3 files changed, 35 insertions, 30 deletions
diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go index b7e2b95b1..851702b87 100644 --- a/internal/git/housekeeping/optimization_strategy.go +++ b/internal/git/housekeeping/optimization_strategy.go @@ -9,6 +9,12 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" ) +const ( + // FullRepackCooldownPeriod is the cooldown period that needs to pass since the last full + // repack before we consider doing another full repack. + FullRepackCooldownPeriod = 5 * 24 * time.Hour +) + // OptimizationStrategy is an interface to determine which parts of a repository should be // optimized. type OptimizationStrategy interface { @@ -73,6 +79,7 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects(ctx context.Context // anyway. We should eventually remove this condition though once the fix has landed. if len(s.info.Alternates) == 0 && featureflag.GeometricRepacking.IsEnabled(ctx) { nonCruftPackfilesCount := s.info.Packfiles.Count - s.info.Packfiles.CruftCount + timeSinceLastFullRepack := time.Since(s.info.Packfiles.LastFullRepack) // It is mandatory for us that we perform regular full repacks in repositories so // that we can evict objects which are unreachable into a separate cruft pack. So in @@ -82,14 +89,8 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects(ctx context.Context // This heuristic is simple on purpose: customers care about when objects will be // declared as unreachable and when the pruning grace period starts as it impacts // usage quotas. So with this simple policy we can tell customers that we evict and - // expire unreachable objects once per day. - timeSinceLastFullRepack := time.Since(s.info.Packfiles.LastFullRepack) - if !s.info.IsObjectPool && nonCruftPackfilesCount > 1 && timeSinceLastFullRepack > 24*time.Hour { - cfg.Strategy = RepackObjectsStrategyFullWithCruft - cfg.CruftExpireBefore = s.expireBefore - return true, cfg - } - + // expire unreachable objects on a regular schedule. + // // On the other hand, for object pools, we also need to perform regular full // repacks. The reason is different though, as we don't ever delete objects from // pool repositories anyway. @@ -103,21 +104,25 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects(ctx context.Context // regress over time as new objects are pulled into the pool repository. // // So we perform regular full repacks in the repository to ensure that the delta - // islands will be "freshened" again. As this is nothing that would be visible to - // the end user (except for performance), it should be fine to perform the repack a - // lot less frequent than we perform the full repacks in non-object-pools. - // - // If geometric repacks ever learn to take delta islands into account we can get rid - // of this condition and only do geometric repacks. - if s.info.IsObjectPool && nonCruftPackfilesCount > 1 && timeSinceLastFullRepack > 7*24*time.Hour { - // Using cruft packs would be pointless here as we don't ever want to expire - // unreachable objects. And we don't want to explode unreachable objects - // into loose objects either: for one that'd be inefficient, and second - // they'd only get soaked up by the next geometric repack anyway. - // - // So instead, we do a full repack that appends unreachable objects to the - // end of the new packfile. - cfg.Strategy = RepackObjectsStrategyFullWithUnreachable + // islands will be "freshened" again. If geometric repacks ever learn to take delta + // islands into account we can get rid of this condition and only do geometric + // repacks. + if nonCruftPackfilesCount > 1 && timeSinceLastFullRepack > FullRepackCooldownPeriod { + if s.info.IsObjectPool { + // Using cruft packs would be pointless here as we don't ever want + // to expire unreachable objects. And we don't want to explode + // unreachable objects into loose objects either: for one that'd be + // inefficient, and second they'd only get soaked up by the next + // geometric repack anyway. + // + // So instead, we do a full repack that appends unreachable objects + // to the end of the new packfile. + cfg.Strategy = RepackObjectsStrategyFullWithUnreachable + } else { + cfg.Strategy = RepackObjectsStrategyFullWithCruft + cfg.CruftExpireBefore = s.expireBefore + } + return true, cfg } diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go index 312b24c6b..f9c9c50e0 100644 --- a/internal/git/housekeeping/optimization_strategy_test.go +++ b/internal/git/housekeeping/optimization_strategy_test.go @@ -124,7 +124,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co // yet cross the 24 hour boundary. So we don't // expect a repack. Count: 2, - LastFullRepack: time.Now().Add(-23 * time.Hour), + LastFullRepack: time.Now().Add(-FullRepackCooldownPeriod + time.Hour), MultiPackIndex: stats.MultiPackIndexInfo{ Exists: true, PackfileCount: 2, @@ -144,7 +144,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co // crossed the 24 hour boundary, so we should // perform a full repack. Count: 2, - LastFullRepack: time.Now().Add(-24 * time.Hour), + LastFullRepack: time.Now().Add(-FullRepackCooldownPeriod), MultiPackIndex: stats.MultiPackIndexInfo{ Exists: true, PackfileCount: 2, @@ -172,7 +172,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co // perform a full repack. Count: 2, CruftCount: 1, - LastFullRepack: time.Now().Add(-24 * time.Hour), + LastFullRepack: time.Now().Add(-FullRepackCooldownPeriod), MultiPackIndex: stats.MultiPackIndexInfo{ Exists: true, PackfileCount: 2, @@ -193,7 +193,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co // normal repositories, but have a longer grace // period for the next repack. Count: 2, - LastFullRepack: time.Now().Add(-6 * 24 * time.Hour), + LastFullRepack: time.Now().Add(-FullRepackCooldownPeriod + time.Hour), MultiPackIndex: stats.MultiPackIndexInfo{ Exists: true, PackfileCount: 2, @@ -221,7 +221,7 @@ func testHeuristicalOptimizationStrategyShouldRepackObjects(t *testing.T, ctx co // repositories should get a full repack in case // they have more than a single packfile. Count: 2, - LastFullRepack: time.Now().Add(-7 * 24 * time.Hour), + LastFullRepack: time.Now().Add(-FullRepackCooldownPeriod), MultiPackIndex: stats.MultiPackIndexInfo{ Exists: true, PackfileCount: 2, diff --git a/internal/git/stats/repository_info.go b/internal/git/stats/repository_info.go index 910fb47b6..aa5de1c84 100644 --- a/internal/git/stats/repository_info.go +++ b/internal/git/stats/repository_info.go @@ -20,8 +20,8 @@ import ( const ( // StaleObjectsGracePeriod is time delta that is used to indicate cutoff wherein an object - // would be considered old. Currently this is set to being 2 weeks (2 * 7days * 24hours). - StaleObjectsGracePeriod = -14 * 24 * time.Hour + // would be considered old. Currently this is set to being 10 days. + StaleObjectsGracePeriod = -10 * 24 * time.Hour // FullRepackTimestampFilename is the name of the file that is used as a timestamp for the // last repack that happened in the repository. Whenever a full repack happens, Gitaly will |