Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2023-03-03 14:54:14 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2023-03-06 09:30:35 +0300
commit19aec67faa91621e598ecc6bfab6dcf2f9f04a41 (patch)
tree763781453fce455f5803436bb57c0855c1c06619
parenta965842fe3cc7212b35f936d59ede06c20868d23 (diff)
housekeeping: Let strategies control expiration grace period
When pruning objects we have a default grace period of two weeks. This grace period is dictated by `housekeeping.OptimizeRepository()`, which has this limit hardcoded. This has two issues: - `OptimizeRepository()` should not be responsible for the policy of when exactly we perform certain optimizations. This is the job of the optimization strategy instead. - We pass the expiration grace period as approxidate to git-prune(1) and thus have no fine-grained control of which objects exactly get pruned. This will become a problem once we introduce cruft packs, where we will then need to use the same date both when creating the cruft packs and when pruning objects. Introduce a new `PruneObjects()` helper function along with a config struct that allows the caller to set the exact date before which objects shall be pruned. This allows us to easily move control of that expiry date into optimization strategies.
-rw-r--r--internal/git/housekeeping/objects.go36
-rw-r--r--internal/git/housekeeping/optimization_strategy.go39
-rw-r--r--internal/git/housekeeping/optimization_strategy_test.go51
-rw-r--r--internal/git/housekeeping/optimize_repository.go17
-rw-r--r--internal/git/housekeeping/optimize_repository_test.go6
-rw-r--r--internal/gitaly/service/repository/optimize_test.go14
6 files changed, 122 insertions, 41 deletions
diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go
index 042efb798..33f8d9c11 100644
--- a/internal/git/housekeeping/objects.go
+++ b/internal/git/housekeeping/objects.go
@@ -2,7 +2,9 @@ package housekeeping
import (
"context"
+ "fmt"
"strconv"
+ "time"
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
@@ -15,6 +17,8 @@ const (
// looseObjectLimit is the limit of loose objects we accept both when doing incremental
// repacks and when pruning objects.
looseObjectLimit = 1024
+ // rfc2822DateFormat is the date format that Git typically uses for dates.
+ rfc2822DateFormat = "Mon Jan 02 2006 15:04:05 -0700"
)
// RepackObjectsConfig is configuration for RepackObjects.
@@ -87,3 +91,35 @@ func GetRepackGitConfig(ctx context.Context, repo repository.GitRepo, bitmap boo
return config
}
+
+// PruneObjectsConfig determines which objects should be pruned in PruneObjects.
+type PruneObjectsConfig struct {
+ // ExpireBefore controls the grace period after which unreachable objects shall be pruned.
+ // An unreachable object must be older than the given date in order to be considered for
+ // deletion.
+ ExpireBefore time.Time
+}
+
+// PruneObjects prunes loose objects from the repository that are already packed or which are
+// unreachable and older than the configured expiry date.
+func PruneObjects(ctx context.Context, repo *localrepo.Repo, cfg PruneObjectsConfig) error {
+ if err := repo.ExecAndWait(ctx, git.Command{
+ Name: "prune",
+ Flags: []git.Option{
+ // By default, this prunes all unreachable objects regardless of when they
+ // have last been accessed. This opens us up for races when there are
+ // concurrent commands which are just at the point of writing objects into
+ // the repository, but which haven't yet updated any references to make them
+ // reachable.
+ //
+ // To avoid this race, we use a grace window that can be specified by the
+ // caller so that we only delete objects that are older than this grace
+ // window.
+ git.ValueFlag{Name: "--expire", Value: cfg.ExpireBefore.Format(rfc2822DateFormat)},
+ },
+ }); err != nil {
+ return fmt.Errorf("executing prune: %w", err)
+ }
+
+ return nil
+}
diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go
index 0fd5f89f7..ab6a75576 100644
--- a/internal/git/housekeeping/optimization_strategy.go
+++ b/internal/git/housekeeping/optimization_strategy.go
@@ -3,6 +3,7 @@ package housekeeping
import (
"context"
"math"
+ "time"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/stats"
)
@@ -14,8 +15,8 @@ type OptimizationStrategy interface {
// how it should be done.
ShouldRepackObjects(context.Context) (bool, RepackObjectsConfig)
// ShouldPruneObjects determines whether the repository has stale objects that should be
- // pruned.
- ShouldPruneObjects(context.Context) bool
+ // pruned and, if so, how it should be done.
+ ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig)
// ShouldRepackReferences determines whether the repository's references need to be
// repacked.
ShouldRepackReferences(context.Context) bool
@@ -27,7 +28,8 @@ type OptimizationStrategy interface {
// HeuristicalOptimizationStrategy is an optimization strategy that is based on a set of
// heuristics.
type HeuristicalOptimizationStrategy struct {
- info stats.RepositoryInfo
+ info stats.RepositoryInfo
+ expireBefore time.Time
}
// NewHeuristicalOptimizationStrategy constructs a heuristicalOptimizationStrategy for the given
@@ -35,7 +37,8 @@ type HeuristicalOptimizationStrategy struct {
// repository can be decided without further disk reads.
func NewHeuristicalOptimizationStrategy(info stats.RepositoryInfo) HeuristicalOptimizationStrategy {
return HeuristicalOptimizationStrategy{
- info: info,
+ info: info,
+ expireBefore: time.Now().Add(stats.StaleObjectsGracePeriod),
}
}
@@ -150,7 +153,7 @@ func (s HeuristicalOptimizationStrategy) ShouldWriteCommitGraph(ctx context.Cont
//
// To fix this case we will replace the complete commit-chain when we have pruned objects
// from the repository.
- if s.ShouldPruneObjects(ctx) {
+ if shouldPrune, _ := s.ShouldPruneObjects(ctx); shouldPrune {
return true, WriteCommitGraphConfig{
ReplaceChain: true,
}
@@ -174,20 +177,22 @@ func (s HeuristicalOptimizationStrategy) ShouldWriteCommitGraph(ctx context.Cont
// ShouldPruneObjects determines whether the repository has stale objects that should be pruned.
// Object pools are never pruned to not lose data in them, but otherwise we prune when we've found
// enough stale objects that might in fact get pruned.
-func (s HeuristicalOptimizationStrategy) ShouldPruneObjects(context.Context) bool {
+func (s HeuristicalOptimizationStrategy) ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig) {
// Pool repositories must never prune any objects, or otherwise we may corrupt members of
// that pool if they still refer to that object.
if s.info.IsObjectPool {
- return false
+ return false, PruneObjectsConfig{}
}
// When we have a number of loose objects that is older than two weeks then they have
// surpassed the grace period and may thus be pruned.
if s.info.LooseObjects.StaleCount <= looseObjectLimit {
- return false
+ return false, PruneObjectsConfig{}
}
- return true
+ return true, PruneObjectsConfig{
+ ExpireBefore: s.expireBefore,
+ }
}
// ShouldRepackReferences determines whether the repository's references need to be repacked based
@@ -230,13 +235,15 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackReferences(context.Context)
// EagerOptimizationStrategy is a strategy that will eagerly perform optimizations. All of the data
// structures will be optimized regardless of whether they already are in an optimal state or not.
type EagerOptimizationStrategy struct {
- info stats.RepositoryInfo
+ info stats.RepositoryInfo
+ expireBefore time.Time
}
// NewEagerOptimizationStrategy creates a new EagerOptimizationStrategy.
func NewEagerOptimizationStrategy(info stats.RepositoryInfo) EagerOptimizationStrategy {
return EagerOptimizationStrategy{
- info: info,
+ info: info,
+ expireBefore: time.Now().Add(stats.StaleObjectsGracePeriod),
}
}
@@ -261,8 +268,14 @@ func (s EagerOptimizationStrategy) ShouldWriteCommitGraph(context.Context) (bool
// ShouldPruneObjects always instructs the caller to prune objects, unless the repository is an
// object pool.
-func (s EagerOptimizationStrategy) ShouldPruneObjects(context.Context) bool {
- return !s.info.IsObjectPool
+func (s EagerOptimizationStrategy) ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig) {
+ if s.info.IsObjectPool {
+ return false, PruneObjectsConfig{}
+ }
+
+ return true, PruneObjectsConfig{
+ ExpireBefore: s.expireBefore,
+ }
}
// ShouldRepackReferences always instructs the caller to repack references.
diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go
index fe7367fbb..731ba9a5b 100644
--- a/internal/git/housekeeping/optimization_strategy_test.go
+++ b/internal/git/housekeeping/optimization_strategy_test.go
@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"testing"
+ "time"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/stats"
@@ -282,15 +283,19 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) {
t.Parallel()
ctx := testhelper.Context(t)
+ expireBefore := time.Now()
for _, tc := range []struct {
desc string
strategy HeuristicalOptimizationStrategy
expectedShouldPruneObjects bool
+ expectedPruneObjectsConfig PruneObjectsConfig
}{
{
- desc: "empty repository",
- strategy: HeuristicalOptimizationStrategy{},
+ desc: "empty repository",
+ strategy: HeuristicalOptimizationStrategy{
+ expireBefore: expireBefore,
+ },
expectedShouldPruneObjects: false,
},
{
@@ -301,6 +306,7 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) {
Count: 10000,
},
},
+ expireBefore: expireBefore,
},
expectedShouldPruneObjects: false,
},
@@ -312,6 +318,7 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) {
StaleCount: 1000,
},
},
+ expireBefore: expireBefore,
},
expectedShouldPruneObjects: false,
},
@@ -323,19 +330,28 @@ func TestHeuristicalOptimizationStrategy_ShouldPruneObjects(t *testing.T) {
StaleCount: 1025,
},
},
+ expireBefore: expireBefore,
},
expectedShouldPruneObjects: true,
+ expectedPruneObjectsConfig: PruneObjectsConfig{
+ ExpireBefore: expireBefore,
+ },
},
} {
t.Run(tc.desc, func(t *testing.T) {
t.Run("normal repository", func(t *testing.T) {
- require.Equal(t, tc.expectedShouldPruneObjects, tc.strategy.ShouldPruneObjects(ctx))
+ shouldPrune, pruneCfg := tc.strategy.ShouldPruneObjects(ctx)
+ require.Equal(t, tc.expectedShouldPruneObjects, shouldPrune)
+ require.Equal(t, tc.expectedPruneObjectsConfig, pruneCfg)
})
t.Run("object pool", func(t *testing.T) {
strategy := tc.strategy
strategy.info.IsObjectPool = true
- require.False(t, strategy.ShouldPruneObjects(ctx))
+
+ shouldPrune, pruneCfg := tc.strategy.ShouldPruneObjects(ctx)
+ require.Equal(t, tc.expectedShouldPruneObjects, shouldPrune)
+ require.Equal(t, tc.expectedPruneObjectsConfig, pruneCfg)
})
})
}
@@ -559,16 +575,25 @@ func TestEagerOptimizationStrategy(t *testing.T) {
ctx := testhelper.Context(t)
+ expireBefore := time.Now()
+
for _, tc := range []struct {
desc string
strategy EagerOptimizationStrategy
expectWriteBitmap bool
expectShouldPruneObjects bool
+ pruneObjectsCfg PruneObjectsConfig
}{
{
- desc: "no alternate",
+ desc: "no alternate",
+ strategy: EagerOptimizationStrategy{
+ expireBefore: expireBefore,
+ },
expectWriteBitmap: true,
expectShouldPruneObjects: true,
+ pruneObjectsCfg: PruneObjectsConfig{
+ ExpireBefore: expireBefore,
+ },
},
{
desc: "alternate",
@@ -576,9 +601,13 @@ func TestEagerOptimizationStrategy(t *testing.T) {
info: stats.RepositoryInfo{
Alternates: []string{"path/to/alternate"},
},
+ expireBefore: expireBefore,
},
expectWriteBitmap: false,
expectShouldPruneObjects: true,
+ pruneObjectsCfg: PruneObjectsConfig{
+ ExpireBefore: expireBefore,
+ },
},
{
desc: "object pool",
@@ -586,6 +615,7 @@ func TestEagerOptimizationStrategy(t *testing.T) {
info: stats.RepositoryInfo{
IsObjectPool: true,
},
+ expireBefore: expireBefore,
},
expectWriteBitmap: true,
expectShouldPruneObjects: false,
@@ -597,6 +627,7 @@ func TestEagerOptimizationStrategy(t *testing.T) {
IsObjectPool: true,
Alternates: []string{"path/to/alternate"},
},
+ expireBefore: expireBefore,
},
expectWriteBitmap: false,
expectShouldPruneObjects: false,
@@ -617,7 +648,10 @@ func TestEagerOptimizationStrategy(t *testing.T) {
ReplaceChain: true,
}, writeCommitGraphCfg)
- require.Equal(t, tc.expectShouldPruneObjects, tc.strategy.ShouldPruneObjects(ctx))
+ shouldPruneObjects, pruneObjectsCfg := tc.strategy.ShouldPruneObjects(ctx)
+ require.Equal(t, tc.expectShouldPruneObjects, shouldPruneObjects)
+ require.Equal(t, tc.pruneObjectsCfg, pruneObjectsCfg)
+
require.True(t, tc.strategy.ShouldRepackReferences(ctx))
})
}
@@ -628,6 +662,7 @@ type mockOptimizationStrategy struct {
shouldRepackObjects bool
repackObjectsCfg RepackObjectsConfig
shouldPruneObjects bool
+ pruneObjectsCfg PruneObjectsConfig
shouldRepackReferences bool
shouldWriteCommitGraph bool
writeCommitGraphCfg WriteCommitGraphConfig
@@ -637,8 +672,8 @@ func (m mockOptimizationStrategy) ShouldRepackObjects(context.Context) (bool, Re
return m.shouldRepackObjects, m.repackObjectsCfg
}
-func (m mockOptimizationStrategy) ShouldPruneObjects(context.Context) bool {
- return m.shouldPruneObjects
+func (m mockOptimizationStrategy) ShouldPruneObjects(context.Context) (bool, PruneObjectsConfig) {
+ return m.shouldPruneObjects, m.pruneObjectsCfg
}
func (m mockOptimizationStrategy) ShouldRepackReferences(context.Context) bool {
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go
index bce297315..bc964d8ef 100644
--- a/internal/git/housekeeping/optimize_repository.go
+++ b/internal/git/housekeeping/optimize_repository.go
@@ -253,22 +253,13 @@ func writeCommitGraphIfNeeded(ctx context.Context, repo *localrepo.Repo, strateg
// pruneIfNeeded removes objects from the repository which are either unreachable or which are
// already part of a packfile. We use a grace period of two weeks.
func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo, strategy OptimizationStrategy) (bool, error) {
- if !strategy.ShouldPruneObjects(ctx) {
+ needed, cfg := strategy.ShouldPruneObjects(ctx)
+ if !needed {
return false, nil
}
- if err := repo.ExecAndWait(ctx, git.Command{
- Name: "prune",
- Flags: []git.Option{
- // By default, this prunes all unreachable objects regardless of when they
- // have last been accessed. This opens us up for races when there are
- // concurrent commands which are just at the point of writing objects into
- // the repository, but which haven't yet updated any references to make them
- // reachable. We thus use the same two-week grace period as git-gc(1) does.
- git.ValueFlag{Name: "--expire", Value: "two.weeks.ago"},
- },
- }); err != nil {
- return false, fmt.Errorf("pruning objects: %w", err)
+ if err := PruneObjects(ctx, repo, cfg); err != nil {
+ return true, fmt.Errorf("pruning objects: %w", err)
}
return true, nil
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
index 8bfaec14e..a10276d7b 100644
--- a/internal/git/housekeeping/optimize_repository_test.go
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -624,6 +624,9 @@ func TestPruneIfNeeded(t *testing.T) {
// We shouldn't prune when the strategy determines there aren't enough old objects.
didPrune, err := pruneIfNeeded(ctx, repo, mockOptimizationStrategy{
shouldPruneObjects: false,
+ pruneObjectsCfg: PruneObjectsConfig{
+ ExpireBefore: twoWeeksAgo,
+ },
})
require.NoError(t, err)
require.False(t, didPrune)
@@ -635,6 +638,9 @@ func TestPruneIfNeeded(t *testing.T) {
// But we naturally should prune if told so.
didPrune, err = pruneIfNeeded(ctx, repo, mockOptimizationStrategy{
shouldPruneObjects: true,
+ pruneObjectsCfg: PruneObjectsConfig{
+ ExpireBefore: twoWeeksAgo,
+ },
})
require.NoError(t, err)
require.True(t, didPrune)
diff --git a/internal/gitaly/service/repository/optimize_test.go b/internal/gitaly/service/repository/optimize_test.go
index 4270b38a4..58652041e 100644
--- a/internal/gitaly/service/repository/optimize_test.go
+++ b/internal/gitaly/service/repository/optimize_test.go
@@ -282,16 +282,16 @@ func TestOptimizeRepository_strategy(t *testing.T) {
repoProto, _ := gittest.CreateRepository(t, ctx, cfg)
for _, tc := range []struct {
- desc string
- request *gitalypb.OptimizeRepositoryRequest
- expectedStrategy housekeeping.OptimizationStrategy
+ desc string
+ request *gitalypb.OptimizeRepositoryRequest
+ expectedType housekeeping.OptimizationStrategy
}{
{
desc: "no strategy",
request: &gitalypb.OptimizeRepositoryRequest{
Repository: repoProto,
},
- expectedStrategy: housekeeping.HeuristicalOptimizationStrategy{},
+ expectedType: housekeeping.HeuristicalOptimizationStrategy{},
},
{
desc: "heuristical strategy",
@@ -299,7 +299,7 @@ func TestOptimizeRepository_strategy(t *testing.T) {
Repository: repoProto,
Strategy: gitalypb.OptimizeRepositoryRequest_STRATEGY_HEURISTICAL,
},
- expectedStrategy: housekeeping.HeuristicalOptimizationStrategy{},
+ expectedType: housekeeping.HeuristicalOptimizationStrategy{},
},
{
desc: "eager strategy",
@@ -307,7 +307,7 @@ func TestOptimizeRepository_strategy(t *testing.T) {
Repository: repoProto,
Strategy: gitalypb.OptimizeRepositoryRequest_STRATEGY_EAGER,
},
- expectedStrategy: housekeeping.EagerOptimizationStrategy{},
+ expectedType: housekeeping.EagerOptimizationStrategy{},
},
} {
t.Run(tc.desc, func(t *testing.T) {
@@ -315,7 +315,7 @@ func TestOptimizeRepository_strategy(t *testing.T) {
require.NoError(t, err)
testhelper.ProtoEqual(t, &gitalypb.OptimizeRepositoryResponse{}, response)
- require.Equal(t, tc.expectedStrategy, <-housekeepingManager.strategyCh)
+ require.IsType(t, tc.expectedType, <-housekeepingManager.strategyCh)
})
}
}