Welcome to the mirror list, hosted by ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-12 11:48:14 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-15 09:57:06 +0300
commit8c72787d912d6deda52ea90352e35950bc3f1e48 (patch)
tree2b61a089666b7960429d609157179a8bcc4aa0de
parent84e99923567045f71b90b804f07f81ea4fef977d (diff)
housekeeping: Repack object pools more aggressively
Object pools and normal repositories currently share the same heuristics to decide whether to repack objects or not. While this strategy works, it is not ideal for pool repositories because they may be reused across many forks. It is thus desirable for us to keep them as efficient as reasonably possible. Tweak the heuristics to repack object pools more aggressively. Right now this shouldn't make much of a difference: object pools are only updated via `FetchIntoObjectPool()`, which knows to do a full repack when it has fetched objects. As a result, no object pool should ever have more than a single packfile in general. This will make a difference though when we migrate object pools to not do their own housekeeping anymore, but to use `OptimizeRepository()` instead: we'll want to keep similar semantics as we have right now and aggressively repack. Note that we also need to fix some of our tests which used git-repack(1) with the `-A` flag to bring repositories into the state we want to test repository maintenance on. This will leave behind the old, superseded packfile though and thus we end up with two packfiles even though the intent of the test was to only test with a single packfile. This didn't matter before where we required at least five packfiles before we pack objects, but does now where the limit for object pools is at two packs.
-rw-r--r--internal/git/housekeeping/optimize_repository.go35
-rw-r--r--internal/git/housekeeping/optimize_repository_test.go78
2 files changed, 81 insertions(+), 32 deletions(-)
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go
index 4bd48999a..ef74f8f0d 100644
--- a/internal/git/housekeeping/optimize_repository.go
+++ b/internal/git/housekeeping/optimize_repository.go
@@ -215,25 +215,36 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
// relative though: for small repositories it's fine to do full repacks regularly, but for
// large repositories we need to be more careful. We thus use a heuristic of "repository
// largeness": we take the biggest packfile that exists, and then the maximum allowed number
- // of packfiles is `log(largestpackfile_size_in_mb) / log(1.3)`. This gives the following
- // allowed number of packfiles:
+ // of packfiles is `log(largestpackfile_size_in_mb) / log(1.3)` for normal repositories and
+ // `log(largestpackfile_size_in_mb) / log(10.0)` for pools. This gives the following allowed
+ // number of packfiles:
//
- // - No packfile: 5 packfile. This is a special case.
- // - 10MB packfile: 8 packfiles.
- // - 100MB packfile: 17 packfiles.
- // - 500MB packfile: 23 packfiles.
- // - 1GB packfile: 26 packfiles.
- // - 5GB packfile: 32 packfiles.
- // - 10GB packfile: 35 packfiles.
- // - 100GB packfile: 43 packfiles.
+ // -------------------------------------------------------------------------------------
+ // | largest packfile size | allowed packfiles for repos | allowed packfiles for pools |
+ // -------------------------------------------------------------------------------------
+ // | none or <10MB | 5 | 2 |
+ // | 10MB | 8 | 2 |
+ // | 100MB | 17 | 2 |
+ // | 500MB | 23 | 2 |
+ // | 1GB | 26 | 3 |
+ // | 5GB | 32 | 3 |
+ // | 10GB | 35 | 4 |
+ // | 100GB | 43 | 5 |
+ // -------------------------------------------------------------------------------------
//
// The goal is to have a comparatively quick ramp-up of allowed packfiles as the repository
// size grows, but then slow down such that we're effectively capped and don't end up with
- // an excessive amount of packfiles.
+ // an excessive amount of packfiles. On the other hand, pool repositories are potentially
+ // reused as basis for many forks and should thus be packed much more aggressively.
//
// This is a heuristic and thus imperfect by necessity. We may tune it as we gain experience
// with the way it behaves.
- if int64(math.Max(5, math.Log(float64(largestPackfileSize))/math.Log(1.3))) <= packfileCount {
+ lowerLimit, log := 5.0, 1.3
+ if IsPoolRepository(repo) {
+ lowerLimit, log = 2.0, 10.0
+ }
+
+ if int64(math.Max(lowerLimit, math.Log(float64(largestPackfileSize))/math.Log(log))) <= packfileCount {
return true, RepackObjectsConfig{
FullRepack: true,
WriteBitmap: !hasAlternate,
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
index 59bbd5cd1..939dab9b1 100644
--- a/internal/git/housekeeping/optimize_repository_test.go
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -148,36 +148,44 @@ func TestNeedsRepacking(t *testing.T) {
const megaByte = 1024 * 1024
for _, tc := range []struct {
- packfileSize int64
- requiredPackfiles int
+ packfileSize int64
+ requiredPackfiles int
+ requiredPackfilesForPool int
}{
{
- packfileSize: 1,
- requiredPackfiles: 5,
+ packfileSize: 1,
+ requiredPackfiles: 5,
+ requiredPackfilesForPool: 2,
},
{
- packfileSize: 5 * megaByte,
- requiredPackfiles: 6,
+ packfileSize: 5 * megaByte,
+ requiredPackfiles: 6,
+ requiredPackfilesForPool: 2,
},
{
- packfileSize: 10 * megaByte,
- requiredPackfiles: 8,
+ packfileSize: 10 * megaByte,
+ requiredPackfiles: 8,
+ requiredPackfilesForPool: 2,
},
{
- packfileSize: 50 * megaByte,
- requiredPackfiles: 14,
+ packfileSize: 50 * megaByte,
+ requiredPackfiles: 14,
+ requiredPackfilesForPool: 2,
},
{
- packfileSize: 100 * megaByte,
- requiredPackfiles: 17,
+ packfileSize: 100 * megaByte,
+ requiredPackfiles: 17,
+ requiredPackfilesForPool: 2,
},
{
- packfileSize: 500 * megaByte,
- requiredPackfiles: 23,
+ packfileSize: 500 * megaByte,
+ requiredPackfiles: 23,
+ requiredPackfilesForPool: 2,
},
{
- packfileSize: 1000 * megaByte,
- requiredPackfiles: 26,
+ packfileSize: 1001 * megaByte,
+ requiredPackfiles: 26,
+ requiredPackfilesForPool: 3,
},
// Let's not go any further than this, we're thrashing the temporary directory.
} {
@@ -203,10 +211,15 @@ func TestNeedsRepacking(t *testing.T) {
require.NoError(t, os.WriteFile(bigPackPath, nil, 0o644))
require.NoError(t, os.Truncate(bigPackPath, tc.packfileSize))
+ requiredPackfiles := tc.requiredPackfiles
+ if IsPoolRepository(repoProto) {
+ requiredPackfiles = tc.requiredPackfilesForPool
+ }
+
// And then we create one less packfile than we need to hit the boundary.
// This is done to assert that we indeed don't repack before hitting the
// boundary.
- for i := 0; i < tc.requiredPackfiles-2; i++ {
+ for i := 0; i < requiredPackfiles-2; i++ {
additionalPackfile, err := os.Create(filepath.Join(packDir, fmt.Sprintf("%d.pack", i)))
require.NoError(t, err)
testhelper.MustClose(t, additionalPackfile)
@@ -543,11 +556,36 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0], gittest.CloneRepoOpts{
RelativePath: relativePath,
})
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index")
+ return repo
+ },
+ expectedMetrics: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository
+# TYPE gitaly_housekeeping_tasks_total counter
+gitaly_housekeeping_tasks_total{housekeeping_task="packed_objects_full", status="success"} 1
+gitaly_housekeeping_tasks_total{housekeeping_task="written_bitmap", status="success"} 1
+gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
+`,
+ },
+ {
+ desc: "repository with multiple packfiles packs only for object pool",
+ setup: func(t *testing.T, relativePath string) *gitalypb.Repository {
+ repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0], gittest.CloneRepoOpts{
+ RelativePath: relativePath,
+ })
+
+ // Note: git-repack(1) without "-d" will _not_ delete the old
+ // packfile and thus end up with two packfiles.
gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths")
+
return repo
},
expectedMetrics: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository
# TYPE gitaly_housekeeping_tasks_total counter
+gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
+`,
+ expectedMetricsForPool: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository
+# TYPE gitaly_housekeeping_tasks_total counter
gitaly_housekeeping_tasks_total{housekeeping_task="packed_objects_full", status="success"} 1
gitaly_housekeeping_tasks_total{housekeeping_task="written_bitmap", status="success"} 1
gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
@@ -559,7 +597,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0], gittest.CloneRepoOpts{
RelativePath: relativePath,
})
- gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index")
gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths")
return repo
},
@@ -574,7 +612,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0], gittest.CloneRepoOpts{
RelativePath: relativePath,
})
- gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index")
gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths")
// The repack won't repack the following objects because they're
@@ -606,7 +644,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0], gittest.CloneRepoOpts{
RelativePath: relativePath,
})
- gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index")
gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths")
// The repack won't repack the following objects because they're