diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-30 09:00:44 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-30 09:00:44 +0300 |
commit | 6e90b48ee1c271e244615eb255070fe451a1f3e7 (patch) | |
tree | 8d1847561924ea9a139a8d39cbd0eba7c8cdbcfe /internal/git | |
parent | d7eb8938be2bd217e37e5cf83a9374ae27c0a6b9 (diff) |
housekeeping: Skip repacking empty repositories
When a repository does not have any bitmaps or in case it is missing
bloom filters in the commit graph we always try to do a full repack in
order to generate these data structures. This logic is required because:
- Bitmaps are only generated by git-repack(1) for full repacks. This
limitation exists because there can only be one bitmap per
repository.
- In case commit-graphs exist but missing bloom filters we need to
completely rewrite them in order to enable this extension.
While the logic makes sense, it also causes us to repack repositories
which are empty: they don't contain either of these data structures, and
consequentially we think we need a full repack. While this is not a huge
problem because git-repack(1) would finish fast anyway in case the repo
doesn't contain any objects, we still needlessly spawn a process. Also,
our metrics report that we're doing a lot of full repacks, which can be
confusing.
Improve our heuristics by taking into account whether the repository has
objects at all. If there aren't any, we can avoid all this busywork and
just skip repacking altogether.
Changelog: changed
Diffstat (limited to 'internal/git')
-rw-r--r-- | internal/git/housekeeping/optimize_repository.go | 26 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_test.go | 15 |
2 files changed, 18 insertions, 23 deletions
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go index f4fc8419e..39257fd96 100644 --- a/internal/git/housekeeping/optimize_repository.go +++ b/internal/git/housekeeping/optimize_repository.go @@ -138,6 +138,22 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) { return false, RepackObjectsConfig{}, fmt.Errorf("getting repository path: %w", err) } + largestPackfileSize, packfileCount, err := packfileSizeAndCount(repo) + if err != nil { + return false, RepackObjectsConfig{}, fmt.Errorf("checking largest packfile size: %w", err) + } + + looseObjectCount, err := estimateLooseObjectCount(repo, time.Now()) + if err != nil { + return false, RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err) + } + + // If there are neither packfiles nor loose objects in this repository then there is no need + // to repack anything. + if packfileCount == 0 && looseObjectCount == 0 { + return false, RepackObjectsConfig{}, nil + } + altFile, err := repo.InfoAlternatesPath() if err != nil { return false, RepackObjectsConfig{}, helper.ErrInternal(err) @@ -190,11 +206,6 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) { }, nil } - largestPackfileSize, packfileCount, err := packfileSizeAndCount(repo) - if err != nil { - return false, RepackObjectsConfig{}, fmt.Errorf("checking largest packfile size: %w", err) - } - // Whenever we do an incremental repack we create a new packfile, and as a result Git may // have to look into every one of the packfiles to find objects. This is less efficient the // more packfiles we have, but we cannot repack the whole repository every time either given @@ -229,11 +240,6 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) { }, nil } - looseObjectCount, err := estimateLooseObjectCount(repo, time.Now()) - if err != nil { - return false, RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err) - } - // Most Git commands do not write packfiles directly, but instead write loose objects into // the object database. So while we now know that there ain't too many packfiles, we still // need to check whether we have too many objects. diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go index 086e3572e..8aad4dd8f 100644 --- a/internal/git/housekeeping/optimize_repository_test.go +++ b/internal/git/housekeeping/optimize_repository_test.go @@ -48,20 +48,11 @@ func TestNeedsRepacking(t *testing.T) { expectedConfig RepackObjectsConfig }{ { - desc: "empty repo", + desc: "empty repo does nothing", setup: func(t *testing.T) *gitalypb.Repository { repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0]) return repoProto }, - // This is a bug: if the repo is empty then we wouldn't ever generate a - // packfile, but we determine a repack is needed because it's missing a - // bitmap. It's a rather benign bug though: git-repack(1) will exit - // immediately because it knows that there's nothing to repack. - expectedNeeded: true, - expectedConfig: RepackObjectsConfig{ - FullRepack: true, - WriteBitmap: true, - }, }, { desc: "missing bitmap", @@ -510,15 +501,13 @@ func TestOptimizeRepository(t *testing.T) { expectedMetrics string }{ { - desc: "empty repository tries to write bitmap", + desc: "empty repository does nothing", setup: func(t *testing.T) *gitalypb.Repository { repo, _ := gittest.InitRepo(t, cfg, cfg.Storages[0]) return repo }, expectedMetrics: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository # TYPE gitaly_housekeeping_tasks_total counter -gitaly_housekeeping_tasks_total{housekeeping_task="packed_objects_full", status="success"} 1 -gitaly_housekeeping_tasks_total{housekeeping_task="written_bitmap", status="success"} 1 gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 `, }, |