Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-30 09:00:44 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-30 09:00:44 +0300
commit6e90b48ee1c271e244615eb255070fe451a1f3e7 (patch)
tree8d1847561924ea9a139a8d39cbd0eba7c8cdbcfe
parentd7eb8938be2bd217e37e5cf83a9374ae27c0a6b9 (diff)
housekeeping: Skip repacking empty repositories
When a repository does not have any bitmaps or in case it is missing bloom filters in the commit graph we always try to do a full repack in order to generate these data structures. This logic is required because: - Bitmaps are only generated by git-repack(1) for full repacks. This limitation exists because there can only be one bitmap per repository. - In case commit-graphs exist but missing bloom filters we need to completely rewrite them in order to enable this extension. While the logic makes sense, it also causes us to repack repositories which are empty: they don't contain either of these data structures, and consequentially we think we need a full repack. While this is not a huge problem because git-repack(1) would finish fast anyway in case the repo doesn't contain any objects, we still needlessly spawn a process. Also, our metrics report that we're doing a lot of full repacks, which can be confusing. Improve our heuristics by taking into account whether the repository has objects at all. If there aren't any, we can avoid all this busywork and just skip repacking altogether. Changelog: changed
-rw-r--r--internal/git/housekeeping/optimize_repository.go26
-rw-r--r--internal/git/housekeeping/optimize_repository_test.go15
-rw-r--r--internal/gitaly/service/repository/optimize_test.go6
3 files changed, 24 insertions, 23 deletions
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go
index f4fc8419e..39257fd96 100644
--- a/internal/git/housekeeping/optimize_repository.go
+++ b/internal/git/housekeeping/optimize_repository.go
@@ -138,6 +138,22 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
return false, RepackObjectsConfig{}, fmt.Errorf("getting repository path: %w", err)
}
+ largestPackfileSize, packfileCount, err := packfileSizeAndCount(repo)
+ if err != nil {
+ return false, RepackObjectsConfig{}, fmt.Errorf("checking largest packfile size: %w", err)
+ }
+
+ looseObjectCount, err := estimateLooseObjectCount(repo, time.Now())
+ if err != nil {
+ return false, RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err)
+ }
+
+ // If there are neither packfiles nor loose objects in this repository then there is no need
+ // to repack anything.
+ if packfileCount == 0 && looseObjectCount == 0 {
+ return false, RepackObjectsConfig{}, nil
+ }
+
altFile, err := repo.InfoAlternatesPath()
if err != nil {
return false, RepackObjectsConfig{}, helper.ErrInternal(err)
@@ -190,11 +206,6 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
}, nil
}
- largestPackfileSize, packfileCount, err := packfileSizeAndCount(repo)
- if err != nil {
- return false, RepackObjectsConfig{}, fmt.Errorf("checking largest packfile size: %w", err)
- }
-
// Whenever we do an incremental repack we create a new packfile, and as a result Git may
// have to look into every one of the packfiles to find objects. This is less efficient the
// more packfiles we have, but we cannot repack the whole repository every time either given
@@ -229,11 +240,6 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
}, nil
}
- looseObjectCount, err := estimateLooseObjectCount(repo, time.Now())
- if err != nil {
- return false, RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err)
- }
-
// Most Git commands do not write packfiles directly, but instead write loose objects into
// the object database. So while we now know that there ain't too many packfiles, we still
// need to check whether we have too many objects.
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
index 086e3572e..8aad4dd8f 100644
--- a/internal/git/housekeeping/optimize_repository_test.go
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -48,20 +48,11 @@ func TestNeedsRepacking(t *testing.T) {
expectedConfig RepackObjectsConfig
}{
{
- desc: "empty repo",
+ desc: "empty repo does nothing",
setup: func(t *testing.T) *gitalypb.Repository {
repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0])
return repoProto
},
- // This is a bug: if the repo is empty then we wouldn't ever generate a
- // packfile, but we determine a repack is needed because it's missing a
- // bitmap. It's a rather benign bug though: git-repack(1) will exit
- // immediately because it knows that there's nothing to repack.
- expectedNeeded: true,
- expectedConfig: RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- },
},
{
desc: "missing bitmap",
@@ -510,15 +501,13 @@ func TestOptimizeRepository(t *testing.T) {
expectedMetrics string
}{
{
- desc: "empty repository tries to write bitmap",
+ desc: "empty repository does nothing",
setup: func(t *testing.T) *gitalypb.Repository {
repo, _ := gittest.InitRepo(t, cfg, cfg.Storages[0])
return repo
},
expectedMetrics: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository
# TYPE gitaly_housekeeping_tasks_total counter
-gitaly_housekeeping_tasks_total{housekeeping_task="packed_objects_full", status="success"} 1
-gitaly_housekeeping_tasks_total{housekeeping_task="written_bitmap", status="success"} 1
gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
`,
},
diff --git a/internal/gitaly/service/repository/optimize_test.go b/internal/gitaly/service/repository/optimize_test.go
index 0bc1932d5..a843b5668 100644
--- a/internal/gitaly/service/repository/optimize_test.go
+++ b/internal/gitaly/service/repository/optimize_test.go
@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
+ "strings"
"testing"
"time"
@@ -107,6 +108,11 @@ func TestOptimizeRepository(t *testing.T) {
)
}
+ // Write a blob whose OID is known to have a "17" prefix, which is required such that
+ // OptimizeRepository would try to repack at all.
+ blobOIDWith17Prefix := gittest.WriteBlob(t, cfg, testRepoPath, []byte("32"))
+ require.True(t, strings.HasPrefix(blobOIDWith17Prefix.String(), "17"))
+
bitmaps, err := filepath.Glob(filepath.Join(testRepoPath, "objects", "pack", "*.bitmap"))
require.NoError(t, err)
require.Empty(t, bitmaps)