
gitlab.com/gitlab-org/gitaly.git
author    Patrick Steinhardt <psteinhardt@gitlab.com>    2022-03-21 15:44:46 +0300
committer Patrick Steinhardt <psteinhardt@gitlab.com>    2022-03-22 09:23:56 +0300
commit    8eb27d95fed895f511f81b227a08c69393689bab (patch)
tree      0b5a2b30d4cd9697ad6a7ba60f21fed0adc43704
parent    dca634281da8ef08f51eaa0019c6ddac7a531d73 (diff)
housekeeping: Skip prune if there are no old objects
The `OptimizeRepository()` RPC heuristically executes git-prune(1). The heuristic we currently use counts the number of loose objects which exist after having repacked the repository and, if they exceed a given threshold, we'll try to remove them.

This also causes us to prune when there are unreachable but recent objects, though: because unreachable objects don't get packed, and because we only prune objects older than two weeks, this will in many cases cause us to execute git-prune(1) without any need.

Improve the heuristic to only prune in case there are loose objects with an mtime older than two weeks. This should in the general case avoid useless prunes, but still detect the case where there is work to be done.

Changelog: performance
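Distilled out of the diff below, the improved heuristic amounts to the following sketch. It is a minimal illustration rather than Gitaly's exact code: the function and constant names here are made up, and the shard choice of "17" and the 256x extrapolation are inferred from the tests further down.

package housekeeping

import (
	"os"
	"path/filepath"
	"time"
)

// looseObjectLimit is illustrative; the real threshold is defined elsewhere
// in the housekeeping package.
const looseObjectLimit = 1024

// estimateStaleLooseObjects counts loose objects in a single sharding
// directory whose mtime is older than the given cutoff and extrapolates the
// count across all 256 shards.
func estimateStaleLooseObjects(repoPath string, cutoff time.Time) (int64, error) {
	entries, err := os.ReadDir(filepath.Join(repoPath, "objects", "17"))
	if err != nil {
		if os.IsNotExist(err) {
			// A missing shard directory means there are no loose objects to count.
			return 0, nil
		}
		return 0, err
	}

	var stale int64
	for _, entry := range entries {
		info, err := entry.Info()
		if err != nil {
			// The object may have been pruned concurrently.
			continue
		}
		// Skip recent objects: git-prune(1) wouldn't remove them during its
		// grace period anyway.
		if info.ModTime().After(cutoff) {
			continue
		}
		stale++
	}

	// Object names derive from a cryptographic hash, so loose objects are
	// distributed evenly across the 256 sharding directories.
	return stale * 256, nil
}

A caller would then prune only when this estimate exceeds the limit, which is what pruneIfNeeded does in the diff below with a cutoff of two weeks.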
 internal/git/housekeeping/optimize_repository.go      | 31
 internal/git/housekeeping/optimize_repository_test.go | 92
 2 files changed, 108 insertions(+), 15 deletions(-)
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go
index e44f2ccf5..09d923c06 100644
--- a/internal/git/housekeeping/optimize_repository.go
+++ b/internal/git/housekeeping/optimize_repository.go
@@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"strings"
+ "time"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
"github.com/prometheus/client_golang/prometheus"
@@ -226,7 +227,7 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
}, nil
}
- looseObjectCount, err := estimateLooseObjectCount(repo)
+ looseObjectCount, err := estimateLooseObjectCount(repo, time.Now())
if err != nil {
return false, RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err)
}
@@ -300,7 +301,10 @@ func packfileSizeAndCount(repo *localrepo.Repo) (int64, int64, error) {
// object name being derived via a cryptographic hash we know that in the general case, objects are
// evenly distributed across their sharding directories. We can thus estimate the number of loose
// objects by opening a single sharding directory and counting its entries.
-func estimateLooseObjectCount(repo *localrepo.Repo) (int64, error) {
+//
+// If a cutoff date is given, then this function will only take into account objects which are
+// older than the given point in time.
+func estimateLooseObjectCount(repo *localrepo.Repo, cutoffDate time.Time) (int64, error) {
repoPath, err := repo.Path()
if err != nil {
return 0, fmt.Errorf("getting repository path: %w", err)
@@ -322,6 +326,19 @@ func estimateLooseObjectCount(repo *localrepo.Repo) (int64, error) {
continue
}
+ entryInfo, err := entry.Info()
+ if err != nil {
+ if errors.Is(err, fs.ErrNotExist) {
+ continue
+ }
+
+ return 0, fmt.Errorf("reading object info: %w", err)
+ }
+
+ if entryInfo.ModTime().After(cutoffDate) {
+ continue
+ }
+
looseObjects++
}
@@ -338,7 +355,9 @@ func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
return false, nil
}
- looseObjectCount, err := estimateLooseObjectCount(repo)
+ // Only count objects older than two weeks. Objects which are more recent than that wouldn't
+ // get pruned anyway, and counting them would thus cause us to prune all the time during the
+ // grace period.
+ looseObjectCount, err := estimateLooseObjectCount(repo, time.Now().AddDate(0, 0, -14))
if err != nil {
return false, fmt.Errorf("estimating loose object count: %w", err)
}
@@ -350,9 +369,9 @@ func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
// time.
//
// Using the same limit here doesn't quite fix this case: the unreachable objects would only
- // be pruned after a grace period of two weeks. But at least we know that we will eventually
- // prune up those unreachable objects, at which point we won't try to do another incremental
- // repack.
+ // be pruned after a grace period of two weeks. Because of that we only count objects which
+ // are older than this grace period such that we don't prune if there aren't any old and
+ // unreachable objects.
if looseObjectCount <= looseObjectLimit {
return false, nil
}
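For context on what this heuristic guards: once the estimate exceeds the limit, Gitaly executes git-prune(1). The following is a hedged sketch assuming a plain exec call; Gitaly really drives git through its own command factory, and the exact flags it passes are not part of this diff. git-prune(1)'s --expire flag accepts an approxidate, which is where the two-week grace period discussed in the comments above comes from.

package housekeeping

import (
	"context"
	"fmt"
	"os/exec"
)

// pruneStaleObjects removes unreachable loose objects older than the grace
// period. The invocation is illustrative only.
func pruneStaleObjects(ctx context.Context, repoPath string) error {
	cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "prune", "--expire", "two.weeks.ago")
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("pruning objects: %w: %s", err, out)
	}
	return nil
}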
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
index 4762220d4..e8bd6f0dd 100644
--- a/internal/git/housekeeping/optimize_repository_test.go
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -424,7 +424,7 @@ func TestEstimateLooseObjectCount(t *testing.T) {
repo := localrepo.NewTestRepo(t, cfg, repoProto)
t.Run("empty repository", func(t *testing.T) {
- looseObjects, err := estimateLooseObjectCount(repo)
+ looseObjects, err := estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Zero(t, looseObjects)
})
@@ -437,7 +437,7 @@ func TestEstimateLooseObjectCount(t *testing.T) {
require.NoError(t, err)
testhelper.MustClose(t, object)
- looseObjects, err := estimateLooseObjectCount(repo)
+ looseObjects, err := estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Zero(t, looseObjects)
})
@@ -450,7 +450,7 @@ func TestEstimateLooseObjectCount(t *testing.T) {
require.NoError(t, err)
testhelper.MustClose(t, object)
- looseObjects, err := estimateLooseObjectCount(repo)
+ looseObjects, err := estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Equal(t, int64(256), looseObjects)
@@ -459,10 +459,43 @@ func TestEstimateLooseObjectCount(t *testing.T) {
require.NoError(t, err)
testhelper.MustClose(t, object)
- looseObjects, err = estimateLooseObjectCount(repo)
+ looseObjects, err = estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Equal(t, int64(512), looseObjects)
})
+
+ t.Run("object in estimation shard with grace period", func(t *testing.T) {
+ estimationShard := filepath.Join(repoPath, "objects", "17")
+ require.NoError(t, os.MkdirAll(estimationShard, 0o755))
+
+ objectPaths := []string{
+ filepath.Join(estimationShard, "123456"),
+ filepath.Join(estimationShard, "654321"),
+ }
+
+ cutoffDate := time.Now()
+ afterCutoffDate := cutoffDate.Add(1 * time.Minute)
+ beforeCutoffDate := cutoffDate.Add(-1 * time.Minute)
+
+ for _, objectPath := range objectPaths {
+ require.NoError(t, os.WriteFile(objectPath, nil, 0o644))
+ require.NoError(t, os.Chtimes(objectPath, afterCutoffDate, afterCutoffDate))
+ }
+
+ // Objects are recent, so with the cutoff date they shouldn't be counted.
+ looseObjects, err := estimateLooseObjectCount(repo, cutoffDate)
+ require.NoError(t, err)
+ require.Equal(t, int64(0), looseObjects)
+
+ for i, objectPath := range objectPaths {
+ // Modifying the object's mtime to be older than the cutoff should cause it to be counted.
+ require.NoError(t, os.Chtimes(objectPath, beforeCutoffDate, beforeCutoffDate))
+
+ looseObjects, err = estimateLooseObjectCount(repo, cutoffDate)
+ require.NoError(t, err)
+ require.Equal(t, int64((i+1)*256), looseObjects)
+ }
+ })
}
func TestOptimizeRepository(t *testing.T) {
@@ -519,7 +552,7 @@ func TestOptimizeRepository(t *testing.T) {
},
},
{
- desc: "loose objects get pruned",
+ desc: "recent loose objects don't get pruned",
setup: func(t *testing.T) *gitalypb.Repository {
repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
@@ -528,9 +561,41 @@ func TestOptimizeRepository(t *testing.T) {
// The repack won't repack the following objects because they're
// broken, and thus we'll try to prune them afterwards.
require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "objects", "17"), 0o755))
+
+ // We set the objects' mtimes to be almost two weeks ago. Given that
+ // our timeout is at exactly two weeks, this shouldn't cause them to
+ // get pruned.
+ almostTwoWeeksAgo := time.Now().AddDate(0, 0, -14).Add(time.Minute)
+
for i := 0; i < 10; i++ {
blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i))
require.NoError(t, os.WriteFile(blobPath, nil, 0o644))
+ require.NoError(t, os.Chtimes(blobPath, almostTwoWeeksAgo, almostTwoWeeksAgo))
+ }
+
+ return repo
+ },
+ expectedOptimizations: map[string]float64{
+ "packed_objects_incremental": 1,
+ },
+ },
+ {
+ desc: "old loose objects get pruned",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths")
+
+ // The repack won't repack the following objects because they're
+ // broken, and thus we'll try to prune them afterwards.
+ require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "objects", "17"), 0o755))
+
+ moreThanTwoWeeksAgo := time.Now().AddDate(0, 0, -14).Add(-time.Minute)
+
+ for i := 0; i < 10; i++ {
+ blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i))
+ require.NoError(t, os.WriteFile(blobPath, nil, 0o644))
+ require.NoError(t, os.Chtimes(blobPath, moreThanTwoWeeksAgo, moreThanTwoWeeksAgo))
}
return repo
@@ -756,7 +821,7 @@ func TestPruneIfNeeded(t *testing.T) {
require.False(t, didPrune)
})
- t.Run("repo with five 17-prefixed objects does prune", func(t *testing.T) {
+ t.Run("repo with five 17-prefixed objects does prune after grace period", func(t *testing.T) {
repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -772,15 +837,21 @@ func TestPruneIfNeeded(t *testing.T) {
blobs = append(blobs, blobID)
}
+ // We also write one blob that stays recent to verify it doesn't get pruned.
+ recentBlob := gittest.WriteBlob(t, cfg, repoPath, []byte("922"))
+ require.True(t, strings.HasPrefix(recentBlob.String(), "17"))
+
+ // We shouldn't prune anything yet because there is no object older than two
+ // weeks.
didPrune, err := pruneIfNeeded(ctx, repo)
require.NoError(t, err)
- require.True(t, didPrune)
+ require.False(t, didPrune)
- // We shouldn't prune any objects which aren't older than the grace period of two
- // weeks.
+ // Consequently, the objects shouldn't have been pruned.
for _, blob := range blobs {
require.FileExists(t, objectPath(blob))
}
+ require.FileExists(t, objectPath(recentBlob))
// Now we modify the objects' mtimes to be older than two weeks.
twoWeeksAgo := time.Now().Add(-1 * 2 * 7 * 24 * time.Hour)
@@ -799,5 +870,8 @@ func TestPruneIfNeeded(t *testing.T) {
for _, blob := range blobs {
require.NoFileExists(t, objectPath(blob))
}
+
+ // The recent blob should continue to exist though.
+ require.FileExists(t, objectPath(recentBlob))
})
}
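The tests above fake stale loose objects by writing empty files into the objects/17 estimation shard and backdating them with os.Chtimes. Distilled into a helper, the pattern looks roughly like this; the helper itself is illustrative and not part of the diff:

package housekeeping

import (
	"os"
	"path/filepath"
	"testing"
	"time"
)

// writeStaleLooseObject creates a fake loose object in the estimation shard
// and backdates its mtime past the two-week grace period so that both the
// estimation heuristic and git-prune(1) consider it stale.
func writeStaleLooseObject(t *testing.T, repoPath, suffix string) string {
	t.Helper()

	shard := filepath.Join(repoPath, "objects", "17")
	if err := os.MkdirAll(shard, 0o755); err != nil {
		t.Fatal(err)
	}

	objectPath := filepath.Join(shard, suffix)
	if err := os.WriteFile(objectPath, nil, 0o644); err != nil {
		t.Fatal(err)
	}

	moreThanTwoWeeksAgo := time.Now().AddDate(0, 0, -14).Add(-time.Minute)
	if err := os.Chtimes(objectPath, moreThanTwoWeeksAgo, moreThanTwoWeeksAgo); err != nil {
		t.Fatal(err)
	}

	return objectPath
}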