Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-22 11:06:28 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-22 11:06:28 +0300
commit719c5a5bd2b5ddb54de519d6873ccb1636f7b450 (patch)
treed904c996780c48a42e60967b8aebbd6962f71dc3
parentf21e9469e94600f50ecb01b98d46f54dd7b33b5c (diff)
parent8eb27d95fed895f511f81b227a08c69393689bab (diff)
Merge branch 'pks-housekeeping-prune-heuristics-mtime' into 'master'
housekeeping: Skip prune if there are no old objects Closes #4121 See merge request gitlab-org/gitaly!4423
-rw-r--r--internal/git/housekeeping/optimize_repository.go31
-rw-r--r--internal/git/housekeeping/optimize_repository_test.go92
2 files changed, 108 insertions, 15 deletions
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go
index e44f2ccf5..09d923c06 100644
--- a/internal/git/housekeeping/optimize_repository.go
+++ b/internal/git/housekeeping/optimize_repository.go
@@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"strings"
+ "time"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
"github.com/prometheus/client_golang/prometheus"
@@ -226,7 +227,7 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
}, nil
}
- looseObjectCount, err := estimateLooseObjectCount(repo)
+ looseObjectCount, err := estimateLooseObjectCount(repo, time.Now())
if err != nil {
return false, RepackObjectsConfig{}, fmt.Errorf("estimating loose object count: %w", err)
}
@@ -300,7 +301,10 @@ func packfileSizeAndCount(repo *localrepo.Repo) (int64, int64, error) {
// object name being derived via a cryptographic hash we know that in the general case, objects are
// evenly distributed across their sharding directories. We can thus estimate the number of loose
// objects by opening a single sharding directory and counting its entries.
-func estimateLooseObjectCount(repo *localrepo.Repo) (int64, error) {
+//
+// If a cutoff date is given, then this function will only take into account objects which are
+// older than the given point in time.
+func estimateLooseObjectCount(repo *localrepo.Repo, cutoffDate time.Time) (int64, error) {
repoPath, err := repo.Path()
if err != nil {
return 0, fmt.Errorf("getting repository path: %w", err)
@@ -322,6 +326,19 @@ func estimateLooseObjectCount(repo *localrepo.Repo) (int64, error) {
continue
}
+ entryInfo, err := entry.Info()
+ if err != nil {
+ if errors.Is(err, fs.ErrNotExist) {
+ continue
+ }
+
+ return 0, fmt.Errorf("reading object info: %w", err)
+ }
+
+ if entryInfo.ModTime().After(cutoffDate) {
+ continue
+ }
+
looseObjects++
}
@@ -338,7 +355,9 @@ func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
return false, nil
}
- looseObjectCount, err := estimateLooseObjectCount(repo)
+ // Only count objects older than two weeks. Objects which are more recent than that wouldn't
+ // get pruned anyway and thus cause us to prune all the time during the grace period.
+ looseObjectCount, err := estimateLooseObjectCount(repo, time.Now().AddDate(0, 0, -14))
if err != nil {
return false, fmt.Errorf("estimating loose object count: %w", err)
}
@@ -350,9 +369,9 @@ func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
// time.
//
// Using the same limit here doesn't quite fix this case: the unreachable objects would only
- // be pruned after a grace period of two weeks. But at least we know that we will eventually
- // prune up those unreachable objects, at which point we won't try to do another incremental
- // repack.
+ // be pruned after a grace period of two weeks. Because of that we only count objects which
+ // are older than this grace period such that we don't prune if there aren't any old and
+ // unreachable objects.
if looseObjectCount <= looseObjectLimit {
return false, nil
}
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
index 4762220d4..e8bd6f0dd 100644
--- a/internal/git/housekeeping/optimize_repository_test.go
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -424,7 +424,7 @@ func TestEstimateLooseObjectCount(t *testing.T) {
repo := localrepo.NewTestRepo(t, cfg, repoProto)
t.Run("empty repository", func(t *testing.T) {
- looseObjects, err := estimateLooseObjectCount(repo)
+ looseObjects, err := estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Zero(t, looseObjects)
})
@@ -437,7 +437,7 @@ func TestEstimateLooseObjectCount(t *testing.T) {
require.NoError(t, err)
testhelper.MustClose(t, object)
- looseObjects, err := estimateLooseObjectCount(repo)
+ looseObjects, err := estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Zero(t, looseObjects)
})
@@ -450,7 +450,7 @@ func TestEstimateLooseObjectCount(t *testing.T) {
require.NoError(t, err)
testhelper.MustClose(t, object)
- looseObjects, err := estimateLooseObjectCount(repo)
+ looseObjects, err := estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Equal(t, int64(256), looseObjects)
@@ -459,10 +459,43 @@ func TestEstimateLooseObjectCount(t *testing.T) {
require.NoError(t, err)
testhelper.MustClose(t, object)
- looseObjects, err = estimateLooseObjectCount(repo)
+ looseObjects, err = estimateLooseObjectCount(repo, time.Now())
require.NoError(t, err)
require.Equal(t, int64(512), looseObjects)
})
+
+ t.Run("object in estimation shard with grace period", func(t *testing.T) {
+ estimationShard := filepath.Join(repoPath, "objects", "17")
+ require.NoError(t, os.MkdirAll(estimationShard, 0o755))
+
+ objectPaths := []string{
+ filepath.Join(estimationShard, "123456"),
+ filepath.Join(estimationShard, "654321"),
+ }
+
+ cutoffDate := time.Now()
+ afterCutoffDate := cutoffDate.Add(1 * time.Minute)
+ beforeCutoffDate := cutoffDate.Add(-1 * time.Minute)
+
+ for _, objectPath := range objectPaths {
+ require.NoError(t, os.WriteFile(objectPath, nil, 0o644))
+ require.NoError(t, os.Chtimes(objectPath, afterCutoffDate, afterCutoffDate))
+ }
+
+ // Objects are recent, so with the cutoff-date they shouldn't be counted.
+ looseObjects, err := estimateLooseObjectCount(repo, cutoffDate)
+ require.NoError(t, err)
+ require.Equal(t, int64(0), looseObjects)
+
+ for i, objectPath := range objectPaths {
+ // Modify the object's mtime should cause it to be counted.
+ require.NoError(t, os.Chtimes(objectPath, beforeCutoffDate, beforeCutoffDate))
+
+ looseObjects, err = estimateLooseObjectCount(repo, cutoffDate)
+ require.NoError(t, err)
+ require.Equal(t, int64((i+1)*256), looseObjects)
+ }
+ })
}
func TestOptimizeRepository(t *testing.T) {
@@ -519,7 +552,7 @@ func TestOptimizeRepository(t *testing.T) {
},
},
{
- desc: "loose objects get pruned",
+ desc: "recent loose objects don't get pruned",
setup: func(t *testing.T) *gitalypb.Repository {
repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
@@ -528,9 +561,41 @@ func TestOptimizeRepository(t *testing.T) {
// The repack won't repack the following objects because they're
// broken, and thus we'll retry to prune them afterwards.
require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "objects", "17"), 0o755))
+
+ // We set the object's mtime to be almost two weeks ago. Given that
+ // our timeout is at exactly two weeks this shouldn't caused them to
+ // get pruned.
+ almostTwoWeeksAgo := time.Now().AddDate(0, 0, -14).Add(time.Minute)
+
for i := 0; i < 10; i++ {
blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i))
require.NoError(t, os.WriteFile(blobPath, nil, 0o644))
+ require.NoError(t, os.Chtimes(blobPath, almostTwoWeeksAgo, almostTwoWeeksAgo))
+ }
+
+ return repo
+ },
+ expectedOptimizations: map[string]float64{
+ "packed_objects_incremental": 1,
+ },
+ },
+ {
+ desc: "old loose objects get pruned",
+ setup: func(t *testing.T) *gitalypb.Repository {
+ repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index")
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths")
+
+ // The repack won't repack the following objects because they're
+ // broken, and thus we'll retry to prune them afterwards.
+ require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "objects", "17"), 0o755))
+
+ moreThanTwoWeeksAgo := time.Now().AddDate(0, 0, -14).Add(-time.Minute)
+
+ for i := 0; i < 10; i++ {
+ blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i))
+ require.NoError(t, os.WriteFile(blobPath, nil, 0o644))
+ require.NoError(t, os.Chtimes(blobPath, moreThanTwoWeeksAgo, moreThanTwoWeeksAgo))
}
return repo
@@ -756,7 +821,7 @@ func TestPruneIfNeeded(t *testing.T) {
require.False(t, didPrune)
})
- t.Run("repo with five 17-prefixed objects does prune", func(t *testing.T) {
+ t.Run("repo with five 17-prefixed objects does prune after grace period", func(t *testing.T) {
repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -772,15 +837,21 @@ func TestPruneIfNeeded(t *testing.T) {
blobs = append(blobs, blobID)
}
+ // We also write one blob that stays recent to verify it doesn't get pruned.
+ recentBlob := gittest.WriteBlob(t, cfg, repoPath, []byte("922"))
+ require.True(t, strings.HasPrefix(recentBlob.String(), "17"))
+
+ // We shouldn't want to prune anything yet because there is no object older than two
+ // weeks.
didPrune, err := pruneIfNeeded(ctx, repo)
require.NoError(t, err)
- require.True(t, didPrune)
+ require.False(t, didPrune)
- // We shouldn't prune any objects which aren't older than the grace period of two
- // weeks.
+ // Consequentially, the objects shouldn't have been pruned.
for _, blob := range blobs {
require.FileExists(t, objectPath(blob))
}
+ require.FileExists(t, objectPath(recentBlob))
// Now we modify the object's times to be older than two weeks.
twoWeeksAgo := time.Now().Add(-1 * 2 * 7 * 24 * time.Hour)
@@ -799,5 +870,8 @@ func TestPruneIfNeeded(t *testing.T) {
for _, blob := range blobs {
require.NoFileExists(t, objectPath(blob))
}
+
+ // The recent blob should continue to exist though.
+ require.FileExists(t, objectPath(recentBlob))
})
}