diff options
author | John Cai <jcai@gitlab.com> | 2022-03-15 19:52:28 +0300 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2022-03-15 19:52:28 +0300 |
commit | 8b033b1763e80468b4f11a572e1425d1064692e2 (patch) | |
tree | a9075666eef5a40edf6f5a4a12eb918fb2d84ae4 | |
parent | 4ef97df05e54269d90fdbd4d2f59fcc29b1afcdf (diff) | |
parent | a58c4be993173b767e4f301b3e78f82f257c8acb (diff) |
Merge branch 'pks-housekeeping-prune-with-grace-period' into 'master'
housekeeping: Don't prune recent objects
See merge request gitlab-org/gitaly!4410
-rw-r--r-- | internal/git/housekeeping/optimize_repository.go | 8 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_test.go | 99 |
2 files changed, 107 insertions, 0 deletions
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go index 15e733b26..9fb60031d 100644 --- a/internal/git/housekeeping/optimize_repository.go +++ b/internal/git/housekeeping/optimize_repository.go @@ -332,6 +332,14 @@ func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) { if err := repo.ExecAndWait(ctx, git.SubCmd{ Name: "prune", + Flags: []git.Option{ + // By default, this prunes all unreachable objects regardless of when they + // have last been accessed. This opens us up for races when there are + // concurrent commands which are just at the point of writing objects into + // the repository, but which haven't yet updated any references to make them + // reachable. We thus use the same two-week grace period as git-gc(1) does. + git.ValueFlag{Name: "--expire", Value: "two.weeks.ago"}, + }, }); err != nil { return false, fmt.Errorf("pruning objects: %w", err) } diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go index 8a74b51b2..3bad8ede3 100644 --- a/internal/git/housekeeping/optimize_repository_test.go +++ b/internal/git/housekeeping/optimize_repository_test.go @@ -5,11 +5,14 @@ import ( "io" "os" "path/filepath" + "strings" "testing" + "time" "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v14/internal/backchannel" + "gitlab.com/gitlab-org/gitaly/v14/internal/git" "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v14/internal/git/stats" @@ -572,3 +575,99 @@ func TestOptimizeRepository(t *testing.T) { }) } } + +func TestPruneIfNeeded(t *testing.T) { + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + t.Run("empty repo does not prune", func(t *testing.T) { + repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + didPrune, err := pruneIfNeeded(ctx, repo) + require.NoError(t, err) + require.False(t, didPrune) + }) + + t.Run("repo with single object does not prune", func(t *testing.T) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + gittest.WriteBlob(t, cfg, repoPath, []byte("something")) + + didPrune, err := pruneIfNeeded(ctx, repo) + require.NoError(t, err) + require.False(t, didPrune) + }) + + t.Run("repo with single 17-prefixed objects does not prune", func(t *testing.T) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("32")) + require.True(t, strings.HasPrefix(blobID.String(), "17")) + + didPrune, err := pruneIfNeeded(ctx, repo) + require.NoError(t, err) + require.False(t, didPrune) + }) + + t.Run("repo with four 17-prefixed objects does not prune", func(t *testing.T) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + for _, contents := range []string{"32", "119", "334", "782"} { + blobID := gittest.WriteBlob(t, cfg, repoPath, []byte(contents)) + require.True(t, strings.HasPrefix(blobID.String(), "17")) + } + + didPrune, err := pruneIfNeeded(ctx, repo) + require.NoError(t, err) + require.False(t, didPrune) + }) + + t.Run("repo with five 17-prefixed objects does prune", func(t *testing.T) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + objectPath := func(oid git.ObjectID) string { + return filepath.Join(repoPath, "objects", oid.String()[0:2], oid.String()[2:]) + } + + // Contents with 17-prefix were brute forced with git-hash-object(1). + var blobs []git.ObjectID + for _, contents := range []string{"32", "119", "334", "782", "907"} { + blobID := gittest.WriteBlob(t, cfg, repoPath, []byte(contents)) + require.True(t, strings.HasPrefix(blobID.String(), "17")) + blobs = append(blobs, blobID) + } + + didPrune, err := pruneIfNeeded(ctx, repo) + require.NoError(t, err) + require.True(t, didPrune) + + // We shouldn't prune any objects which aren't older than the grace period of two + // weeks. + for _, blob := range blobs { + require.FileExists(t, objectPath(blob)) + } + + // Now we modify the object's times to be older than two weeks. + twoWeeksAgo := time.Now().Add(-1 * 2 * 7 * 24 * time.Hour) + for _, blob := range blobs { + require.NoError(t, os.Chtimes(objectPath(blob), twoWeeksAgo, twoWeeksAgo)) + } + + // Because we didn't prune objects before due to the grace period, the still exist + // and thus we would still want to prune here. + didPrune, err = pruneIfNeeded(ctx, repo) + require.NoError(t, err) + require.True(t, didPrune) + + // But this time the objects shouldn't exist anymore because they were older than + // the grace period. + for _, blob := range blobs { + require.NoFileExists(t, objectPath(blob)) + } + }) +} |