Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-18 10:02:28 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-18 10:02:28 +0300
commitc1b91e3ac7fcd9d3e593d31a56c9bf4e0d778240 (patch)
treebb631d1503e080d2d1d5ed8432fb93d49ff6ef09
parentd35ed2db0cdea9f7494a7f6758c06bacf0160c53 (diff)
housekeeping: Don't prune recent objectspks-housekeeping-prune-with-grace-period-v14.8
When running git-gc(1), Git automatically executes git-prune(1) with a grace period of two weeks: only when objects are older than that grace period will they be pruned. This is really important to fix a race condition with concurrent commands, which all will first write new objects into the repository before making them reachable via a ref update. This means that there is a brief period between writing the new objects and making them reachable. If objects would be pruned without a grace period, it could kick in right before updating the references and thus cause repository corruption. The two-weeks grace period bridges this gap by giving enough head room to finish the transaction. With the recent introduction of the heuristics-based OptimizeRepository RPC we have also started to use git-prune(1). The assumption was that the two-weeks grace period was the default of git-prune(1), but as it turns out it is only the default of git-gc(1). So because we are now running git-prune(1) without any parameters, we accidentally started to delete all unreachable objects, even if they had just been created. Curiously, this bug only started to surface as we were migrating Rails to use OptimizeRepository. Seemingly, because we previously only were executing the RPC as part of our nightly job, the logic didn't trigger in the right moment and thus never caused any (known) problems. Still, this is a serious bug. Fix this issue by passing `--expire=two.weeks.ago` to git-prune(1). Changelog: fixed
-rw-r--r--internal/gitaly/service/repository/optimize.go8
-rw-r--r--internal/gitaly/service/repository/optimize_test.go98
2 files changed, 106 insertions, 0 deletions
diff --git a/internal/gitaly/service/repository/optimize.go b/internal/gitaly/service/repository/optimize.go
index b13566036..f80506e2e 100644
--- a/internal/gitaly/service/repository/optimize.go
+++ b/internal/gitaly/service/repository/optimize.go
@@ -341,6 +341,14 @@ func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
if err := repo.ExecAndWait(ctx, git.SubCmd{
Name: "prune",
+ Flags: []git.Option{
+ // By default, this prunes all unreachable objects regardless of when they
+ // have last been accessed. This opens us up for races when there are
+ // concurrent commands which are just at the point of writing objects into
+ // the repository, but which haven't yet updated any references to make them
+ // reachable. We thus use the same two-week grace period as git-gc(1) does.
+ git.ValueFlag{Name: "--expire", Value: "two.weeks.ago"},
+ },
}); err != nil {
return false, fmt.Errorf("pruning objects: %w", err)
}
diff --git a/internal/gitaly/service/repository/optimize_test.go b/internal/gitaly/service/repository/optimize_test.go
index f7e4152e0..916aa433f 100644
--- a/internal/gitaly/service/repository/optimize_test.go
+++ b/internal/gitaly/service/repository/optimize_test.go
@@ -6,6 +6,7 @@ import (
"io"
"os"
"path/filepath"
+ "strings"
"testing"
"time"
@@ -15,6 +16,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/stats"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
"google.golang.org/grpc/codes"
)
@@ -743,3 +745,99 @@ func TestEstimateLooseObjectCount(t *testing.T) {
require.Equal(t, int64(512), looseObjects)
})
}
+
+func TestPruneIfNeededWithRealObjects(t *testing.T) {
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ t.Run("empty repo does not prune", func(t *testing.T) {
+ repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ didPrune, err := pruneIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.False(t, didPrune)
+ })
+
+ t.Run("repo with single object does not prune", func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ gittest.WriteBlob(t, cfg, repoPath, []byte("something"))
+
+ didPrune, err := pruneIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.False(t, didPrune)
+ })
+
+ t.Run("repo with single 17-prefixed objects does not prune", func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("32"))
+ require.True(t, strings.HasPrefix(blobID.String(), "17"))
+
+ didPrune, err := pruneIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.False(t, didPrune)
+ })
+
+ t.Run("repo with four 17-prefixed objects does not prune", func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ for _, contents := range []string{"32", "119", "334", "782"} {
+ blobID := gittest.WriteBlob(t, cfg, repoPath, []byte(contents))
+ require.True(t, strings.HasPrefix(blobID.String(), "17"))
+ }
+
+ didPrune, err := pruneIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.False(t, didPrune)
+ })
+
+ t.Run("repo with five 17-prefixed objects does prune", func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ objectPath := func(oid git.ObjectID) string {
+ return filepath.Join(repoPath, "objects", oid.String()[0:2], oid.String()[2:])
+ }
+
+ // Contents with 17-prefix were brute forced with git-hash-object(1).
+ var blobs []git.ObjectID
+ for _, contents := range []string{"32", "119", "334", "782", "907"} {
+ blobID := gittest.WriteBlob(t, cfg, repoPath, []byte(contents))
+ require.True(t, strings.HasPrefix(blobID.String(), "17"))
+ blobs = append(blobs, blobID)
+ }
+
+ didPrune, err := pruneIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.True(t, didPrune)
+
+ // We shouldn't prune any objects which aren't older than the grace period of two
+ // weeks.
+ for _, blob := range blobs {
+ require.FileExists(t, objectPath(blob))
+ }
+
+ // Now we modify the object's times to be older than two weeks.
+ twoWeeksAgo := time.Now().Add(-1 * 2 * 7 * 24 * time.Hour)
+ for _, blob := range blobs {
+ require.NoError(t, os.Chtimes(objectPath(blob), twoWeeksAgo, twoWeeksAgo))
+ }
+
+ // Because we didn't prune objects before due to the grace period, the still exist
+ // and thus we would still want to prune here.
+ didPrune, err = pruneIfNeeded(ctx, repo)
+ require.NoError(t, err)
+ require.True(t, didPrune)
+
+ // But this time the objects shouldn't exist anymore because they were older than
+ // the grace period.
+ for _, blob := range blobs {
+ require.NoFileExists(t, objectPath(blob))
+ }
+ })
+}