From d6fc61e3b340055b4bceb9e2d931505e3d96aef8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 17 Mar 2023 12:37:55 +0100 Subject: repository: Return error when trying to prune objects in pool repos The PruneUnreachableObjects RPC only purpose is to prune unreachable objects. While this should theoretically be a relatively safe operation for normal repositories, it definitiely isn't for object pools as any pool members may have deduplicated their objects so that they rely on the objects to keep existing in the pool repository. The concept of reachability does not really play any role here: once an object is in an object pool it must stay there. The PruneUnreachableObjects RPC does not have any safeguards for this though. Let's put one into place and return an error in case we are unexpectedly executed on an object pool. Changelog: changed --- .../gitaly/service/repository/prune_unreachable_objects.go | 11 +++++++++++ .../service/repository/prune_unreachable_objects_test.go | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/internal/gitaly/service/repository/prune_unreachable_objects.go b/internal/gitaly/service/repository/prune_unreachable_objects.go index 8706713b1..b534b06d5 100644 --- a/internal/gitaly/service/repository/prune_unreachable_objects.go +++ b/internal/gitaly/service/repository/prune_unreachable_objects.go @@ -2,6 +2,7 @@ package repository import ( "context" + "fmt" "time" "gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping" @@ -32,6 +33,16 @@ func (s *server) PruneUnreachableObjects( return nil, err } + // Verify that the repository is not an object pool. Pruning objects in object pools is not + // a safe operation and is likely to cause corruption of object pool members. + repoInfo, err := stats.RepositoryInfoForRepository(repo) + if err != nil { + return nil, fmt.Errorf("deriving repository info: %w", err) + } + if repoInfo.IsObjectPool { + return nil, structerr.NewInvalidArgument("pruning objects for object pool") + } + if err := housekeeping.PruneObjects(ctx, repo, housekeeping.PruneObjectsConfig{ ExpireBefore: time.Now().Add(-30 * time.Minute), }); err != nil { diff --git a/internal/gitaly/service/repository/prune_unreachable_objects_test.go b/internal/gitaly/service/repository/prune_unreachable_objects_test.go index d4badb60d..30cc3d666 100644 --- a/internal/gitaly/service/repository/prune_unreachable_objects_test.go +++ b/internal/gitaly/service/repository/prune_unreachable_objects_test.go @@ -162,4 +162,17 @@ func TestPruneUnreachableObjects(t *testing.T) { // that it doesn't refer to the pruned commit anymore. gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "verify") }) + + t.Run("object pool", func(t *testing.T) { + repo, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + RelativePath: gittest.NewObjectPoolName(t), + }) + + _, err := client.PruneUnreachableObjects(ctx, &gitalypb.PruneUnreachableObjectsRequest{ + Repository: repo, + }) + testhelper.RequireGrpcError(t, + structerr.NewInvalidArgument("pruning objects for object pool"), err, + ) + }) } -- cgit v1.2.3 From bd98b50cd249a250b35272c62ece295a7d68ad77 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 17 Mar 2023 13:01:27 +0100 Subject: repository: Consider cruft packs in PruneUnreachableObjects When pruning objects via PruneUnreachableObjects, then we execute git-prune(1) with a reduced grace period of 30 minutes. This only works to prune loose objects though, and will never delete objects part of a cruft pack. This means that as we're moving unreachable objects into cruft packs, that those objects will in fact not get pruned as expected anymore. Fix this bug by repacking the repository with cruft packs and a reduced expiry time. Changelog: fixed --- .../repository/prune_unreachable_objects.go | 20 +++++- .../repository/prune_unreachable_objects_test.go | 82 +++++++++++++++++++++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/internal/gitaly/service/repository/prune_unreachable_objects.go b/internal/gitaly/service/repository/prune_unreachable_objects.go index b534b06d5..49904e0a6 100644 --- a/internal/gitaly/service/repository/prune_unreachable_objects.go +++ b/internal/gitaly/service/repository/prune_unreachable_objects.go @@ -8,6 +8,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" ) @@ -43,12 +44,29 @@ func (s *server) PruneUnreachableObjects( return nil, structerr.NewInvalidArgument("pruning objects for object pool") } + expireBefore := time.Now().Add(-30 * time.Minute) + + // We need to prune loose unreachable objects that exist in the repository. if err := housekeeping.PruneObjects(ctx, repo, housekeeping.PruneObjectsConfig{ - ExpireBefore: time.Now().Add(-30 * time.Minute), + ExpireBefore: expireBefore, }); err != nil { return nil, structerr.NewInternal("pruning objects: %w", err) } + // But we also have to prune unreachable objects part of cruft packs. The only way to do + // that is to do a full repack. So unfortunately, this is quite expensive. + if featureflag.WriteCruftPacks.IsEnabled(ctx) { + if err := housekeeping.RepackObjects(ctx, repo, housekeeping.RepackObjectsConfig{ + FullRepack: true, + WriteCruftPack: true, + WriteMultiPackIndex: true, + WriteBitmap: len(repoInfo.Alternates) == 0, + CruftExpireBefore: expireBefore, + }); err != nil { + return nil, structerr.NewInternal("repacking objects: %w", err) + } + } + // Rewrite the commit-graph so that it doesn't contain references to pruned commits // anymore. if err := housekeeping.WriteCommitGraph(ctx, repo, housekeeping.WriteCommitGraphConfig{ diff --git a/internal/gitaly/service/repository/prune_unreachable_objects_test.go b/internal/gitaly/service/repository/prune_unreachable_objects_test.go index 30cc3d666..cf2510427 100644 --- a/internal/gitaly/service/repository/prune_unreachable_objects_test.go +++ b/internal/gitaly/service/repository/prune_unreachable_objects_test.go @@ -3,6 +3,7 @@ package repository import ( + "context" "os" "path/filepath" "testing" @@ -11,6 +12,9 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -18,8 +22,12 @@ import ( func TestPruneUnreachableObjects(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.WriteCruftPacks).Run(t, testPruneUnreachableObjects) +} + +func testPruneUnreachableObjects(t *testing.T, ctx context.Context) { + t.Parallel() - ctx := testhelper.Context(t) cfg, client := setupRepositoryServiceWithoutRepo(t) setObjectTime := func(t *testing.T, repoPath string, objectID git.ObjectID, when time.Time) { @@ -163,6 +171,78 @@ func TestPruneUnreachableObjects(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "verify") }) + t.Run("repository with recent objects in cruft pack", func(t *testing.T) { + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + // Write two commits into the repository and create a commit-graph. The second + // commit will become unreachable and will be pruned, but will be contained in the + // commit-graph. + reachableCommitID := gittest.WriteCommit(t, cfg, repoPath) + unreachableCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(reachableCommitID), gittest.WithBranch("main")) + + // Reset the "main" branch back to the initial root commit ID. + gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/heads/main", reachableCommitID.String()) + + // We now repack the repository with cruft packs. The result should be that we have + // two packs in the repository. + gittest.Exec(t, cfg, "-C", repoPath, "repack", "--cruft", "-d") + packfilesInfo, err := stats.PackfilesInfoForRepository(repo) + require.NoError(t, err) + require.EqualValues(t, 1, packfilesInfo.Count) + require.EqualValues(t, 1, packfilesInfo.CruftCount) + + _, err = client.PruneUnreachableObjects(ctx, &gitalypb.PruneUnreachableObjectsRequest{ + Repository: repoProto, + }) + require.NoError(t, err) + + // Both commits should still exist as they are still recent. + gittest.RequireObjectExists(t, cfg, repoPath, reachableCommitID) + gittest.RequireObjectExists(t, cfg, repoPath, unreachableCommitID) + }) + + t.Run("repository with stale objects in cruft pack", func(t *testing.T) { + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + // Write two commits into the repository and create a commit-graph. The second + // commit will become unreachable and will be pruned, but will be contained in the + // commit-graph. + reachableCommitID := gittest.WriteCommit(t, cfg, repoPath) + unreachableCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(reachableCommitID), gittest.WithBranch("main")) + + // Set the object time of both commits to be older than our 30 minute grace period. + // As a result, they would both be removed via cruft packs if they were to be + // considered unreachable. + setObjectTime(t, repoPath, reachableCommitID, time.Now().Add(-31*time.Minute)) + setObjectTime(t, repoPath, unreachableCommitID, time.Now().Add(-31*time.Minute)) + + // Reset the "main" branch back to the initial root commit ID. + gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/heads/main", reachableCommitID.String()) + + // We now repack the repository with cruft packs. The result should be that we have + // two packs in the repository. + gittest.Exec(t, cfg, "-C", repoPath, "repack", "--cruft", "-d") + packfilesInfo, err := stats.PackfilesInfoForRepository(repo) + require.NoError(t, err) + require.EqualValues(t, 1, packfilesInfo.Count) + require.EqualValues(t, 1, packfilesInfo.CruftCount) + + _, err = client.PruneUnreachableObjects(ctx, &gitalypb.PruneUnreachableObjectsRequest{ + Repository: repoProto, + }) + require.NoError(t, err) + + // The reachable commit should exist, but the unreachable one shouldn't. + gittest.RequireObjectExists(t, cfg, repoPath, reachableCommitID) + if featureflag.WriteCruftPacks.IsEnabled(ctx) { + gittest.RequireObjectNotExists(t, cfg, repoPath, unreachableCommitID) + } else { + gittest.RequireObjectExists(t, cfg, repoPath, unreachableCommitID) + } + }) + t.Run("object pool", func(t *testing.T) { repo, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ RelativePath: gittest.NewObjectPoolName(t), -- cgit v1.2.3