diff options
author | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-03-22 08:37:25 +0300 |
---|---|---|
committer | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-03-22 08:37:25 +0300 |
commit | c84d44f21189bbe95d5878bfb3c703915da80ee7 (patch) | |
tree | ee74fd77e0b794df583bc5faf952935e9d4c15cf | |
parent | 82b151a8899eb49d04600ec17c84042736949f48 (diff) | |
parent | bd98b50cd249a250b35272c62ece295a7d68ad77 (diff) |
Merge branch 'pks-prune-unreachable-objects-with-cruft-packs' into 'master'qmnguyen0711/make-distributed-tracing-less-global
repository: Consider cruft packs in PruneUnreachableObjects
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5526
Merged-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Approved-by: Justin Tobler <jtobler@gitlab.com>
Reviewed-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Reviewed-by: Justin Tobler <jtobler@gitlab.com>
Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r-- | internal/gitaly/service/repository/prune_unreachable_objects.go | 31 | ||||
-rw-r--r-- | internal/gitaly/service/repository/prune_unreachable_objects_test.go | 95 |
2 files changed, 124 insertions, 2 deletions
diff --git a/internal/gitaly/service/repository/prune_unreachable_objects.go b/internal/gitaly/service/repository/prune_unreachable_objects.go index 8706713b1..49904e0a6 100644 --- a/internal/gitaly/service/repository/prune_unreachable_objects.go +++ b/internal/gitaly/service/repository/prune_unreachable_objects.go @@ -2,11 +2,13 @@ package repository import ( "context" + "fmt" "time" "gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" ) @@ -32,12 +34,39 @@ func (s *server) PruneUnreachableObjects( return nil, err } + // Verify that the repository is not an object pool. Pruning objects in object pools is not + // a safe operation and is likely to cause corruption of object pool members. + repoInfo, err := stats.RepositoryInfoForRepository(repo) + if err != nil { + return nil, fmt.Errorf("deriving repository info: %w", err) + } + if repoInfo.IsObjectPool { + return nil, structerr.NewInvalidArgument("pruning objects for object pool") + } + + expireBefore := time.Now().Add(-30 * time.Minute) + + // We need to prune loose unreachable objects that exist in the repository. if err := housekeeping.PruneObjects(ctx, repo, housekeeping.PruneObjectsConfig{ - ExpireBefore: time.Now().Add(-30 * time.Minute), + ExpireBefore: expireBefore, }); err != nil { return nil, structerr.NewInternal("pruning objects: %w", err) } + // But we also have to prune unreachable objects part of cruft packs. The only way to do + // that is to do a full repack. So unfortunately, this is quite expensive. + if featureflag.WriteCruftPacks.IsEnabled(ctx) { + if err := housekeeping.RepackObjects(ctx, repo, housekeeping.RepackObjectsConfig{ + FullRepack: true, + WriteCruftPack: true, + WriteMultiPackIndex: true, + WriteBitmap: len(repoInfo.Alternates) == 0, + CruftExpireBefore: expireBefore, + }); err != nil { + return nil, structerr.NewInternal("repacking objects: %w", err) + } + } + // Rewrite the commit-graph so that it doesn't contain references to pruned commits // anymore. if err := housekeeping.WriteCommitGraph(ctx, repo, housekeeping.WriteCommitGraphConfig{ diff --git a/internal/gitaly/service/repository/prune_unreachable_objects_test.go b/internal/gitaly/service/repository/prune_unreachable_objects_test.go index d4badb60d..cf2510427 100644 --- a/internal/gitaly/service/repository/prune_unreachable_objects_test.go +++ b/internal/gitaly/service/repository/prune_unreachable_objects_test.go @@ -3,6 +3,7 @@ package repository import ( + "context" "os" "path/filepath" "testing" @@ -11,6 +12,9 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -18,8 +22,12 @@ import ( func TestPruneUnreachableObjects(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.WriteCruftPacks).Run(t, testPruneUnreachableObjects) +} + +func testPruneUnreachableObjects(t *testing.T, ctx context.Context) { + t.Parallel() - ctx := testhelper.Context(t) cfg, client := setupRepositoryServiceWithoutRepo(t) setObjectTime := func(t *testing.T, repoPath string, objectID git.ObjectID, when time.Time) { @@ -162,4 +170,89 @@ func TestPruneUnreachableObjects(t *testing.T) { // that it doesn't refer to the pruned commit anymore. gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "verify") }) + + t.Run("repository with recent objects in cruft pack", func(t *testing.T) { + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + // Write two commits into the repository and create a commit-graph. The second + // commit will become unreachable and will be pruned, but will be contained in the + // commit-graph. + reachableCommitID := gittest.WriteCommit(t, cfg, repoPath) + unreachableCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(reachableCommitID), gittest.WithBranch("main")) + + // Reset the "main" branch back to the initial root commit ID. + gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/heads/main", reachableCommitID.String()) + + // We now repack the repository with cruft packs. The result should be that we have + // two packs in the repository. + gittest.Exec(t, cfg, "-C", repoPath, "repack", "--cruft", "-d") + packfilesInfo, err := stats.PackfilesInfoForRepository(repo) + require.NoError(t, err) + require.EqualValues(t, 1, packfilesInfo.Count) + require.EqualValues(t, 1, packfilesInfo.CruftCount) + + _, err = client.PruneUnreachableObjects(ctx, &gitalypb.PruneUnreachableObjectsRequest{ + Repository: repoProto, + }) + require.NoError(t, err) + + // Both commits should still exist as they are still recent. + gittest.RequireObjectExists(t, cfg, repoPath, reachableCommitID) + gittest.RequireObjectExists(t, cfg, repoPath, unreachableCommitID) + }) + + t.Run("repository with stale objects in cruft pack", func(t *testing.T) { + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + // Write two commits into the repository and create a commit-graph. The second + // commit will become unreachable and will be pruned, but will be contained in the + // commit-graph. + reachableCommitID := gittest.WriteCommit(t, cfg, repoPath) + unreachableCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(reachableCommitID), gittest.WithBranch("main")) + + // Set the object time of both commits to be older than our 30 minute grace period. + // As a result, they would both be removed via cruft packs if they were to be + // considered unreachable. + setObjectTime(t, repoPath, reachableCommitID, time.Now().Add(-31*time.Minute)) + setObjectTime(t, repoPath, unreachableCommitID, time.Now().Add(-31*time.Minute)) + + // Reset the "main" branch back to the initial root commit ID. + gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/heads/main", reachableCommitID.String()) + + // We now repack the repository with cruft packs. The result should be that we have + // two packs in the repository. + gittest.Exec(t, cfg, "-C", repoPath, "repack", "--cruft", "-d") + packfilesInfo, err := stats.PackfilesInfoForRepository(repo) + require.NoError(t, err) + require.EqualValues(t, 1, packfilesInfo.Count) + require.EqualValues(t, 1, packfilesInfo.CruftCount) + + _, err = client.PruneUnreachableObjects(ctx, &gitalypb.PruneUnreachableObjectsRequest{ + Repository: repoProto, + }) + require.NoError(t, err) + + // The reachable commit should exist, but the unreachable one shouldn't. + gittest.RequireObjectExists(t, cfg, repoPath, reachableCommitID) + if featureflag.WriteCruftPacks.IsEnabled(ctx) { + gittest.RequireObjectNotExists(t, cfg, repoPath, unreachableCommitID) + } else { + gittest.RequireObjectExists(t, cfg, repoPath, unreachableCommitID) + } + }) + + t.Run("object pool", func(t *testing.T) { + repo, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + RelativePath: gittest.NewObjectPoolName(t), + }) + + _, err := client.PruneUnreachableObjects(ctx, &gitalypb.PruneUnreachableObjectsRequest{ + Repository: repo, + }) + testhelper.RequireGrpcError(t, + structerr.NewInvalidArgument("pruning objects for object pool"), err, + ) + }) } |