diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-07-12 14:31:37 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-07-13 15:25:01 +0300 |
commit | be9c15e7ae888e63e1d6d449e59db40a2ee3951e (patch) | |
tree | 4021ce6bedce0f0248d455944741faf639a300ef | |
parent | 09ecee48e98eb9acea5aa47f4d19e11eeccbb238 (diff) |
objectpool: Switch to use OptimizeRepository for pool repospks-object-pools-optimize-repository
The equivalent to a housekeeping job for object pools is our
FetchIntoObjectPool RPC: it fetches all changes from the primary pool
member and then optimizes the repository. Its implementation is separate
from `housekeeping.OptimizeRepository()` though which creates a world
where we have two parallel implementations of repository housekeeping,
which goes against our principle of a single source of truth.
More importantly though pool repositories may not only be optimized via
FetchIntoObjectPool, but also via our nightly repository housekeeping
job. So we already use both implementations to pack a repository. And
sure enough, they had some subtle differences already like delta islands
not being the same. Furthermore, OptimizeRepository is doing some things
like keeping commit-graphs up to date that FetchIntoObjectPool doesn't.
Unify the infrastructure so that we only have a one maintenance strategy
to worry about: OptimizeRepository. This has multiple benefits:
- We avoid any future divergence in behaviour.
- We have less code to worry about.
- Nightly maintenance and on-demand maintenance use the same code
paths.
- Commit-graphs are kept up to date again, which is something that
has been broken in object pools for quite a while.
- We avoid doing needless work when there is not much to be done
thanks to the heuristisc of OptimizeRepository.
- We avoid running repository maintenance concurrently on the same
pool repository given that OptimizeRepository exits early when it
sees a concurrent call to the same repository.
The new strategy is guarded by a feature flag for the time being.
Changelog: fixed
4 files changed, 92 insertions, 24 deletions
diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index 33f96731c..2639cc48e 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -18,6 +18,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v15/internal/git/repository" "gitlab.com/gitlab-org/gitaly/v15/internal/git/updateref" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" ) // FetchFromOrigin initializes the pool and fetches the objects from its origin repository @@ -73,15 +74,21 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *localrepo.Repo return fmt.Errorf("computing stats after fetch: %w", err) } - if err := o.Repo.ExecAndWait(ctx, git.SubCmd{ - Name: "pack-refs", - Flags: []git.Option{git.Flag{Name: "--all"}}, - }); err != nil { - return fmt.Errorf("packing pool refs: %w", err) - } + if featureflag.FetchIntoObjectPoolOptimizeRepository.IsEnabled(ctx) { + if err := o.housekeepingManager.OptimizeRepository(ctx, o.Repo); err != nil { + return fmt.Errorf("optimizing pool repo: %w", err) + } + } else { + if err := o.Repo.ExecAndWait(ctx, git.SubCmd{ + Name: "pack-refs", + Flags: []git.Option{git.Flag{Name: "--all"}}, + }); err != nil { + return fmt.Errorf("packing pool refs: %w", err) + } - if err := o.repackPool(ctx, o); err != nil { - return fmt.Errorf("repacking pool: %w", err) + if err := o.repackPool(ctx, o); err != nil { + return fmt.Errorf("repacking pool: %w", err) + } } return nil diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 92e0f36c4..92f741bc5 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -1,9 +1,11 @@ package objectpool import ( + "context" "fmt" "os" "path/filepath" + "strconv" "strings" "testing" @@ -13,13 +15,17 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" ) func TestFetchFromOrigin_dangling(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginDangling) +} - ctx := testhelper.Context(t) +func testFetchFromOriginDangling(t *testing.T, ctx context.Context) { + t.Parallel() cfg, pool, repoProto := setupObjectPool(t, ctx) repo := localrepo.NewTestRepo(t, cfg, repoProto) @@ -90,8 +96,11 @@ func TestFetchFromOrigin_dangling(t *testing.T) { func TestFetchFromOrigin_fsck(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginFsck) +} - ctx := testhelper.Context(t) +func testFetchFromOriginFsck(t *testing.T, ctx context.Context) { + t.Parallel() cfg, pool, repoProto := setupObjectPool(t, ctx) repo := localrepo.NewTestRepo(t, cfg, repoProto) @@ -116,8 +125,11 @@ func TestFetchFromOrigin_fsck(t *testing.T) { func TestFetchFromOrigin_deltaIslands(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginDeltaIslands) +} - ctx := testhelper.Context(t) +func testFetchFromOriginDeltaIslands(t *testing.T, ctx context.Context) { + t.Parallel() cfg, pool, repoProto := setupObjectPool(t, ctx) @@ -144,8 +156,11 @@ func TestFetchFromOrigin_deltaIslands(t *testing.T) { func TestFetchFromOrigin_bitmapHashCache(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginBitmapHashCache) +} - ctx := testhelper.Context(t) +func testFetchFromOriginBitmapHashCache(t *testing.T, ctx context.Context) { + t.Parallel() cfg, pool, repoProto := setupObjectPool(t, ctx) repo := localrepo.NewTestRepo(t, cfg, repoProto) @@ -171,8 +186,11 @@ func TestFetchFromOrigin_bitmapHashCache(t *testing.T) { func TestFetchFromOrigin_refUpdates(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginRefUpdates) +} - ctx := testhelper.Context(t) +func testFetchFromOriginRefUpdates(t *testing.T, ctx context.Context) { + t.Parallel() cfg, pool, repoProto := setupObjectPool(t, ctx) repo := localrepo.NewTestRepo(t, cfg, repoProto) @@ -197,11 +215,19 @@ func TestFetchFromOrigin_refUpdates(t *testing.T) { "tags/v1.1.0": "646ece5cfed840eca0a4feb21bcd6a81bb19bda3", } - for ref, newOid := range newRefs { - require.NotEqual(t, newOid, oldRefs[ref], "sanity check of new refs") + // Create a bunch of additional references. This is to trigger OptimizeRepository to indeed + // repack the loose references as we expect it to in this test. It's debatable whether we + // should test this at all here given that this is business of the housekeeping package. But + // it's easy enough to do, so it doesn't hurt. + for i := 0; i < 32; i++ { + newRefs[fmt.Sprintf("heads/branch-%d", i)] = gittest.WriteCommit(t, cfg, repoPath, + gittest.WithParents(), + gittest.WithMessage(strconv.Itoa(i)), + ).String() } for ref, oid := range newRefs { + require.NotEqual(t, oid, oldRefs[ref], "sanity check of new refs") gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/"+ref, oid) require.Equal(t, oid, resolveRef(t, cfg, repoPath, "refs/"+ref), "look up %q in source after update", ref) } @@ -218,8 +244,11 @@ func TestFetchFromOrigin_refUpdates(t *testing.T) { func TestFetchFromOrigin_refs(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginRefs) +} - ctx := testhelper.Context(t) +func testFetchFromOriginRefs(t *testing.T, ctx context.Context) { + t.Parallel() cfg, pool, _ := setupObjectPool(t, ctx) poolPath := pool.FullPath() diff --git a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go index c6539541c..df1951dba 100644 --- a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go +++ b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go @@ -1,6 +1,7 @@ package objectpool import ( + "context" "os" "path/filepath" "testing" @@ -17,6 +18,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testserver" @@ -29,7 +31,13 @@ import ( ) func TestFetchIntoObjectPool_Success(t *testing.T) { - ctx := testhelper.Context(t) + t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchIntoObjectPoolSuccess) +} + +func testFetchIntoObjectPoolSuccess(t *testing.T, ctx context.Context) { + t.Parallel() + cfg, repo, repoPath, locator, client := setup(ctx, t) repoCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch(t.Name())) @@ -57,12 +65,21 @@ func TestFetchIntoObjectPool_Success(t *testing.T) { // No problems gittest.Exec(t, cfg, "-C", pool.FullPath(), "fsck") - packFiles, err := filepath.Glob(filepath.Join(pool.FullPath(), "objects", "pack", "pack-*.pack")) - require.NoError(t, err) - require.Len(t, packFiles, 1, "ensure commits got packed") + // Verify that the newly written commit exists in the repository now. + gittest.Exec(t, cfg, "-C", pool.FullPath(), "rev-parse", "--verify", repoCommit.String()) + + if featureflag.FetchIntoObjectPoolOptimizeRepository.IsDisabled(ctx) { + // We used to verify here how objects have been packed. This doesn't make a lot of + // sense anymore in the new world though, where we rely on the housekeeping package + // to do repository maintenance for us. We thus only check for this when the feature + // flag is disabled. + packFiles, err := filepath.Glob(filepath.Join(pool.FullPath(), "objects", "pack", "pack-*.pack")) + require.NoError(t, err) + require.Len(t, packFiles, 1, "ensure commits got packed") - packContents := gittest.Exec(t, cfg, "-C", pool.FullPath(), "verify-pack", "-v", packFiles[0]) - require.Contains(t, string(packContents), repoCommit) + packContents := gittest.Exec(t, cfg, "-C", pool.FullPath(), "verify-pack", "-v", packFiles[0]) + require.Contains(t, string(packContents), repoCommit.String()) + } _, err = client.FetchIntoObjectPool(ctx, req) require.NoError(t, err, "calling FetchIntoObjectPool twice should be OK") @@ -124,8 +141,14 @@ func TestFetchIntoObjectPool_hooks(t *testing.T) { } func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) { - cfg := testcfg.Build(t) + t.Parallel() + testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchIntoObjectPoolCollectLogStatistics) +} +func testFetchIntoObjectPoolCollectLogStatistics(t *testing.T, ctx context.Context) { + t.Parallel() + + cfg := testcfg.Build(t) testcfg.BuildGitalyHooks(t, cfg) locator := config.NewLocator(cfg) @@ -133,7 +156,6 @@ func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) { logger, hook := test.NewNullLogger() cfg.SocketPath = runObjectPoolServer(t, cfg, locator, logger) - ctx := testhelper.Context(t) ctx = ctxlogrus.ToContext(ctx, log.WithField("test", "logging")) repo, _ := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{ Seed: gittest.SeedGitLabTest, diff --git a/internal/metadata/featureflag/ff_fetch_into_object_pool_optimize_repository.go b/internal/metadata/featureflag/ff_fetch_into_object_pool_optimize_repository.go new file mode 100644 index 000000000..0d47c50df --- /dev/null +++ b/internal/metadata/featureflag/ff_fetch_into_object_pool_optimize_repository.go @@ -0,0 +1,10 @@ +package featureflag + +// FetchIntoObjectPoolOptimizeRepository will cause FetchIntoObjectPool to use OptimizeRepository to +// maintain the object pool instead of manually performing repository maintenance. +var FetchIntoObjectPoolOptimizeRepository = NewFeatureFlag( + "fetch_into_object_pool_optimize_repository", + "v15.2.0", + "https://gitlab.com/gitlab-org/gitaly/-/issues/4342", + false, +) |