Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-12 14:31:37 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-13 15:25:01 +0300
commitbe9c15e7ae888e63e1d6d449e59db40a2ee3951e (patch)
tree4021ce6bedce0f0248d455944741faf639a300ef
parent09ecee48e98eb9acea5aa47f4d19e11eeccbb238 (diff)
objectpool: Switch to use OptimizeRepository for pool repospks-object-pools-optimize-repository
The equivalent to a housekeeping job for object pools is our FetchIntoObjectPool RPC: it fetches all changes from the primary pool member and then optimizes the repository. Its implementation is separate from `housekeeping.OptimizeRepository()` though which creates a world where we have two parallel implementations of repository housekeeping, which goes against our principle of a single source of truth. More importantly though pool repositories may not only be optimized via FetchIntoObjectPool, but also via our nightly repository housekeeping job. So we already use both implementations to pack a repository. And sure enough, they had some subtle differences already like delta islands not being the same. Furthermore, OptimizeRepository is doing some things like keeping commit-graphs up to date that FetchIntoObjectPool doesn't. Unify the infrastructure so that we only have a one maintenance strategy to worry about: OptimizeRepository. This has multiple benefits: - We avoid any future divergence in behaviour. - We have less code to worry about. - Nightly maintenance and on-demand maintenance use the same code paths. - Commit-graphs are kept up to date again, which is something that has been broken in object pools for quite a while. - We avoid doing needless work when there is not much to be done thanks to the heuristisc of OptimizeRepository. - We avoid running repository maintenance concurrently on the same pool repository given that OptimizeRepository exits early when it sees a concurrent call to the same repository. The new strategy is guarded by a feature flag for the time being. Changelog: fixed
-rw-r--r--internal/git/objectpool/fetch.go23
-rw-r--r--internal/git/objectpool/fetch_test.go45
-rw-r--r--internal/gitaly/service/objectpool/fetch_into_object_pool_test.go38
-rw-r--r--internal/metadata/featureflag/ff_fetch_into_object_pool_optimize_repository.go10
4 files changed, 92 insertions, 24 deletions
diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go
index 33f96731c..2639cc48e 100644
--- a/internal/git/objectpool/fetch.go
+++ b/internal/git/objectpool/fetch.go
@@ -18,6 +18,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/repository"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/updateref"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
)
// FetchFromOrigin initializes the pool and fetches the objects from its origin repository
@@ -73,15 +74,21 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *localrepo.Repo
return fmt.Errorf("computing stats after fetch: %w", err)
}
- if err := o.Repo.ExecAndWait(ctx, git.SubCmd{
- Name: "pack-refs",
- Flags: []git.Option{git.Flag{Name: "--all"}},
- }); err != nil {
- return fmt.Errorf("packing pool refs: %w", err)
- }
+ if featureflag.FetchIntoObjectPoolOptimizeRepository.IsEnabled(ctx) {
+ if err := o.housekeepingManager.OptimizeRepository(ctx, o.Repo); err != nil {
+ return fmt.Errorf("optimizing pool repo: %w", err)
+ }
+ } else {
+ if err := o.Repo.ExecAndWait(ctx, git.SubCmd{
+ Name: "pack-refs",
+ Flags: []git.Option{git.Flag{Name: "--all"}},
+ }); err != nil {
+ return fmt.Errorf("packing pool refs: %w", err)
+ }
- if err := o.repackPool(ctx, o); err != nil {
- return fmt.Errorf("repacking pool: %w", err)
+ if err := o.repackPool(ctx, o); err != nil {
+ return fmt.Errorf("repacking pool: %w", err)
+ }
}
return nil
diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go
index 92e0f36c4..92f741bc5 100644
--- a/internal/git/objectpool/fetch_test.go
+++ b/internal/git/objectpool/fetch_test.go
@@ -1,9 +1,11 @@
package objectpool
import (
+ "context"
"fmt"
"os"
"path/filepath"
+ "strconv"
"strings"
"testing"
@@ -13,13 +15,17 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
)
func TestFetchFromOrigin_dangling(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginDangling)
+}
- ctx := testhelper.Context(t)
+func testFetchFromOriginDangling(t *testing.T, ctx context.Context) {
+ t.Parallel()
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -90,8 +96,11 @@ func TestFetchFromOrigin_dangling(t *testing.T) {
func TestFetchFromOrigin_fsck(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginFsck)
+}
- ctx := testhelper.Context(t)
+func testFetchFromOriginFsck(t *testing.T, ctx context.Context) {
+ t.Parallel()
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -116,8 +125,11 @@ func TestFetchFromOrigin_fsck(t *testing.T) {
func TestFetchFromOrigin_deltaIslands(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginDeltaIslands)
+}
- ctx := testhelper.Context(t)
+func testFetchFromOriginDeltaIslands(t *testing.T, ctx context.Context) {
+ t.Parallel()
cfg, pool, repoProto := setupObjectPool(t, ctx)
@@ -144,8 +156,11 @@ func TestFetchFromOrigin_deltaIslands(t *testing.T) {
func TestFetchFromOrigin_bitmapHashCache(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginBitmapHashCache)
+}
- ctx := testhelper.Context(t)
+func testFetchFromOriginBitmapHashCache(t *testing.T, ctx context.Context) {
+ t.Parallel()
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -171,8 +186,11 @@ func TestFetchFromOrigin_bitmapHashCache(t *testing.T) {
func TestFetchFromOrigin_refUpdates(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginRefUpdates)
+}
- ctx := testhelper.Context(t)
+func testFetchFromOriginRefUpdates(t *testing.T, ctx context.Context) {
+ t.Parallel()
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -197,11 +215,19 @@ func TestFetchFromOrigin_refUpdates(t *testing.T) {
"tags/v1.1.0": "646ece5cfed840eca0a4feb21bcd6a81bb19bda3",
}
- for ref, newOid := range newRefs {
- require.NotEqual(t, newOid, oldRefs[ref], "sanity check of new refs")
+ // Create a bunch of additional references. This is to trigger OptimizeRepository to indeed
+ // repack the loose references as we expect it to in this test. It's debatable whether we
+ // should test this at all here given that this is business of the housekeeping package. But
+ // it's easy enough to do, so it doesn't hurt.
+ for i := 0; i < 32; i++ {
+ newRefs[fmt.Sprintf("heads/branch-%d", i)] = gittest.WriteCommit(t, cfg, repoPath,
+ gittest.WithParents(),
+ gittest.WithMessage(strconv.Itoa(i)),
+ ).String()
}
for ref, oid := range newRefs {
+ require.NotEqual(t, oid, oldRefs[ref], "sanity check of new refs")
gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/"+ref, oid)
require.Equal(t, oid, resolveRef(t, cfg, repoPath, "refs/"+ref), "look up %q in source after update", ref)
}
@@ -218,8 +244,11 @@ func TestFetchFromOrigin_refUpdates(t *testing.T) {
func TestFetchFromOrigin_refs(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchFromOriginRefs)
+}
- ctx := testhelper.Context(t)
+func testFetchFromOriginRefs(t *testing.T, ctx context.Context) {
+ t.Parallel()
cfg, pool, _ := setupObjectPool(t, ctx)
poolPath := pool.FullPath()
diff --git a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
index c6539541c..df1951dba 100644
--- a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
+++ b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
@@ -1,6 +1,7 @@
package objectpool
import (
+ "context"
"os"
"path/filepath"
"testing"
@@ -17,6 +18,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testserver"
@@ -29,7 +31,13 @@ import (
)
func TestFetchIntoObjectPool_Success(t *testing.T) {
- ctx := testhelper.Context(t)
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchIntoObjectPoolSuccess)
+}
+
+func testFetchIntoObjectPoolSuccess(t *testing.T, ctx context.Context) {
+ t.Parallel()
+
cfg, repo, repoPath, locator, client := setup(ctx, t)
repoCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch(t.Name()))
@@ -57,12 +65,21 @@ func TestFetchIntoObjectPool_Success(t *testing.T) {
// No problems
gittest.Exec(t, cfg, "-C", pool.FullPath(), "fsck")
- packFiles, err := filepath.Glob(filepath.Join(pool.FullPath(), "objects", "pack", "pack-*.pack"))
- require.NoError(t, err)
- require.Len(t, packFiles, 1, "ensure commits got packed")
+ // Verify that the newly written commit exists in the repository now.
+ gittest.Exec(t, cfg, "-C", pool.FullPath(), "rev-parse", "--verify", repoCommit.String())
+
+ if featureflag.FetchIntoObjectPoolOptimizeRepository.IsDisabled(ctx) {
+ // We used to verify here how objects have been packed. This doesn't make a lot of
+ // sense anymore in the new world though, where we rely on the housekeeping package
+ // to do repository maintenance for us. We thus only check for this when the feature
+ // flag is disabled.
+ packFiles, err := filepath.Glob(filepath.Join(pool.FullPath(), "objects", "pack", "pack-*.pack"))
+ require.NoError(t, err)
+ require.Len(t, packFiles, 1, "ensure commits got packed")
- packContents := gittest.Exec(t, cfg, "-C", pool.FullPath(), "verify-pack", "-v", packFiles[0])
- require.Contains(t, string(packContents), repoCommit)
+ packContents := gittest.Exec(t, cfg, "-C", pool.FullPath(), "verify-pack", "-v", packFiles[0])
+ require.Contains(t, string(packContents), repoCommit.String())
+ }
_, err = client.FetchIntoObjectPool(ctx, req)
require.NoError(t, err, "calling FetchIntoObjectPool twice should be OK")
@@ -124,8 +141,14 @@ func TestFetchIntoObjectPool_hooks(t *testing.T) {
}
func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) {
- cfg := testcfg.Build(t)
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolOptimizeRepository).Run(t, testFetchIntoObjectPoolCollectLogStatistics)
+}
+func testFetchIntoObjectPoolCollectLogStatistics(t *testing.T, ctx context.Context) {
+ t.Parallel()
+
+ cfg := testcfg.Build(t)
testcfg.BuildGitalyHooks(t, cfg)
locator := config.NewLocator(cfg)
@@ -133,7 +156,6 @@ func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) {
logger, hook := test.NewNullLogger()
cfg.SocketPath = runObjectPoolServer(t, cfg, locator, logger)
- ctx := testhelper.Context(t)
ctx = ctxlogrus.ToContext(ctx, log.WithField("test", "logging"))
repo, _ := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
Seed: gittest.SeedGitLabTest,
diff --git a/internal/metadata/featureflag/ff_fetch_into_object_pool_optimize_repository.go b/internal/metadata/featureflag/ff_fetch_into_object_pool_optimize_repository.go
new file mode 100644
index 000000000..0d47c50df
--- /dev/null
+++ b/internal/metadata/featureflag/ff_fetch_into_object_pool_optimize_repository.go
@@ -0,0 +1,10 @@
+package featureflag
+
+// FetchIntoObjectPoolOptimizeRepository will cause FetchIntoObjectPool to use OptimizeRepository to
+// maintain the object pool instead of manually performing repository maintenance.
+var FetchIntoObjectPoolOptimizeRepository = NewFeatureFlag(
+ "fetch_into_object_pool_optimize_repository",
+ "v15.2.0",
+ "https://gitlab.com/gitlab-org/gitaly/-/issues/4342",
+ false,
+)