Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--internal/git/objectpool/fetch.go141
-rw-r--r--internal/git/objectpool/fetch_test.go38
-rw-r--r--internal/gitaly/service/objectpool/fetch_into_object_pool_test.go91
-rw-r--r--internal/metadata/featureflag/ff_fetch_into_object_pool_prune_refs.go11
4 files changed, 256 insertions, 25 deletions
diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go
index 2c491d9a0..0ee7d6d0a 100644
--- a/internal/git/objectpool/fetch.go
+++ b/internal/git/objectpool/fetch.go
@@ -16,8 +16,13 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/updateref"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/transaction/voting"
)
+var objectPoolRefspec = fmt.Sprintf("+refs/*:%s/*", git.ObjectPoolRefNamespace)
+
// FetchFromOrigin initializes the pool and fetches the objects from its origin repository
func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *localrepo.Repo) error {
if err := o.Init(ctx); err != nil {
@@ -37,7 +42,28 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *localrepo.Repo
return fmt.Errorf("computing stats before fetch: %w", err)
}
- refSpec := fmt.Sprintf("+refs/*:%s/*", git.ObjectPoolRefNamespace)
+ // Ideally we wouldn't want to prune old references at all so that we can keep alive all
+ // objects without having to create loads of dangling references. But unfortunately keeping
+ // around old refs can lead to D/F conflicts between old references that have since
+ // been deleted in the pool and new references that have been added in the pool member we're
+ // fetching from. E.g. if we have the old reference `refs/heads/branch` and the pool member
+ // has replaced that since with a new reference `refs/heads/branch/conflict` then
+ // the fetch would now always fail because of that conflict.
+ //
+ // Due to the lack of an alternative to resolve that conflict we are thus forced to enable
+ // pruning. This isn't too bad given that we know to keep alive the old objects via dangling
+ // refs anyway, but I'd sleep easier if we didn't have to do this.
+ //
+ // Note that we need to perform the pruning separately from the fetch: if the fetch is using
+ // `--atomic` and `--prune` together then it still wouldn't be able to recover from the D/F
+ // conflict. So we first to a preliminary prune that only prunes refs without fetching
+ // objects yet to avoid that scenario.
+ if featureflag.FetchIntoObjectPoolPruneRefs.IsEnabled(ctx) {
+ if err := o.pruneReferences(ctx, origin); err != nil {
+ return fmt.Errorf("pruning references: %w", err)
+ }
+ }
+
var stderr bytes.Buffer
if err := o.Repo.ExecAndWait(ctx,
git.SubCmd{
@@ -54,7 +80,7 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *localrepo.Repo
// references.
git.Flag{Name: "--no-write-fetch-head"},
},
- Args: []string{originPath, refSpec},
+ Args: []string{originPath, objectPoolRefspec},
},
git.WithRefTxHook(o.Repo),
git.WithStderr(&stderr),
@@ -78,6 +104,117 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *localrepo.Repo
return nil
}
+// pruneReferences prunes any references that have been deleted in the origin repository.
+func (o *ObjectPool) pruneReferences(ctx context.Context, origin *localrepo.Repo) error {
+ originPath, err := origin.Path()
+ if err != nil {
+ return fmt.Errorf("computing origin repo's path: %w", err)
+ }
+
+ // Ideally, we'd just use `git remote prune` directly. But unfortunately, this command does
+ // not support atomic updates, but will instead use a separate reference transaction for
+ // updating the packed-refs file and for updating each of the loose references. This can be
+ // really expensive in case we are about to prune a lot of references given that every time,
+ // the reference-transaction hook needs to vote on the deletion and reach quorum.
+ //
+ // Instead we ask for a dry-run, parse the output and queue up every reference into a
+ // git-update-ref(1) process. While ugly, it works around the performance issues.
+ prune, err := o.Repo.Exec(ctx,
+ git.SubSubCmd{
+ Name: "remote",
+ Action: "prune",
+ Args: []string{"origin"},
+ Flags: []git.Option{
+ git.Flag{Name: "--dry-run"},
+ },
+ },
+ git.WithConfig(git.ConfigPair{Key: "remote.origin.url", Value: originPath}),
+ git.WithConfig(git.ConfigPair{Key: "remote.origin.fetch", Value: objectPoolRefspec}),
+ // This is a dry-run, only, so we don't have to enable hooks.
+ git.WithDisabledHooks(),
+ )
+ if err != nil {
+ return fmt.Errorf("spawning prune: %w", err)
+ }
+
+ updater, err := updateref.New(ctx, o.Repo)
+ if err != nil {
+ return fmt.Errorf("spawning updater: %w", err)
+ }
+
+ // We need to manually compute a vote because all deletions we queue up here are
+ // force-deletions. We are forced to filter out force-deletions because these may also
+ // happen when evicting references from the packed-refs file.
+ voteHash := voting.NewVoteHash()
+
+ scanner := bufio.NewScanner(prune)
+ for scanner.Scan() {
+ line := scanner.Bytes()
+
+ // We need to skip the first two lines that represent the header of git-remote(1)'s
+ // output. While we should ideally just use a state machine here, it doesn't feel
+ // worth it given that the output is comparatively simple and given that the pruned
+ // branches are distinguished by a special prefix.
+ switch {
+ case bytes.Equal(line, []byte("Pruning origin")):
+ continue
+ case bytes.HasPrefix(line, []byte("URL: ")):
+ continue
+ case bytes.HasPrefix(line, []byte(" * [would prune] ")):
+ // The references announced by git-remote(1) only have the remote's name as
+ // prefix, which is "origin". We thus have to reassemble the complete name
+ // of every reference here.
+ deletedRef := "refs/remotes/" + string(bytes.TrimPrefix(line, []byte(" * [would prune] ")))
+
+ if _, err := io.Copy(voteHash, strings.NewReader(fmt.Sprintf("%[1]s %[1]s %s\n", git.ObjectHashSHA1.ZeroOID, deletedRef))); err != nil {
+ return fmt.Errorf("hashing reference deletion: %w", err)
+ }
+
+ if err := updater.Delete(git.ReferenceName(deletedRef)); err != nil {
+ return fmt.Errorf("queueing ref for deletion: %w", err)
+ }
+ default:
+ return fmt.Errorf("unexpected line: %q", line)
+ }
+ }
+
+ if err := scanner.Err(); err != nil {
+ return fmt.Errorf("scanning deleted refs: %w", err)
+ }
+
+ if err := prune.Wait(); err != nil {
+ return fmt.Errorf("waiting for prune: %w", err)
+ }
+
+ vote, err := voteHash.Vote()
+ if err != nil {
+ return fmt.Errorf("computing vote: %w", err)
+ }
+
+ // Prepare references so that they're locked and cannot be written by any concurrent
+ // processes. This also verifies that we can indeed delete the references.
+ if err := updater.Prepare(); err != nil {
+ return fmt.Errorf("preparing deletion of references: %w", err)
+ }
+
+ // Vote on the references we're about to delete.
+ if err := transaction.VoteOnContext(ctx, o.txManager, vote, voting.Prepared); err != nil {
+ return fmt.Errorf("preparational vote on pruned references: %w", err)
+ }
+
+ // Commit the pruned references to disk so that the change gets applied.
+ if err := updater.Commit(); err != nil {
+ return fmt.Errorf("deleting references: %w", err)
+ }
+
+ // And then confirm that we actually deleted the references.
+ if err := transaction.VoteOnContext(ctx, o.txManager, vote, voting.Committed); err != nil {
+ return fmt.Errorf("preparational vote on pruned references: %w", err)
+ }
+
+ return nil
+}
+
const danglingObjectNamespace = "refs/dangling/"
// rescueDanglingObjects creates refs for all dangling objects if finds
diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go
index 53fc9a3e6..2df9a769d 100644
--- a/internal/git/objectpool/fetch_test.go
+++ b/internal/git/objectpool/fetch_test.go
@@ -3,6 +3,7 @@
package objectpool
import (
+ "context"
"fmt"
"os"
"path/filepath"
@@ -16,13 +17,18 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
)
func TestFetchFromOrigin_dangling(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchFromOriginDangling)
+}
+
+func testFetchFromOriginDangling(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -92,8 +98,12 @@ func TestFetchFromOrigin_dangling(t *testing.T) {
func TestFetchFromOrigin_fsck(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchFromOriginFsck)
+}
+
+func testFetchFromOriginFsck(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
repoPath := filepath.Join(cfg.Storages[0].Path, repo.GetRelativePath())
@@ -117,8 +127,12 @@ func TestFetchFromOrigin_fsck(t *testing.T) {
func TestFetchFromOrigin_deltaIslands(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchFromOriginDeltaIslands)
+}
+
+func testFetchFromOriginDeltaIslands(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -138,8 +152,12 @@ func TestFetchFromOrigin_deltaIslands(t *testing.T) {
func TestFetchFromOrigin_bitmapHashCache(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchFromOriginBitmapHashCache)
+}
+
+func testFetchFromOriginBitmapHashCache(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -164,8 +182,12 @@ func TestFetchFromOrigin_bitmapHashCache(t *testing.T) {
func TestFetchFromOrigin_refUpdates(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchFromOriginRefUpdates)
+}
+
+func testFetchFromOriginRefUpdates(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, pool, repoProto := setupObjectPool(t, ctx)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
repoPath := filepath.Join(cfg.Storages[0].Path, repo.GetRelativePath())
@@ -217,8 +239,12 @@ func TestFetchFromOrigin_refUpdates(t *testing.T) {
func TestFetchFromOrigin_refs(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchFromOriginRefs)
+}
+
+func testFetchFromOriginRefs(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, pool, _ := setupObjectPool(t, ctx)
poolPath := pool.FullPath()
diff --git a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
index 6018ae32b..03262537b 100644
--- a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
+++ b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
@@ -24,6 +24,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper"
"gitlab.com/gitlab-org/gitaly/v15/internal/metadata"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testserver"
@@ -39,8 +40,12 @@ import (
func TestFetchIntoObjectPool_Success(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchIntoObjectPoolSuccess)
+}
+
+func testFetchIntoObjectPoolSuccess(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, repo, repoPath, locator, client := setup(ctx, t)
repoCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch(t.Name()))
@@ -93,8 +98,11 @@ func TestFetchIntoObjectPool_Success(t *testing.T) {
func TestFetchIntoObjectPool_transactional(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchIntoObjectPoolTransactional)
+}
- ctx := testhelper.Context(t)
+func testFetchIntoObjectPoolTransactional(t *testing.T, ctx context.Context) {
+ t.Parallel()
var votes []voting.Vote
var votesMutex sync.Mutex
@@ -155,9 +163,17 @@ func TestFetchIntoObjectPool_transactional(t *testing.T) {
})
require.NoError(t, err)
- // This is a bug: we should always perform transactional voting even when nothing
- // has changed.
- require.Nil(t, votes)
+ if featureflag.FetchIntoObjectPoolPruneRefs.IsEnabled(ctx) {
+ require.Equal(t, []voting.Vote{
+ // We expect to see two votes that demonstrate we're voting on no deleted
+ // references.
+ voting.VoteFromData(nil), voting.VoteFromData(nil),
+ // It is a bug though that we don't have a vote on the unchanged references
+ // in git-fetch(1).
+ }, votes)
+ } else {
+ require.Nil(t, votes)
+ }
})
t.Run("with a new reference", func(t *testing.T) {
@@ -173,33 +189,61 @@ func TestFetchIntoObjectPool_transactional(t *testing.T) {
})
require.NoError(t, err)
- // We expect a single vote on the reference we're about to pull in here.
vote := voting.VoteFromData([]byte(fmt.Sprintf(
"%s %s refs/remotes/origin/heads/new-branch\n", git.ObjectHashSHA1.ZeroOID, repoCommit,
)))
- require.Equal(t, []voting.Vote{vote, vote}, votes)
+ if featureflag.FetchIntoObjectPoolPruneRefs.IsEnabled(ctx) {
+ require.Equal(t, []voting.Vote{
+ // The first two votes stem from the fact that we're voting on no
+ // deleted references.
+ voting.VoteFromData(nil), voting.VoteFromData(nil),
+ // And the other two votes are from the new branch we pull in.
+ vote, vote,
+ }, votes)
+ } else {
+ require.Equal(t, []voting.Vote{vote, vote}, votes)
+ }
})
t.Run("with a stale reference in pool", func(t *testing.T) {
votes = nil
+ reference := "refs/remotes/origin/heads/to-be-pruned"
+
// Create a commit in the pool repository itself. Right now, we don't touch this
// commit at all, but this will change in one of the next commits.
- gittest.WriteCommit(t, cfg, pool.FullPath(), gittest.WithParents(), gittest.WithReference("refs/remotes/origin/to-be-pruned"))
+ gittest.WriteCommit(t, cfg, pool.FullPath(), gittest.WithParents(), gittest.WithReference(reference))
_, err = client.FetchIntoObjectPool(ctx, &gitalypb.FetchIntoObjectPoolRequest{
ObjectPool: pool.ToProto(),
Origin: repo,
})
require.NoError(t, err)
- require.Nil(t, votes)
+
+ if featureflag.FetchIntoObjectPoolPruneRefs.IsEnabled(ctx) {
+ // We expect a single vote on the reference we have deleted.
+ vote := voting.VoteFromData([]byte(fmt.Sprintf(
+ "%[1]s %[1]s %s\n", git.ObjectHashSHA1.ZeroOID, reference,
+ )))
+ require.Equal(t, []voting.Vote{vote, vote}, votes)
+ } else {
+ require.Nil(t, votes)
+ }
+
+ exists, err := pool.Repo.HasRevision(ctx, git.Revision(reference))
+ require.NoError(t, err)
+ require.Equal(t, exists, featureflag.FetchIntoObjectPoolPruneRefs.IsDisabled(ctx))
})
}
func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchIntoObjectPoolCollectLogStatistics)
+}
+
+func testFetchIntoObjectPoolCollectLogStatistics(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg := testcfg.Build(t)
testcfg.BuildGitalyHooks(t, cfg)
@@ -327,8 +371,12 @@ func TestFetchIntoObjectPool_Failure(t *testing.T) {
func TestFetchIntoObjectPool_dfConflict(t *testing.T) {
t.Parallel()
+ testhelper.NewFeatureSets(featureflag.FetchIntoObjectPoolPruneRefs).Run(t, testFetchIntoObjectPoolDfConflict)
+}
+
+func testFetchIntoObjectPoolDfConflict(t *testing.T, ctx context.Context) {
+ t.Parallel()
- ctx := testhelper.Context(t)
cfg, repo, repoPath, _, client := setup(ctx, t)
pool := initObjectPool(t, cfg, cfg.Storages[0])
@@ -369,11 +417,20 @@ func TestFetchIntoObjectPool_dfConflict(t *testing.T) {
ObjectPool: pool.ToProto(),
Origin: repo,
})
+ if featureflag.FetchIntoObjectPoolPruneRefs.IsEnabled(ctx) {
+ require.NoError(t, err)
- // But right now it fails due to a bug.
- testhelper.RequireGrpcError(t, helper.ErrInternalf(
- "fetch into object pool: exit status %d, stderr: %q",
- expectedExitStatus,
- "error: cannot lock ref 'refs/remotes/origin/heads/branch/conflict': 'refs/remotes/origin/heads/branch' exists; cannot create 'refs/remotes/origin/heads/branch/conflict'\n",
- ), err)
+ poolPath, err := config.NewLocator(cfg).GetRepoPath(gittest.RewrittenRepository(ctx, t, cfg, pool.ToProto().GetRepository()))
+ require.NoError(t, err)
+
+ // Verify that the conflicting reference exists now.
+ gittest.Exec(t, cfg, "-C", poolPath, "rev-parse", "refs/remotes/origin/heads/branch/conflict")
+ } else {
+ // But right now it fails due to a bug.
+ testhelper.RequireGrpcError(t, helper.ErrInternalf(
+ "fetch into object pool: exit status %d, stderr: %q",
+ expectedExitStatus,
+ "error: cannot lock ref 'refs/remotes/origin/heads/branch/conflict': 'refs/remotes/origin/heads/branch' exists; cannot create 'refs/remotes/origin/heads/branch/conflict'\n",
+ ), err)
+ }
}
diff --git a/internal/metadata/featureflag/ff_fetch_into_object_pool_prune_refs.go b/internal/metadata/featureflag/ff_fetch_into_object_pool_prune_refs.go
new file mode 100644
index 000000000..21d8eb3f0
--- /dev/null
+++ b/internal/metadata/featureflag/ff_fetch_into_object_pool_prune_refs.go
@@ -0,0 +1,11 @@
+package featureflag
+
+// FetchIntoObjectPoolPruneRefs enables pruning of references in object pools. This is required in
+// order to fix cases where updating pools doesn't work anymore due to preexisting references
+// conflicting with new references in the pool member.
+var FetchIntoObjectPoolPruneRefs = NewFeatureFlag(
+ "fetch_into_object_pool_prune_refs",
+ "v15.3.0",
+ "https://gitlab.com/gitlab-org/gitaly/-/issues/4394",
+ false,
+)