diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-11 11:12:15 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-15 10:20:59 +0300 |
commit | 88298a9cdb209dcd435244197347299b013e65f7 (patch) | |
tree | 89c9607c1c51c13dbefbb662ccfe10e22e65e9b5 | |
parent | 7e4ced45a42379d240661115c613912d318a1de1 (diff) |
repository: Fix indeterministic voting when creating new repospks-repository-creation-fix-indeterministic-voting
When creating repositories we use transactional voting to determine that
the repositories have been created the same on all nodes part of the
transaction. This voting happens after we have seeded the repository,
and the vote is computed by walking through the repository's directory
and hashing all its files. We need to be careful though to skip files
which we know to be indeterministic:
- FETCH_HEAD may contain URLs which are different for each of the
nodes.
- Object packfiles contained in the object database are not
deterministic, mostly because it may use multiple threads to
compute deltas.
Luckily, we do not have to rely on either of both types of files in
order to ensure that the user-visible state of the repository is the
same, so we can indeed just skip them.
While we already have the logic to skip these files, this logic didn't
work alright because we embarassingly forgot to actually return
`fs.SkipDir` in case we see the object directory. So even though we
thought we skipped these files, in reality we didn't.
This bug has been manifesting in production in form of CreateFork, which
regularly fails to reach quorum at random on a subset of nodes. The root
cause here is that we use git-clone(1) to seed repository contents of
the fork, which triggers exactly the case of indeterministic packfiles
noted above. So any successful CreateFork RPC call really only succeeded
by pure luck.
Fix this issue by correctly skipping over "object" directories. While at
it, fix how we skip over FETCH_HEAD by returning `nil`: it's a file and
not a directory, so it doesn't make much sense to return `fs.SkipDir`.
Changelog: fixed
-rw-r--r-- | internal/gitaly/service/repository/create_repository_from_bundle_test.go | 4 | ||||
-rw-r--r-- | internal/gitaly/service/repository/util.go | 3 | ||||
-rw-r--r-- | internal/gitaly/service/repository/util_test.go | 46 |
3 files changed, 50 insertions, 3 deletions
diff --git a/internal/gitaly/service/repository/create_repository_from_bundle_test.go b/internal/gitaly/service/repository/create_repository_from_bundle_test.go index 8f544432e..4e47e9ec3 100644 --- a/internal/gitaly/service/repository/create_repository_from_bundle_test.go +++ b/internal/gitaly/service/repository/create_repository_from_bundle_test.go @@ -151,8 +151,8 @@ func TestCreateRepositoryFromBundle_transactional(t *testing.T) { createVote("47553c06f575f757ad56ef3216c59804b72aa4a6", voting.Prepared), createVote("47553c06f575f757ad56ef3216c59804b72aa4a6", voting.Committed), // And this is the manual votes we compute by walking the repository. - createVote("da39a3ee5e6b4b0d3255bfef95601890afd80709", voting.Prepared), - createVote("da39a3ee5e6b4b0d3255bfef95601890afd80709", voting.Committed), + createVote("5947862798db146701879742c0d8fd988ca37797", voting.Prepared), + createVote("5947862798db146701879742c0d8fd988ca37797", voting.Committed), }, txManager.Votes()) } diff --git a/internal/gitaly/service/repository/util.go b/internal/gitaly/service/repository/util.go index 97a96de28..852995543 100644 --- a/internal/gitaly/service/repository/util.go +++ b/internal/gitaly/service/repository/util.go @@ -124,11 +124,12 @@ func (s *server) createRepository( // The way packfiles are generated may not be deterministic, so we skip over the // object database. case filepath.Join(newRepoDir.Path(), "objects"): + return fs.SkipDir // FETCH_HEAD refers to the remote we're fetching from. This URL may not be // deterministic, e.g. when fetching from a temporary file like we do in // CreateRepositoryFromBundle. case filepath.Join(newRepoDir.Path(), "FETCH_HEAD"): - return fs.SkipDir + return nil } // We do not care about directories. diff --git a/internal/gitaly/service/repository/util_test.go b/internal/gitaly/service/repository/util_test.go index 17773d5a8..ffbb80001 100644 --- a/internal/gitaly/service/repository/util_test.go +++ b/internal/gitaly/service/repository/util_test.go @@ -206,6 +206,52 @@ func TestCreateRepository(t *testing.T) { }, expectedErr: fmt.Errorf("locking repository: %w", errors.New("file already locked")), }, + { + desc: "vote is deterministic", + transactional: true, + setup: func(t *testing.T, repo *gitalypb.Repository, repoPath string) { + txManager.VoteFn = func(_ context.Context, _ txinfo.Transaction, vote voting.Vote, _ voting.Phase) error { + require.Equal(t, voting.VoteFromData([]byte("headcfgfoo")), vote) + return nil + } + }, + seed: func(t *testing.T, repo *gitalypb.Repository, repoPath string) error { + // Remove the repository first so we can start from a clean state. + require.NoError(t, os.RemoveAll(repoPath)) + require.NoError(t, os.Mkdir(repoPath, 0o777)) + + // Objects and FETCH_HEAD should both be ignored. They may contain + // indeterministic data that's different across replicas and would + // thus cause us to not reach quorum. + require.NoError(t, os.Mkdir(filepath.Join(repoPath, "objects"), 0o777)) + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "objects", "object"), []byte("object"), 0o666)) + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "FETCH_HEAD"), []byte("fetch-head"), 0o666)) + + // All the other files should be hashed though. + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "HEAD"), []byte("head"), 0o666)) + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "config"), []byte("cfg"), 0o666)) + require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "refs", "heads"), 0o777)) + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "refs", "heads", "foo"), []byte("foo"), 0o666)) + + return nil + }, + verify: func(t *testing.T, _ *gitalypb.Repository, tempRepoPath string, _ *gitalypb.Repository, realRepoPath string) { + require.NoDirExists(t, tempRepoPath) + require.DirExists(t, realRepoPath) + + // Even though a subset of data wasn't voted on, it should still be + // part of the final repository. + for expectedPath, expectedContents := range map[string]string{ + filepath.Join(realRepoPath, "objects", "object"): "object", + filepath.Join(realRepoPath, "HEAD"): "head", + filepath.Join(realRepoPath, "FETCH_HEAD"): "fetch-head", + filepath.Join(realRepoPath, "config"): "cfg", + filepath.Join(realRepoPath, "refs", "heads", "foo"): "foo", + } { + require.Equal(t, expectedContents, string(testhelper.MustReadFile(t, expectedPath))) + } + }, + }, } { t.Run(tc.desc, func(t *testing.T) { repo := &gitalypb.Repository{ |