diff options
author | Sami Hiltunen <shiltunen@gitlab.com> | 2023-04-04 20:00:03 +0300 |
---|---|---|
committer | Sami Hiltunen <shiltunen@gitlab.com> | 2023-04-04 21:25:08 +0300 |
commit | 6723cab60e1b5710b0c329f5e00621e2ade645d7 (patch) | |
tree | 302784e6c347ebd51f1092907f6a8a3b5fb51f42 | |
parent | dc55c3a2ee7bc36f7dc7fd49c1afe1e138f3f5b5 (diff) |
Implement WalkUnreachableObjects
Gitaly's write-ahead log needs to include pack files with the log
entries in order to log the objects the transactions introduce. Along with
the new objects, the pack files must also include objects that are
made reachable in the transaction. This guarantees the pack files will
apply later from the log even if some of the unreachable objects were
pruned while the pack file was sitting in the log. This commit implements
WalkUnreachableObjects to help with computing such a pack file. It takes
a list of new heads in the transaction and outputs a list of objects that
are reachable from these heads but not from the existing references in the
repository. When this is run on a localrepo instance that has been
configured with the transaction's quarantine directory as an alternate,
it yields the list of new and newly reachable objects compared to the
current set of references.
-rw-r--r-- | internal/git/localrepo/objects.go | 61 | ||||
-rw-r--r-- | internal/git/localrepo/objects_test.go | 73 |
2 files changed, 134 insertions, 0 deletions
diff --git a/internal/git/localrepo/objects.go b/internal/git/localrepo/objects.go index 3149b1d24..d0fa8ab8f 100644 --- a/internal/git/localrepo/objects.go +++ b/internal/git/localrepo/objects.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "regexp" "strings" "time" @@ -322,6 +323,66 @@ func (repo *Repo) IsAncestor(ctx context.Context, parent, child git.Revision) (b return true, nil } +// BadObjectError is returned when attempting to walk a bad object. +type BadObjectError struct { + // ObjectID is the object id of the object that was bad. + ObjectID git.ObjectID +} + +// Error returns the error message. +func (err BadObjectError) Error() string { + return fmt.Sprintf("bad object %q", err.ObjectID) +} + +// ObjectReadError is returned when reading an object fails. +type ObjectReadError struct { + // ObjectID is the object id of the object that git failed to read + ObjectID git.ObjectID +} + +// Error returns the error message. +func (err ObjectReadError) Error() string { + return fmt.Sprintf("failed reading object %q", err.ObjectID) +} + +var ( + regexpBadObjectError = regexp.MustCompile(`^fatal: bad object ([[:xdigit:]]*)\n$`) + regexpObjectReadError = regexp.MustCompile(`^error: Could not read ([[:xdigit:]]*)\n`) +) + +// WalkUnreachableObjects walks the object graph starting from heads and writes to the output object IDs +// that are included in the walk but unreachable from any of the repository's references. Heads should +// return object IDs separated with a newline. Output is object IDs separated by newlines. 
+func (repo *Repo) WalkUnreachableObjects(ctx context.Context, heads io.Reader, output io.Writer) error { + var stderr bytes.Buffer + if err := repo.ExecAndWait(ctx, + git.Command{ + Name: "rev-list", + Flags: []git.Option{ + git.Flag{Name: "--objects"}, + git.Flag{Name: "--not"}, + git.Flag{Name: "--all"}, + git.Flag{Name: "--stdin"}, + }, + }, + git.WithStdin(heads), + git.WithStdout(output), + git.WithStderr(&stderr), + ); err != nil { + if matches := regexpBadObjectError.FindSubmatch(stderr.Bytes()); len(matches) > 1 { + return BadObjectError{ObjectID: git.ObjectID(matches[1])} + } + + if matches := regexpObjectReadError.FindSubmatch(stderr.Bytes()); len(matches) > 1 { + return ObjectReadError{ObjectID: git.ObjectID(matches[1])} + } + + return fmt.Errorf("rev-list: %w, stderr: %q", err, stderr.String()) + } + + return nil +} + // PackObjects takes in object IDs separated by newlines. It packs the objects into a pack file and // writes it into the output. func (repo *Repo) PackObjects(ctx context.Context, objectIDs io.Reader, output io.Writer) error { diff --git a/internal/git/localrepo/objects_test.go b/internal/git/localrepo/objects_test.go index 5390b7ea1..a80753f74 100644 --- a/internal/git/localrepo/objects_test.go +++ b/internal/git/localrepo/objects_test.go @@ -538,6 +538,79 @@ func TestRepo_IsAncestor(t *testing.T) { } } +func TestWalkUnreachableObjects(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + + cfg, repo, repoPath := setupRepo(t) + + commit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("commit-1")) + unreachableCommit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit1)) + unreachableCommit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit1)) + prunedCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit2)) + brokenParent1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(prunedCommit)) + + // Pack brokenParent so we can 
unpack it into the repository as an object with broken links after + // pruning. + var packedBrokenParent bytes.Buffer + require.NoError(t, repo.PackObjects(ctx, strings.NewReader(brokenParent1.String()), &packedBrokenParent)) + + // Prune to remove the prunedCommit. + gittest.Exec(t, cfg, "-C", repoPath, "prune", unreachableCommit1.String(), unreachableCommit2.String()) + + // Unpack brokenParent now that the parent has been pruned. + require.NoError(t, repo.UnpackObjects(ctx, &packedBrokenParent)) + // gittest.ExecOpts(t, cfg, gittest.ExecConfig{Stdin: &packedBrokenParent}, "-C", repoPath, "unpack-objects") + + gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{ + commit1, unreachableCommit1, unreachableCommit2, brokenParent1, + }) + + for _, tc := range []struct { + desc string + heads []git.ObjectID + expectedOutput string + expectedError error + }{ + { + desc: "no heads", + }, + { + desc: "reachable commit not reported", + heads: []git.ObjectID{commit1}, + }, + { + desc: "unreachable commits reported", + heads: []git.ObjectID{unreachableCommit2}, + expectedOutput: "1afc2cf83cf04db11fc16bfdc403ca6f968cbd79\n0395680c5bb13cb1e67f018583985f42c8700250\n", + }, + { + desc: "non-existent head", + heads: []git.ObjectID{prunedCommit}, + expectedError: BadObjectError{ObjectID: prunedCommit}, + }, + { + desc: "traversal fails due to missing parent commit", + heads: []git.ObjectID{brokenParent1}, + expectedError: ObjectReadError{prunedCommit}, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + var heads []string + for _, head := range tc.heads { + heads = append(heads, head.String()) + } + + var output bytes.Buffer + require.Equal(t, + tc.expectedError, + repo.WalkUnreachableObjects(ctx, strings.NewReader(strings.Join(heads, "\n")), &output)) + require.Equal(t, tc.expectedOutput, output.String()) + }) + } +} + func TestPackAndUnpackObjects(t *testing.T) { t.Parallel() |