Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSami Hiltunen <shiltunen@gitlab.com>2023-04-04 20:00:03 +0300
committerSami Hiltunen <shiltunen@gitlab.com>2023-04-11 15:56:01 +0300
commitb420236e0ad94f1a77ceef9dbbfe1591c3f5d3eb (patch)
tree3fd163fa0dd40ea8bee9e5011ed3c9dac52a371a
parent4e09e98ce19f2d19654ce786c9de9fa8d2097e3b (diff)
Implement WalkUnreachableObjects
Gitaly's write-ahead log needs to include pack files with the log entries in order to log the objects the transactions introduce. Along with the new objects, the pack files must also include objects that are made reachable in the transaction. This guarantees the pack files will apply later from the log even if some of the unreachable objects were pruned while the pack file was sitting in the log. This commit implements WalkUnreachableObjects to help with computing such a pack file. It takes a list of new heads in the transaction and outputs a list of objects that are reachable from these heads but not from the existing references in the repository. When this is run on a localrepo instance that has been configured with the transaction's quarantine directory as an alternate, it yields the list of new and newly reachable objects compared to the current set of references.
-rw-r--r--internal/git/localrepo/objects.go61
-rw-r--r--internal/git/localrepo/objects_test.go83
2 files changed, 144 insertions, 0 deletions
diff --git a/internal/git/localrepo/objects.go b/internal/git/localrepo/objects.go
index 314c8fb8c..c9fb2345b 100644
--- a/internal/git/localrepo/objects.go
+++ b/internal/git/localrepo/objects.go
@@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
+ "regexp"
"strings"
"time"
@@ -323,6 +324,66 @@ func (repo *Repo) IsAncestor(ctx context.Context, parent, child git.Revision) (b
return true, nil
}
+// BadObjectError is returned by WalkUnreachableObjects when git's stderr reports a bad object.
+type BadObjectError struct {
+ // ObjectID is the ID of the object git reported as bad, as parsed from git's stderr.
+ ObjectID git.ObjectID
+}
+
+// Error implements the error interface; the message contains the bad object's ID in quoted form.
+func (err BadObjectError) Error() string {
+ return fmt.Sprintf("bad object %q", err.ObjectID)
+}
+
+// ObjectReadError is returned by WalkUnreachableObjects when git's stderr reports it could not read an object.
+type ObjectReadError struct {
+ // ObjectID is the ID of the object that git failed to read, as parsed from git's stderr.
+ ObjectID git.ObjectID
+}
+
+// Error implements the error interface; the message contains the unreadable object's ID in quoted form.
+func (err ObjectReadError) Error() string {
+ return fmt.Sprintf("failed reading object %q", err.ObjectID)
+}
+
+var (
+ regexpBadObjectError = regexp.MustCompile(`^fatal: bad object ([[:xdigit:]]*)\n$`) // anchored at both ends: stderr must be exactly this one line
+ regexpObjectReadError = regexp.MustCompile(`^error: Could not read ([[:xdigit:]]*)\n`) // anchored at the start only: further stderr output may follow
+)
+
+// WalkUnreachableObjects runs rev-list with --objects --not --all --stdin, reading heads from stdin, and
+// writes to output the newline-separated objects that the walk includes but no reference reaches. Heads
+// must yield newline-separated object IDs. Known git failures map to BadObjectError or ObjectReadError.
+func (repo *Repo) WalkUnreachableObjects(ctx context.Context, heads io.Reader, output io.Writer) error {
+ var stderr bytes.Buffer
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "rev-list",
+ Flags: []git.Option{
+ git.Flag{Name: "--objects"},
+ git.Flag{Name: "--not"},
+ git.Flag{Name: "--all"},
+ git.Flag{Name: "--stdin"},
+ },
+ },
+ git.WithStdin(heads),
+ git.WithStdout(output),
+ git.WithStderr(&stderr),
+ ); err != nil {
+ if matches := regexpBadObjectError.FindSubmatch(stderr.Bytes()); len(matches) > 1 {
+ return BadObjectError{ObjectID: git.ObjectID(matches[1])}
+ }
+
+ if matches := regexpObjectReadError.FindSubmatch(stderr.Bytes()); len(matches) > 1 {
+ return ObjectReadError{ObjectID: git.ObjectID(matches[1])}
+ }
+
+ return structerr.New("rev-list: %w", err).WithMetadata("stderr", stderr.String()) // unrecognized failure: attach stderr as metadata
+ }
+
+ return nil
+}
+
// PackObjects takes in object IDs separated by newlines. It packs the objects into a pack file and
// writes it into the output.
func (repo *Repo) PackObjects(ctx context.Context, objectIDs io.Reader, output io.Writer) error {
diff --git a/internal/git/localrepo/objects_test.go b/internal/git/localrepo/objects_test.go
index 8e7740566..d9af7525c 100644
--- a/internal/git/localrepo/objects_test.go
+++ b/internal/git/localrepo/objects_test.go
@@ -538,6 +538,89 @@ func TestRepo_IsAncestor(t *testing.T) {
}
}
+func TestWalkUnreachableObjects(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+
+ cfg, repo, repoPath := setupRepo(t)
+
+ commit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("commit-1"))
+ unreachableCommit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit1))
+ unreachableCommit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit1))
+ prunedCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit2))
+ brokenParent1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(prunedCommit))
+
+ // Pack brokenParent1 so we can unpack it back into the repository after pruning, which leaves it as
+ // an object whose parent link points at a missing commit.
+ var packedBrokenParent bytes.Buffer
+ require.NoError(t, repo.PackObjects(ctx, strings.NewReader(brokenParent1.String()), &packedBrokenParent))
+
+ // Prune to remove prunedCommit; the unreachable commits passed as extra heads are meant to be kept.
+ gittest.Exec(t, cfg, "-C", repoPath, "prune", unreachableCommit1.String(), unreachableCommit2.String())
+
+ // Unpack brokenParent1 now that its parent, prunedCommit, has been pruned.
+ require.NoError(t, repo.UnpackObjects(ctx, &packedBrokenParent))
+
+ gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{
+ commit1, unreachableCommit1, unreachableCommit2, brokenParent1,
+ })
+
+ for _, tc := range []struct {
+ desc string
+ heads []git.ObjectID
+ expectedOutput []string
+ expectedError error
+ }{
+ {
+ desc: "no heads",
+ },
+ {
+ desc: "reachable commit not reported",
+ heads: []git.ObjectID{commit1},
+ },
+ {
+ desc: "unreachable commits reported",
+ heads: []git.ObjectID{unreachableCommit2},
+ expectedOutput: []string{
+ unreachableCommit1.String(),
+ unreachableCommit2.String(),
+ },
+ },
+ {
+ desc: "non-existent head",
+ heads: []git.ObjectID{prunedCommit},
+ expectedError: BadObjectError{ObjectID: prunedCommit},
+ },
+ {
+ desc: "traversal fails due to missing parent commit",
+ heads: []git.ObjectID{brokenParent1},
+ expectedError: ObjectReadError{prunedCommit},
+ },
+ } {
+ tc := tc
+ t.Run(tc.desc, func(t *testing.T) {
+ t.Parallel()
+
+ var heads []string
+ for _, head := range tc.heads {
+ heads = append(heads, head.String())
+ }
+
+ var output bytes.Buffer
+ require.Equal(t,
+ tc.expectedError,
+ repo.WalkUnreachableObjects(ctx, strings.NewReader(strings.Join(heads, "\n")), &output))
+
+ var actualOutput []string
+ if output.Len() > 0 {
+ actualOutput = strings.Split(strings.TrimSpace(output.String()), "\n")
+ }
+ require.ElementsMatch(t, tc.expectedOutput, actualOutput)
+ })
+ }
+}
+
func TestPackAndUnpackObjects(t *testing.T) {
t.Parallel()