Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sami Hiltunen <shiltunen@gitlab.com> 2023-04-04 20:00:03 +0300
committer Sami Hiltunen <shiltunen@gitlab.com> 2023-04-04 21:25:08 +0300
commit 6723cab60e1b5710b0c329f5e00621e2ade645d7 (patch)
tree 302784e6c347ebd51f1092907f6a8a3b5fb51f42
parent dc55c3a2ee7bc36f7dc7fd49c1afe1e138f3f5b5 (diff)
Implement WalkUnreachableObjects
Gitaly's write-ahead log needs to include pack files with the log entries in order to log the objects the transactions introduce. Along with the new objects, the pack files must also include objects that are made reachable in the transaction. This guarantees the pack files will apply later from the log even if some of the unreachable objects were pruned while the pack file was sitting in the log. This commit implements WalkUnreachableObjects to help with computing such a pack file. It takes a list of new heads in the transaction and outputs a list of objects that are reachable from these heads but not from the existing references in the repository. When this is run on a localrepo instance that has been configured with the transaction's quarantine directory as an alternate, it yields the list of new and newly reachable objects compared to the current set of references.
-rw-r--r-- internal/git/localrepo/objects.go 61
-rw-r--r-- internal/git/localrepo/objects_test.go 73
2 files changed, 134 insertions, 0 deletions
diff --git a/internal/git/localrepo/objects.go b/internal/git/localrepo/objects.go
index 3149b1d24..d0fa8ab8f 100644
--- a/internal/git/localrepo/objects.go
+++ b/internal/git/localrepo/objects.go
@@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
+ "regexp"
"strings"
"time"
@@ -322,6 +323,66 @@ func (repo *Repo) IsAncestor(ctx context.Context, parent, child git.Revision) (b
return true, nil
}
+// BadObjectError is returned when attempting to walk a bad object.
+type BadObjectError struct {
+ // ObjectID is the object id of the object that was bad.
+ ObjectID git.ObjectID
+}
+
+// Error returns the error message.
+func (err BadObjectError) Error() string {
+ return fmt.Sprintf("bad object %q", err.ObjectID)
+}
+
+// ObjectReadError is returned when reading an object fails.
+type ObjectReadError struct {
+ // ObjectID is the object id of the object that git failed to read.
+ ObjectID git.ObjectID
+}
+
+// Error returns the error message.
+func (err ObjectReadError) Error() string {
+ return fmt.Sprintf("failed reading object %q", err.ObjectID)
+}
+
+var (
+ regexpBadObjectError = regexp.MustCompile(`^fatal: bad object ([[:xdigit:]]*)\n$`)
+ regexpObjectReadError = regexp.MustCompile(`^error: Could not read ([[:xdigit:]]*)\n`)
+)
+
+// WalkUnreachableObjects walks the object graph starting from heads and writes to the output object IDs
+// that are included in the walk but unreachable from any of the repository's references. Heads should
+// yield object IDs separated by newlines. Output is object IDs separated by newlines.
+func (repo *Repo) WalkUnreachableObjects(ctx context.Context, heads io.Reader, output io.Writer) error {
+ var stderr bytes.Buffer
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "rev-list",
+ Flags: []git.Option{
+ git.Flag{Name: "--objects"},
+ git.Flag{Name: "--not"},
+ git.Flag{Name: "--all"},
+ git.Flag{Name: "--stdin"},
+ },
+ },
+ git.WithStdin(heads),
+ git.WithStdout(output),
+ git.WithStderr(&stderr),
+ ); err != nil {
+ if matches := regexpBadObjectError.FindSubmatch(stderr.Bytes()); len(matches) > 1 {
+ return BadObjectError{ObjectID: git.ObjectID(matches[1])}
+ }
+
+ if matches := regexpObjectReadError.FindSubmatch(stderr.Bytes()); len(matches) > 1 {
+ return ObjectReadError{ObjectID: git.ObjectID(matches[1])}
+ }
+
+ return fmt.Errorf("rev-list: %w, stderr: %q", err, stderr.String())
+ }
+
+ return nil
+}
+
// PackObjects takes in object IDs separated by newlines. It packs the objects into a pack file and
// writes it into the output.
func (repo *Repo) PackObjects(ctx context.Context, objectIDs io.Reader, output io.Writer) error {
diff --git a/internal/git/localrepo/objects_test.go b/internal/git/localrepo/objects_test.go
index 5390b7ea1..a80753f74 100644
--- a/internal/git/localrepo/objects_test.go
+++ b/internal/git/localrepo/objects_test.go
@@ -538,6 +538,79 @@ func TestRepo_IsAncestor(t *testing.T) {
}
}
+func TestWalkUnreachableObjects(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+
+ cfg, repo, repoPath := setupRepo(t)
+
+ commit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("commit-1"))
+ unreachableCommit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit1))
+ unreachableCommit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit1))
+ prunedCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit2))
+ brokenParent1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(prunedCommit))
+
+ // Pack brokenParent so we can unpack it into the repository as an object with broken links after
+ // pruning.
+ var packedBrokenParent bytes.Buffer
+ require.NoError(t, repo.PackObjects(ctx, strings.NewReader(brokenParent1.String()), &packedBrokenParent))
+
+ // Prune to remove the prunedCommit.
+ gittest.Exec(t, cfg, "-C", repoPath, "prune", unreachableCommit1.String(), unreachableCommit2.String())
+
+ // Unpack brokenParent now that the parent has been pruned.
+ require.NoError(t, repo.UnpackObjects(ctx, &packedBrokenParent))
+ // gittest.ExecOpts(t, cfg, gittest.ExecConfig{Stdin: &packedBrokenParent}, "-C", repoPath, "unpack-objects")
+
+ gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{
+ commit1, unreachableCommit1, unreachableCommit2, brokenParent1,
+ })
+
+ for _, tc := range []struct {
+ desc string
+ heads []git.ObjectID
+ expectedOutput string
+ expectedError error
+ }{
+ {
+ desc: "no heads",
+ },
+ {
+ desc: "reachable commit not reported",
+ heads: []git.ObjectID{commit1},
+ },
+ {
+ desc: "unreachable commits reported",
+ heads: []git.ObjectID{unreachableCommit2},
+ expectedOutput: "1afc2cf83cf04db11fc16bfdc403ca6f968cbd79\n0395680c5bb13cb1e67f018583985f42c8700250\n",
+ },
+ {
+ desc: "non-existent head",
+ heads: []git.ObjectID{prunedCommit},
+ expectedError: BadObjectError{ObjectID: prunedCommit},
+ },
+ {
+ desc: "traversal fails due to missing parent commit",
+ heads: []git.ObjectID{brokenParent1},
+ expectedError: ObjectReadError{prunedCommit},
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ var heads []string
+ for _, head := range tc.heads {
+ heads = append(heads, head.String())
+ }
+
+ var output bytes.Buffer
+ require.Equal(t,
+ tc.expectedError,
+ repo.WalkUnreachableObjects(ctx, strings.NewReader(strings.Join(heads, "\n")), &output))
+ require.Equal(t, tc.expectedOutput, output.String())
+ })
+ }
+}
+
func TestPackAndUnpackObjects(t *testing.T) {
t.Parallel()