Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSami Hiltunen <shiltunen@gitlab.com>2024-01-12 19:15:13 +0300
committerSami Hiltunen <shiltunen@gitlab.com>2024-01-15 13:53:35 +0300
commitda43db2215deee24f74eb19c7a81749aa5b7663f (patch)
treee8551aadcc1d3b4f59a13dfea5ebcc417990a087
parent9be461c8617c48d3a86069250aba5dca0bc6971f (diff)
Add WalkObjects to walk the object graph
The TransactionManager needs to walk objects in the quarantine to identify which objects to commit with the transaction in a packfile and which objects that already exist in the repository the transaction depends on. This commit adds WalkObjects to facilitate that. It walks the object graph from given start points and prints out all objects IDs that it encounters. If applicable, the objects path is also printed out to aid packing them later. Objects that are encountered but do not exist are printed out with a '?' to mark them missing. When the WalkObjects is invoked with just the quarantine directory without the alternates, this leads to the object walk identifying the objects from the quarantine that we need to pack with the missing objects being the transaction's dependencies that we must exist in the repository already. 'git rev-list --missing=print' has an issue where it errors out if a start point of a walk does not exist. This is a problem as the heads we feed as the starting point of the walk may be objects that already exist in the repository. In that case, they should be treated as the transaction's dependencies and simply be printed out as missing objects. There is a workaround in place to do so by checking whether the object exists before feeding it to the walk. If it doesn't, we simply print it out as missing after the walk. This allows us to plug in the functionality in TransactionManager and later just remove the workaround from here once the issue has been fixed in Git.
-rw-r--r--internal/git/localrepo/objects.go66
-rw-r--r--internal/git/localrepo/objects_test.go208
2 files changed, 274 insertions, 0 deletions
diff --git a/internal/git/localrepo/objects.go b/internal/git/localrepo/objects.go
index 4239f5218..3ac8f3ae5 100644
--- a/internal/git/localrepo/objects.go
+++ b/internal/git/localrepo/objects.go
@@ -1,6 +1,7 @@
package localrepo
import (
+ "bufio"
"bytes"
"context"
"errors"
@@ -148,6 +149,71 @@ func (repo *Repo) WalkUnreachableObjects(ctx context.Context, heads io.Reader, o
return nil
}
+// WalkObjects walks the object graph starting from heads and writes to the output the objects it
+// sees. Objects that are encountered during the walk and exist in the object database are written
+// out as `<oid> <path>\n` with the `<path>` being sometimes present if applicable. Objects that
+// were encountered during the walk but do not exist in the object database are written out
+// as `?<oid>\n`.
+func (repo *Repo) WalkObjects(ctx context.Context, heads io.Reader, output io.Writer) error {
+ // TransactionManager feeds here the reference tips updated in transaction. Some of the
+ // objects may exist in the repository and some may be new. `rev-list ---missing` currently
+ // fails if a starting point of the walk does not exist. This is a problem for us as this means
+ // the walk fails when updating a reference to an existing object in the repository.
+ //
+ // Below is a workaround for this problem until the actual issue in rev-list is fixed.
+ // Before we pass the heads to rev-list, we check whether they exist in the repo. If they
+ // do, we pass them to rev-list for the walk. If they do not, they are marked as missing
+ // object and appended to the output after the rev-list call has finished.
+ //
+ // Issue: https://gitlab.com/gitlab-org/git/-/issues/234
+ var existingHeads []byte
+ var missingHeads []byte
+
+ scanner := bufio.NewScanner(heads)
+ for scanner.Scan() {
+ objectID := scanner.Text()
+
+ if _, err := repo.ReadObjectInfo(ctx, git.Revision(objectID)); err != nil {
+ var invalidObjectError InvalidObjectError
+ if errors.As(err, &invalidObjectError) {
+ missingHeads = append(missingHeads, []byte("?"+objectID+"\n")...)
+ continue
+ }
+
+ return fmt.Errorf("read object info: %w", err)
+ }
+
+ existingHeads = append(existingHeads, []byte(objectID+"\n")...)
+ }
+
+ if err := scanner.Err(); err != nil {
+ return fmt.Errorf("scanner: %w", err)
+ }
+
+ var stderr bytes.Buffer
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "rev-list",
+ Flags: []git.Option{
+ git.Flag{Name: "--objects"},
+ git.Flag{Name: "--missing=print"},
+ git.Flag{Name: "--stdin"},
+ },
+ },
+ git.WithStdin(bytes.NewReader(existingHeads)),
+ git.WithStdout(output),
+ git.WithStderr(&stderr),
+ ); err != nil {
+ return structerr.New("rev-list: %w", err).WithMetadata("stderr", stderr.String())
+ }
+
+ if _, err := output.Write(missingHeads); err != nil {
+ return fmt.Errorf("write missing heads: %w", err)
+ }
+
+ return nil
+}
+
// PackObjects takes in object IDs separated by newlines. It packs the objects into a pack file and
// writes it into the output.
func (repo *Repo) PackObjects(ctx context.Context, objectIDs io.Reader, output io.Writer) error {
diff --git a/internal/git/localrepo/objects_test.go b/internal/git/localrepo/objects_test.go
index eb59b0926..58049bfca 100644
--- a/internal/git/localrepo/objects_test.go
+++ b/internal/git/localrepo/objects_test.go
@@ -2,6 +2,8 @@ package localrepo
import (
"bytes"
+ "os"
+ "path/filepath"
"strings"
"testing"
@@ -211,6 +213,212 @@ func TestWalkUnreachableObjects(t *testing.T) {
}
}
+func TestWalkObjects(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+
+ cfg, repo, repoPath := setupRepo(t)
+
+ // This blob is not expected to be encountered during walk as the tree referencing it is missing.
+ unexpectedBlob := gittest.WriteBlob(t, cfg, repoPath, []byte("we should not see this blob during the walk"))
+ // Create a commit which has a tree missing from the object database. We expect the commit to be reported with the tree reported as missing.
+ missingTree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{{
+ OID: unexpectedBlob,
+ Mode: "100644",
+ Path: "this tree will be deleted so this path should not be output",
+ }})
+ rootCommit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(missingTree))
+
+ // Create a commit with a blob missing from the tree. We expect the commit and tree be reported with the blob reported as missing.
+ missingBlob := gittest.WriteBlob(t, cfg, repoPath, []byte("pruned blob"))
+ rootCommit2Tree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{{
+ OID: missingBlob,
+ Mode: "100644",
+ Path: "missing_blob",
+ }})
+ rootCommit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(rootCommit2Tree))
+
+ // This commit is deleted and is expected to be reported as missing when it's encountered during the walk.
+ missingRootCommit := gittest.WriteCommit(t, cfg, repoPath)
+
+ // Create a commit with two parents. We expect both parents to be included.
+ mergeCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(gittest.DefaultObjectHash.EmptyTreeOID), gittest.WithParents(rootCommit1, rootCommit2, missingRootCommit))
+
+ // This commit is missing its only parent. Walking this we expect the commit to be reported but the parent reported as missing.
+ commitMissingParent := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(gittest.DefaultObjectHash.EmptyTreeOID), gittest.WithParents(missingRootCommit))
+
+ // This tag is referencing a missing commit. We expect the tag to be reported and the commit reported as missing.
+ tagMissingParent := gittest.WriteTag(t, cfg, repoPath, "refs/tags/tag-missing-parent", missingRootCommit.Revision(), gittest.WriteTagConfig{
+ Message: "tag missing parent",
+ })
+
+ // Create a hierarchy with some files and subdirectories to see they are reported expectedly.
+ blob1 := gittest.WriteBlob(t, cfg, repoPath, []byte("blob 1"))
+ blob2 := gittest.WriteBlob(t, cfg, repoPath, []byte("blob 2"))
+ blob3 := gittest.WriteBlob(t, cfg, repoPath, []byte("blob 3"))
+ leafCommitSubtree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {
+ OID: blob2,
+ Mode: "100644",
+ Path: "blob2",
+ },
+ {
+ OID: blob3,
+ Mode: "100644",
+ Path: "blob3",
+ },
+ })
+ leafCommitTree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {
+ OID: leafCommitSubtree,
+ Mode: "040000",
+ Path: "subtree",
+ },
+ {
+ OID: blob1,
+ Mode: "100644",
+ Path: "blob1",
+ },
+ })
+
+ // Create two commits that diverge from the same parent. When walking one, we don't expect to get the other one reported.
+ leafCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(leafCommitTree), gittest.WithParents(mergeCommit))
+ divergedCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(gittest.DefaultObjectHash.EmptyTreeOID), gittest.WithParents(mergeCommit))
+ divergedTag := gittest.WriteTag(t, cfg, repoPath, "refs/tags/diverged-tag", divergedCommit.Revision(), gittest.WriteTagConfig{
+ Message: "diverged tag",
+ })
+
+ // Remove the objects that we use for testing missing objects.
+ for _, removedOID := range []string{
+ missingBlob.String(),
+ missingTree.String(),
+ missingRootCommit.String(),
+ } {
+ require.NoError(t, os.Remove(filepath.Join(repoPath, "objects", removedOID[:2], removedOID[2:])))
+ }
+
+ for _, tc := range []struct {
+ desc string
+ heads []git.ObjectID
+ expectedOutput []string
+ expectedErrorMessage string
+ }{
+ {
+ desc: "no heads",
+ },
+ {
+ desc: "missing start points",
+ heads: []git.ObjectID{
+ missingBlob,
+ missingRootCommit,
+ },
+ expectedOutput: []string{
+ "?" + missingBlob.String(),
+ "?" + missingRootCommit.String(),
+ },
+ },
+ {
+ desc: "commit missing parent commit",
+ heads: []git.ObjectID{
+ commitMissingParent,
+ },
+ expectedOutput: []string{
+ commitMissingParent.String(),
+ gittest.DefaultObjectHash.EmptyTreeOID.String() + " ",
+ "?" + missingRootCommit.String(),
+ },
+ },
+ {
+ desc: "annotated tag missing referenced commit",
+ heads: []git.ObjectID{
+ tagMissingParent,
+ },
+ // This is the actual expected output. Due to an issue in Git, rev-list fails when it encounters
+ // the missing commit even if `--missing=print` is used.
+ //
+ // Issue: https://gitlab.com/gitlab-org/git/-/issues/239
+ //
+ /*expectedOutput: []string{
+ tagMissingParent.String(),
+ "?"+ missingRootCommit.String(),
+ },*/
+ expectedErrorMessage: "rev-list: exit status 128",
+ },
+ {
+ desc: "diverged tag and commit are not reported",
+ heads: []git.ObjectID{
+ leafCommit,
+ },
+ expectedOutput: []string{
+ leafCommit.String(),
+ leafCommitTree.String() + " ",
+ blob1.String() + " blob1",
+ leafCommitSubtree.String() + " subtree",
+ blob2.String() + " subtree/blob2",
+ blob3.String() + " subtree/blob3",
+ mergeCommit.String(),
+ gittest.DefaultObjectHash.EmptyTreeOID.String() + " ",
+ "?" + missingRootCommit.String(),
+ rootCommit2.String(),
+ rootCommit2Tree.String() + " ",
+ "?" + missingBlob.String(),
+ rootCommit1.String(),
+ "?" + missingTree.String(),
+ },
+ },
+ {
+ desc: "walk the leaf commit and diverged tag",
+ heads: []git.ObjectID{
+ leafCommit,
+ divergedTag,
+ },
+ expectedOutput: []string{
+ divergedTag.String() + " refs/tags/diverged-tag",
+ divergedCommit.String(),
+ leafCommit.String(),
+ leafCommitTree.String() + " ",
+ blob1.String() + " blob1",
+ leafCommitSubtree.String() + " subtree",
+ blob2.String() + " subtree/blob2",
+ blob3.String() + " subtree/blob3",
+ mergeCommit.String(),
+ gittest.DefaultObjectHash.EmptyTreeOID.String() + " ",
+ "?" + missingRootCommit.String(),
+ rootCommit2.String(),
+ rootCommit2Tree.String() + " ",
+ "?" + missingBlob.String(),
+ rootCommit1.String(),
+ "?" + missingTree.String(),
+ },
+ },
+ } {
+ tc := tc
+ t.Run(tc.desc, func(t *testing.T) {
+ t.Parallel()
+
+ var heads []string
+ for _, head := range tc.heads {
+ heads = append(heads, head.String())
+ }
+
+ var output bytes.Buffer
+ err := repo.WalkObjects(ctx, strings.NewReader(strings.Join(heads, "\n")), &output)
+ if tc.expectedErrorMessage != "" {
+ require.Equal(t, tc.expectedErrorMessage, err.Error())
+ return
+ }
+ require.NoError(t, err)
+
+ var actualOutput []string
+ if output.Len() > 0 {
+ actualOutput = strings.Split(strings.TrimSpace(output.String()), "\n")
+ }
+ require.ElementsMatch(t, tc.expectedOutput, actualOutput)
+ })
+ }
+}
+
func TestPackAndUnpackObjects(t *testing.T) {
t.Parallel()