Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Steinhardt <psteinhardt@gitlab.com> 2023-04-12 14:40:58 +0300
committer Patrick Steinhardt <psteinhardt@gitlab.com> 2023-04-12 14:40:58 +0300
commit e029e3b56eacf23943a987207e8b490f45598630 (patch)
tree 1091dd566a7cf0ab62e5838248185cfcdbe0e798
parent b8190668d147784e8be4a379b33f691363e08a0f (diff)
parent b420236e0ad94f1a77ceef9dbbfe1591c3f5d3eb (diff)
Merge branch 'smh-pack-utils' into 'master'
Implement localrepo methods for working with pack files

See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5611

Merged-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Approved-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Reviewed-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Reviewed-by: Sami Hiltunen <shiltunen@gitlab.com>
Co-authored-by: Sami Hiltunen <shiltunen@gitlab.com>
-rw-r--r-- internal/git/command_description.go 3
-rw-r--r-- internal/git/gittest/objects.go 28
-rw-r--r-- internal/git/localrepo/objects.go 103
-rw-r--r-- internal/git/localrepo/objects_test.go 190
4 files changed, 324 insertions, 0 deletions
diff --git a/internal/git/command_description.go b/internal/git/command_description.go
index 8e059f684..0d3eeae1c 100644
--- a/internal/git/command_description.go
+++ b/internal/git/command_description.go
@@ -297,6 +297,9 @@ var commandDescriptions = map[string]commandDescription{
"tag": {
flags: 0,
},
+ "unpack-objects": {
+ flags: scNoRefUpdates | scNoEndOfOptions,
+ },
"update-ref": {
flags: 0,
},
diff --git a/internal/git/gittest/objects.go b/internal/git/gittest/objects.go
index d7499868f..b96d7ffbb 100644
--- a/internal/git/gittest/objects.go
+++ b/internal/git/gittest/objects.go
@@ -20,6 +20,34 @@ func ObjectHashIsSHA256() bool {
return DefaultObjectHash.EmptyTreeOID == git.ObjectHashSHA256.EmptyTreeOID
}
+// RequireObjects asserts that the object database of the repository at repoPath contains exactly the
+// expectedObjects, in any order. The object IDs are listed with `git cat-file --batch-all-objects`; the empty
+// tree OID of the default object hash is dropped from the listing, so it must not be part of expectedObjects.
+func RequireObjects(tb testing.TB, cfg config.Cfg, repoPath string, expectedObjects []git.ObjectID) {
+ tb.Helper()
+
+ rawOutput := bytes.Split(
+ bytes.TrimSpace(
+ Exec(tb, cfg, "-C", repoPath, "cat-file", "--batch-check=%(objectname)", "--batch-all-objects"),
+ ),
+ []byte{'\n'},
+ )
+
+ actualObjects := []git.ObjectID{}
+ if len(rawOutput[0]) > 0 { // bytes.Split yields a single empty element when git printed nothing
+ for _, oid := range rawOutput {
+ oid := git.ObjectID(oid) // convert the raw output line so it can be compared with git.ObjectID values
+ if oid == DefaultObjectHash.EmptyTreeOID {
+ continue
+ }
+
+ actualObjects = append(actualObjects, oid)
+ }
+ }
+
+ require.ElementsMatch(tb, expectedObjects, actualObjects)
+}
+
// RequireObjectExists asserts that the given repository does contain an object with the specified
// object ID.
func RequireObjectExists(tb testing.TB, cfg config.Cfg, repoPath string, objectID git.ObjectID) {
diff --git a/internal/git/localrepo/objects.go b/internal/git/localrepo/objects.go
index d34f6b11e..c9fb2345b 100644
--- a/internal/git/localrepo/objects.go
+++ b/internal/git/localrepo/objects.go
@@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
+ "regexp"
"strings"
"time"
@@ -14,6 +15,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
"gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/structerr"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
)
@@ -321,3 +323,104 @@ func (repo *Repo) IsAncestor(ctx context.Context, parent, child git.Revision) (b
return true, nil
}
+
+// BadObjectError is returned by WalkUnreachableObjects when git's stderr is a single `fatal: bad object <oid>` line.
+type BadObjectError struct {
+ // ObjectID is the object ID that git reported as a bad object.
+ ObjectID git.ObjectID
+}
+
+// Error implements the error interface. The message has the form `bad object "<oid>"`, with the ID quoted via %q.
+func (err BadObjectError) Error() string {
+ return fmt.Sprintf("bad object %q", err.ObjectID)
+}
+
+// ObjectReadError is returned by WalkUnreachableObjects when git's stderr starts with an `error: Could not read <oid>` line.
+type ObjectReadError struct {
+ // ObjectID is the object ID of the object that git failed to read.
+ ObjectID git.ObjectID
+}
+
+// Error implements the error interface. The message has the form `failed reading object "<oid>"`, with the ID quoted via %q.
+func (err ObjectReadError) Error() string {
+ return fmt.Sprintf("failed reading object %q", err.ObjectID)
+}
+
+var ( // patterns used by WalkUnreachableObjects to classify git's stderr; the capture group is the object ID
+ regexpBadObjectError = regexp.MustCompile(`^fatal: bad object ([[:xdigit:]]*)\n$`) // anchored at both ends: the whole stderr must be this one line
+ regexpObjectReadError = regexp.MustCompile(`^error: Could not read ([[:xdigit:]]*)\n`) // anchored at the start only: further output may follow the line
+)
+
+// WalkUnreachableObjects walks the object graph starting from heads and writes to output the objects that
+// are part of the walk but unreachable from any of the repository's references. heads must yield object IDs
+// separated by newlines; output receives the stdout of `git rev-list --objects`, one object per line.
+func (repo *Repo) WalkUnreachableObjects(ctx context.Context, heads io.Reader, output io.Writer) error {
+ var stderr bytes.Buffer // captured so that a failure can be classified below
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "rev-list",
+ Flags: []git.Option{
+ git.Flag{Name: "--objects"},
+ git.Flag{Name: "--not"},
+ git.Flag{Name: "--all"},
+ git.Flag{Name: "--stdin"},
+ },
+ },
+ git.WithStdin(heads),
+ git.WithStdout(output),
+ git.WithStderr(&stderr),
+ ); err != nil {
+ if matches := regexpBadObjectError.FindSubmatch(stderr.Bytes()); len(matches) > 1 { // stderr is exactly one "fatal: bad object <oid>" line
+ return BadObjectError{ObjectID: git.ObjectID(matches[1])}
+ }
+
+ if matches := regexpObjectReadError.FindSubmatch(stderr.Bytes()); len(matches) > 1 { // stderr begins with "error: Could not read <oid>"
+ return ObjectReadError{ObjectID: git.ObjectID(matches[1])}
+ }
+
+ return structerr.New("rev-list: %w", err).WithMetadata("stderr", stderr.String()) // unrecognized failure: wrap it and attach stderr as metadata
+ }
+
+ return nil
+}
+
+// PackObjects reads object IDs separated by newlines from objectIDs, packs those objects with `git pack-objects`
+// and writes the resulting pack file into the output. On failure, the returned error carries git's stderr as metadata.
+func (repo *Repo) PackObjects(ctx context.Context, objectIDs io.Reader, output io.Writer) error {
+ var stderr bytes.Buffer // captured only to be attached to the error on failure
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "pack-objects",
+ Flags: []git.Option{
+ git.Flag{Name: "-q"},
+ git.Flag{Name: "--stdout"}, // the pack is emitted on stdout, which is wired to output below
+ },
+ },
+ git.WithStdin(objectIDs),
+ git.WithStderr(&stderr),
+ git.WithStdout(output),
+ ); err != nil {
+ return structerr.New("pack objects: %w", err).WithMetadata("stderr", stderr.String())
+ }
+
+ return nil
+}
+
+// UnpackObjects feeds packFile to `git unpack-objects -q`, which unpacks the pack's objects into the repository's object database.
+func (repo *Repo) UnpackObjects(ctx context.Context, packFile io.Reader) error {
+ stderr := &bytes.Buffer{} // captured only to be attached to the error on failure
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "unpack-objects",
+ Flags: []git.Option{
+ git.Flag{Name: "-q"},
+ },
+ },
+ git.WithStdin(packFile),
+ git.WithStderr(stderr),
+ ); err != nil {
+ return structerr.New("unpack objects: %w", err).WithMetadata("stderr", stderr.String()) // git's stderr travels as metadata, not in the message
+ }
+
+ return nil
+}
diff --git a/internal/git/localrepo/objects_test.go b/internal/git/localrepo/objects_test.go
index 3727a574e..d9af7525c 100644
--- a/internal/git/localrepo/objects_test.go
+++ b/internal/git/localrepo/objects_test.go
@@ -1,6 +1,7 @@
package localrepo
import (
+ "bytes"
"context"
"fmt"
"io"
@@ -536,3 +537,192 @@ func TestRepo_IsAncestor(t *testing.T) {
})
}
}
+
+func TestWalkUnreachableObjects(t *testing.T) { // covers the output and the typed errors of Repo.WalkUnreachableObjects
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+
+ cfg, repo, repoPath := setupRepo(t)
+
+ commit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("commit-1"))
+ unreachableCommit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit1))
+ unreachableCommit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit1))
+ prunedCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit2))
+ brokenParent1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(prunedCommit))
+
+ // Pack brokenParent1 so that it can be unpacked back into the repository after pruning, at which point it is
+ // an object whose parent (prunedCommit) is missing.
+ var packedBrokenParent bytes.Buffer
+ require.NoError(t, repo.PackObjects(ctx, strings.NewReader(brokenParent1.String()), &packedBrokenParent))
+
+ // Prune unreachable objects; the two commits passed as heads are kept, so prunedCommit and brokenParent1 are expected to be removed.
+ gittest.Exec(t, cfg, "-C", repoPath, "prune", unreachableCommit1.String(), unreachableCommit2.String())
+
+ // Unpack brokenParent1 again now that its parent has been pruned.
+ require.NoError(t, repo.UnpackObjects(ctx, &packedBrokenParent))
+
+ gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{
+ commit1, unreachableCommit1, unreachableCommit2, brokenParent1,
+ })
+
+ for _, tc := range []struct {
+ desc string
+ heads []git.ObjectID
+ expectedOutput []string
+ expectedError error
+ }{
+ {
+ desc: "no heads",
+ },
+ {
+ desc: "reachable commit not reported",
+ heads: []git.ObjectID{commit1},
+ },
+ {
+ desc: "unreachable commits reported",
+ heads: []git.ObjectID{unreachableCommit2},
+ expectedOutput: []string{
+ unreachableCommit1.String(),
+ unreachableCommit2.String(),
+ },
+ },
+ {
+ desc: "non-existent head",
+ heads: []git.ObjectID{prunedCommit},
+ expectedError: BadObjectError{ObjectID: prunedCommit},
+ },
+ {
+ desc: "traversal fails due to missing parent commit",
+ heads: []git.ObjectID{brokenParent1},
+ expectedError: ObjectReadError{prunedCommit},
+ },
+ } {
+ tc := tc // copy the range variable, as the subtest below runs in parallel
+ t.Run(tc.desc, func(t *testing.T) {
+ t.Parallel()
+
+ var heads []string
+ for _, head := range tc.heads {
+ heads = append(heads, head.String())
+ }
+
+ var output bytes.Buffer
+ require.Equal(t,
+ tc.expectedError,
+ repo.WalkUnreachableObjects(ctx, strings.NewReader(strings.Join(heads, "\n")), &output))
+
+ var actualOutput []string
+ if output.Len() > 0 { // leave actualOutput nil when nothing was written; splitting "" would yield one empty element
+ actualOutput = strings.Split(strings.TrimSpace(output.String()), "\n")
+ }
+ require.ElementsMatch(t, tc.expectedOutput, actualOutput)
+ })
+ }
+}
+
+func TestPackAndUnpackObjects(t *testing.T) { // builds packs with PackObjects and unpacks each into a separate repository
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+
+ cfg, repo, repoPath := setupRepo(t)
+
+ commit1 := gittest.WriteCommit(t, cfg, repoPath)
+ commit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit1))
+ commit3 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit2))
+
+ gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{commit1, commit2, commit3})
+
+ var emptyPack bytes.Buffer // pack built from an empty list of object IDs
+ require.NoError(t,
+ repo.PackObjects(ctx, strings.NewReader(""),
+ &emptyPack,
+ ),
+ )
+
+ var oneCommitPack bytes.Buffer
+ require.NoError(t,
+ repo.PackObjects(ctx, strings.NewReader(
+ strings.Join([]string{commit1.String()}, "\n"),
+ ),
+ &oneCommitPack,
+ ),
+ )
+
+ var twoCommitPack bytes.Buffer
+ require.NoError(t,
+ repo.PackObjects(ctx, strings.NewReader(
+ strings.Join([]string{commit1.String(), commit2.String()}, "\n"),
+ ),
+ &twoCommitPack,
+ ),
+ )
+
+ var incompletePack bytes.Buffer // requested with commit1 and commit3 only, i.e. without commit3's parent commit2
+ require.NoError(t,
+ repo.PackObjects(ctx, strings.NewReader(
+ strings.Join([]string{commit1.String(), commit3.String()}, "\n"),
+ ),
+ &incompletePack,
+ ),
+ )
+
+ for _, tc := range []struct {
+ desc string
+ pack []byte
+ expectedObjects []git.ObjectID
+ expectedErrorMessage string
+ }{
+ {
+ desc: "empty pack",
+ pack: emptyPack.Bytes(),
+ },
+ {
+ desc: "one commit",
+ pack: oneCommitPack.Bytes(),
+ expectedObjects: []git.ObjectID{
+ commit1,
+ },
+ },
+ {
+ desc: "two commits",
+ pack: twoCommitPack.Bytes(),
+ expectedObjects: []git.ObjectID{
+ commit1, commit2,
+ },
+ },
+ {
+ desc: "incomplete pack", // unpacking is expected to succeed even though commit2 is absent
+ pack: incompletePack.Bytes(),
+ expectedObjects: []git.ObjectID{
+ commit1, commit3,
+ },
+ },
+ {
+ desc: "no pack", // tc.pack is nil, so UnpackObjects is given an empty reader
+ expectedErrorMessage: "unpack objects: exit status 128",
+ },
+ {
+ desc: "broken pack",
+ pack: []byte("invalid pack"),
+ expectedErrorMessage: "unpack objects: exit status 128",
+ },
+ } {
+ tc := tc // copy the range variable, as the subtest below runs in parallel
+ t.Run(tc.desc, func(t *testing.T) {
+ t.Parallel()
+
+ cfg, repo, repoPath := setupRepo(t) // separate repository per subtest; shadows the outer cfg, repo and repoPath
+ gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{})
+
+ err := repo.UnpackObjects(ctx, bytes.NewReader(tc.pack))
+ if tc.expectedErrorMessage != "" {
+ require.EqualError(t, err, tc.expectedErrorMessage)
+ } else {
+ require.NoError(t, err)
+ }
+ gittest.RequireObjects(t, cfg, repoPath, tc.expectedObjects) // also checked in the error cases, where expectedObjects is nil
+ })
+ }
+}