diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-04-12 14:40:58 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-04-12 14:40:58 +0300 |
commit | e029e3b56eacf23943a987207e8b490f45598630 (patch) | |
tree | 1091dd566a7cf0ab62e5838248185cfcdbe0e798 | |
parent | b8190668d147784e8be4a379b33f691363e08a0f (diff) | |
parent | b420236e0ad94f1a77ceef9dbbfe1591c3f5d3eb (diff) |
Merge branch 'smh-pack-utils' into 'master'
Implement localrepo methods for working with pack files
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5611
Merged-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Approved-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Reviewed-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Reviewed-by: Sami Hiltunen <shiltunen@gitlab.com>
Co-authored-by: Sami Hiltunen <shiltunen@gitlab.com>
-rw-r--r-- | internal/git/command_description.go | 3 | ||||
-rw-r--r-- | internal/git/gittest/objects.go | 28 | ||||
-rw-r--r-- | internal/git/localrepo/objects.go | 103 | ||||
-rw-r--r-- | internal/git/localrepo/objects_test.go | 190 |
4 files changed, 324 insertions, 0 deletions
diff --git a/internal/git/command_description.go b/internal/git/command_description.go index 8e059f684..0d3eeae1c 100644 --- a/internal/git/command_description.go +++ b/internal/git/command_description.go @@ -297,6 +297,9 @@ var commandDescriptions = map[string]commandDescription{ "tag": { flags: 0, }, + "unpack-objects": { + flags: scNoRefUpdates | scNoEndOfOptions, + }, "update-ref": { flags: 0, }, diff --git a/internal/git/gittest/objects.go b/internal/git/gittest/objects.go index d7499868f..b96d7ffbb 100644 --- a/internal/git/gittest/objects.go +++ b/internal/git/gittest/objects.go @@ -20,6 +20,34 @@ func ObjectHashIsSHA256() bool { return DefaultObjectHash.EmptyTreeOID == git.ObjectHashSHA256.EmptyTreeOID } +// RequireObjects asserts that the object database contains the expected objects. It filters the empty tree +// oid of the default object hash from the actual elements. Empty tree oid should not be included in the +// expectedObjects. +func RequireObjects(tb testing.TB, cfg config.Cfg, repoPath string, expectedObjects []git.ObjectID) { + tb.Helper() + + rawOutput := bytes.Split( + bytes.TrimSpace( + Exec(tb, cfg, "-C", repoPath, "cat-file", "--batch-check=%(objectname)", "--batch-all-objects"), + ), + []byte{'\n'}, + ) + + actualObjects := []git.ObjectID{} + if len(rawOutput[0]) > 0 { + for _, oid := range rawOutput { + oid := git.ObjectID(oid) + if oid == DefaultObjectHash.EmptyTreeOID { + continue + } + + actualObjects = append(actualObjects, oid) + } + } + + require.ElementsMatch(tb, expectedObjects, actualObjects) +} + // RequireObjectExists asserts that the given repository does contain an object with the specified // object ID. func RequireObjectExists(tb testing.TB, cfg config.Cfg, repoPath string, objectID git.ObjectID) { diff --git a/internal/git/localrepo/objects.go b/internal/git/localrepo/objects.go index d34f6b11e..c9fb2345b 100644 --- a/internal/git/localrepo/objects.go +++ b/internal/git/localrepo/objects.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "regexp" "strings" "time" @@ -14,6 +15,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text" "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" + "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" ) @@ -321,3 +323,104 @@ func (repo *Repo) IsAncestor(ctx context.Context, parent, child git.Revision) (b return true, nil } + +// BadObjectError is returned when attempting to walk a bad object. +type BadObjectError struct { + // ObjectID is the object id of the object that was bad. + ObjectID git.ObjectID +} + +// Error returns the error message. +func (err BadObjectError) Error() string { + return fmt.Sprintf("bad object %q", err.ObjectID) +} + +// ObjectReadError is returned when reading an object fails. +type ObjectReadError struct { + // ObjectID is the object id of the object that git failed to read + ObjectID git.ObjectID +} + +// Error returns the error message. +func (err ObjectReadError) Error() string { + return fmt.Sprintf("failed reading object %q", err.ObjectID) +} + +var ( + regexpBadObjectError = regexp.MustCompile(`^fatal: bad object ([[:xdigit:]]*)\n$`) + regexpObjectReadError = regexp.MustCompile(`^error: Could not read ([[:xdigit:]]*)\n`) +) + +// WalkUnreachableObjects walks the object graph starting from heads and writes to the output object IDs +// that are included in the walk but unreachable from any of the repository's references. Heads should +// return object IDs separated with a newline. Output is object IDs separated by newlines. +func (repo *Repo) WalkUnreachableObjects(ctx context.Context, heads io.Reader, output io.Writer) error { + var stderr bytes.Buffer + if err := repo.ExecAndWait(ctx, + git.Command{ + Name: "rev-list", + Flags: []git.Option{ + git.Flag{Name: "--objects"}, + git.Flag{Name: "--not"}, + git.Flag{Name: "--all"}, + git.Flag{Name: "--stdin"}, + }, + }, + git.WithStdin(heads), + git.WithStdout(output), + git.WithStderr(&stderr), + ); err != nil { + if matches := regexpBadObjectError.FindSubmatch(stderr.Bytes()); len(matches) > 1 { + return BadObjectError{ObjectID: git.ObjectID(matches[1])} + } + + if matches := regexpObjectReadError.FindSubmatch(stderr.Bytes()); len(matches) > 1 { + return ObjectReadError{ObjectID: git.ObjectID(matches[1])} + } + + return structerr.New("rev-list: %w", err).WithMetadata("stderr", stderr.String()) + } + + return nil +} + +// PackObjects takes in object IDs separated by newlines. It packs the objects into a pack file and +// writes it into the output. +func (repo *Repo) PackObjects(ctx context.Context, objectIDs io.Reader, output io.Writer) error { + var stderr bytes.Buffer + if err := repo.ExecAndWait(ctx, + git.Command{ + Name: "pack-objects", + Flags: []git.Option{ + git.Flag{Name: "-q"}, + git.Flag{Name: "--stdout"}, + }, + }, + git.WithStdin(objectIDs), + git.WithStderr(&stderr), + git.WithStdout(output), + ); err != nil { + return structerr.New("pack objects: %w", err).WithMetadata("stderr", stderr.String()) + } + + return nil +} + +// UnpackObjects unpacks the objects from the pack file to the repository's object database. +func (repo *Repo) UnpackObjects(ctx context.Context, packFile io.Reader) error { + stderr := &bytes.Buffer{} + if err := repo.ExecAndWait(ctx, + git.Command{ + Name: "unpack-objects", + Flags: []git.Option{ + git.Flag{Name: "-q"}, + }, + }, + git.WithStdin(packFile), + git.WithStderr(stderr), + ); err != nil { + return structerr.New("unpack objects: %w", err).WithMetadata("stderr", stderr.String()) + } + + return nil +} diff --git a/internal/git/localrepo/objects_test.go b/internal/git/localrepo/objects_test.go index 3727a574e..d9af7525c 100644 --- a/internal/git/localrepo/objects_test.go +++ b/internal/git/localrepo/objects_test.go @@ -1,6 +1,7 @@ package localrepo import ( + "bytes" "context" "fmt" "io" @@ -536,3 +537,192 @@ func TestRepo_IsAncestor(t *testing.T) { }) } } + +func TestWalkUnreachableObjects(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + + cfg, repo, repoPath := setupRepo(t) + + commit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("commit-1")) + unreachableCommit1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit1)) + unreachableCommit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit1)) + prunedCommit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(unreachableCommit2)) + brokenParent1 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(prunedCommit)) + + // Pack brokenParent so we can unpack it into the repository as an object with broken links after + // pruning. + var packedBrokenParent bytes.Buffer + require.NoError(t, repo.PackObjects(ctx, strings.NewReader(brokenParent1.String()), &packedBrokenParent)) + + // Prune to remove the prunedCommit. + gittest.Exec(t, cfg, "-C", repoPath, "prune", unreachableCommit1.String(), unreachableCommit2.String()) + + // Unpack brokenParent now that the parent has been pruned. + require.NoError(t, repo.UnpackObjects(ctx, &packedBrokenParent)) + + gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{ + commit1, unreachableCommit1, unreachableCommit2, brokenParent1, + }) + + for _, tc := range []struct { + desc string + heads []git.ObjectID + expectedOutput []string + expectedError error + }{ + { + desc: "no heads", + }, + { + desc: "reachable commit not reported", + heads: []git.ObjectID{commit1}, + }, + { + desc: "unreachable commits reported", + heads: []git.ObjectID{unreachableCommit2}, + expectedOutput: []string{ + unreachableCommit1.String(), + unreachableCommit2.String(), + }, + }, + { + desc: "non-existent head", + heads: []git.ObjectID{prunedCommit}, + expectedError: BadObjectError{ObjectID: prunedCommit}, + }, + { + desc: "traversal fails due to missing parent commit", + heads: []git.ObjectID{brokenParent1}, + expectedError: ObjectReadError{prunedCommit}, + }, + } { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + var heads []string + for _, head := range tc.heads { + heads = append(heads, head.String()) + } + + var output bytes.Buffer + require.Equal(t, + tc.expectedError, + repo.WalkUnreachableObjects(ctx, strings.NewReader(strings.Join(heads, "\n")), &output)) + + var actualOutput []string + if output.Len() > 0 { + actualOutput = strings.Split(strings.TrimSpace(output.String()), "\n") + } + require.ElementsMatch(t, tc.expectedOutput, actualOutput) + }) + } +} + +func TestPackAndUnpackObjects(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + + cfg, repo, repoPath := setupRepo(t) + + commit1 := gittest.WriteCommit(t, cfg, repoPath) + commit2 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit1)) + commit3 := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(commit2)) + + gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{commit1, commit2, commit3}) + + var emptyPack bytes.Buffer + require.NoError(t, + repo.PackObjects(ctx, strings.NewReader(""), + &emptyPack, + ), + ) + + var oneCommitPack bytes.Buffer + require.NoError(t, + repo.PackObjects(ctx, strings.NewReader( + strings.Join([]string{commit1.String()}, "\n"), + ), + &oneCommitPack, + ), + ) + + var twoCommitPack bytes.Buffer + require.NoError(t, + repo.PackObjects(ctx, strings.NewReader( + strings.Join([]string{commit1.String(), commit2.String()}, "\n"), + ), + &twoCommitPack, + ), + ) + + var incompletePack bytes.Buffer + require.NoError(t, + repo.PackObjects(ctx, strings.NewReader( + strings.Join([]string{commit1.String(), commit3.String()}, "\n"), + ), + &incompletePack, + ), + ) + + for _, tc := range []struct { + desc string + pack []byte + expectedObjects []git.ObjectID + expectedErrorMessage string + }{ + { + desc: "empty pack", + pack: emptyPack.Bytes(), + }, + { + desc: "one commit", + pack: oneCommitPack.Bytes(), + expectedObjects: []git.ObjectID{ + commit1, + }, + }, + { + desc: "two commits", + pack: twoCommitPack.Bytes(), + expectedObjects: []git.ObjectID{ + commit1, commit2, + }, + }, + { + desc: "incomplete pack", + pack: incompletePack.Bytes(), + expectedObjects: []git.ObjectID{ + commit1, commit3, + }, + }, + { + desc: "no pack", + expectedErrorMessage: "unpack objects: exit status 128", + }, + { + desc: "broken pack", + pack: []byte("invalid pack"), + expectedErrorMessage: "unpack objects: exit status 128", + }, + } { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + cfg, repo, repoPath := setupRepo(t) + gittest.RequireObjects(t, cfg, repoPath, []git.ObjectID{}) + + err := repo.UnpackObjects(ctx, bytes.NewReader(tc.pack)) + if tc.expectedErrorMessage != "" { + require.EqualError(t, err, tc.expectedErrorMessage) + } else { + require.NoError(t, err) + } + gittest.RequireObjects(t, cfg, repoPath, tc.expectedObjects) + }) + } +} |