diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-05-17 09:29:29 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-05-17 09:29:29 +0300 |
commit | 8ef258a6d33ea36c143d8912c8c82c6c112ed2eb (patch) | |
tree | fab45e1f4f7ca1e20a6231f1cdcce1b056ce6b4e | |
parent | d3ec19edc16bc3e02216e3f985c35fb95c1df93e (diff) | |
parent | fc2c5c92d654f55eac2888e5ef853b8fc6349a9c (diff) |
Merge branch 'jc/use-git-hash-object' into 'master'
localrepo: Use git-hash-object instead of git-mktree for writing trees
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5797
Merged-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Approved-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Co-authored-by: John Cai <jcai@gitlab.com>
-rw-r--r-- | internal/git/localrepo/commit_test.go | 6 | ||||
-rw-r--r-- | internal/git/localrepo/tree.go | 85 | ||||
-rw-r--r-- | internal/git/localrepo/tree_test.go | 205 | ||||
-rw-r--r-- | internal/git/version.go | 8 | ||||
-rw-r--r-- | internal/gitaly/service/operations/submodules.go | 6 |
5 files changed, 260 insertions, 50 deletions
diff --git a/internal/git/localrepo/commit_test.go b/internal/git/localrepo/commit_test.go index a95e41273..9238ae7a8 100644 --- a/internal/git/localrepo/commit_test.go +++ b/internal/git/localrepo/commit_test.go @@ -32,10 +32,10 @@ func TestWriteCommit(t *testing.T) { require.NoError(t, err) treeEntryA := TreeEntry{Path: "file", Mode: "100644", OID: blobID} - treeA, err := repo.WriteTree(ctx, []TreeEntry{treeEntryA}) + treeA, err := repo.WriteTree(ctx, []*TreeEntry{&treeEntryA}) require.NoError(t, err) - treeB, err := repo.WriteTree(ctx, []TreeEntry{ + treeB, err := repo.WriteTree(ctx, []*TreeEntry{ {Path: "file", Mode: "100644", OID: changedBlobID}, }) require.NoError(t, err) @@ -203,7 +203,7 @@ func TestWriteCommit_validation(t *testing.T) { blobID, err := repo.WriteBlob(ctx, "", strings.NewReader("foo")) require.NoError(t, err) - treeID, err := repo.WriteTree(ctx, []TreeEntry{ + treeID, err := repo.WriteTree(ctx, []*TreeEntry{ { OID: blobID, Mode: "100644", diff --git a/internal/git/localrepo/tree.go b/internal/git/localrepo/tree.go index 3582cadfa..e32014afe 100644 --- a/internal/git/localrepo/tree.go +++ b/internal/git/localrepo/tree.go @@ -4,9 +4,12 @@ import ( "bytes" "context" "fmt" + "sort" + "strings" "gitlab.com/gitlab-org/gitaly/v16/internal/git" "gitlab.com/gitlab-org/gitaly/v16/internal/helper/text" + "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" ) // ObjectType is an Enum for the type of object of @@ -37,6 +40,33 @@ func (e Entries) Less(i, j int) bool { return e[i].Type < e[j].Type } +// TreeEntriesByPath allows a slice of *TreeEntry to be sorted by Path +type TreeEntriesByPath []*TreeEntry + +func (b TreeEntriesByPath) Len() int { + return len(b) +} + +func (b TreeEntriesByPath) Swap(i, j int) { + b[i], b[j] = b[j], b[i] +} + +func (b TreeEntriesByPath) Less(i, j int) bool { + iPath, jPath := b[i].Path, b[j].Path + + // git has an edge case for subtrees where they are always appended with + // a '/'. See https://github.com/git/git/blob/v2.40.0/read-cache.c#L491 + if b[i].Type == Tree { + iPath += "/" + } + + if b[j].Type == Tree { + jPath += "/" + } + + return iPath < jPath +} + // ToEnum translates a string representation of the object type into an // ObjectType enum. func ToEnum(s string) ObjectType { @@ -52,19 +82,6 @@ func ToEnum(s string) ObjectType { } } -func fromEnum(t ObjectType) string { - switch t { - case Tree: - return "tree" - case Blob: - return "blob" - case Submodule: - return "commit" - default: - return "unknown" - } -} - // TreeEntry represents an entry of a git tree object. type TreeEntry struct { // OID is the object ID the tree entry refers to. @@ -84,50 +101,46 @@ func (t *TreeEntry) IsBlob() bool { // WriteTree writes a new tree object to the given path. This function does not verify whether OIDs // referred to by tree entries actually exist in the repository. -func (repo *Repo) WriteTree(ctx context.Context, entries []TreeEntry) (git.ObjectID, error) { +func (repo *Repo) WriteTree(ctx context.Context, entries []*TreeEntry) (git.ObjectID, error) { var tree bytes.Buffer + + sort.Stable(TreeEntriesByPath(entries)) + for _, entry := range entries { - entryType := entry.Type - - if entryType == Unknown { - switch entry.Mode { - case "100644": - fallthrough - case "100755": - fallthrough - case "120000": - entryType = Blob - case "040000": - entryType = Tree - case "160000": - entryType = Submodule - } - } + mode := strings.TrimPrefix(entry.Mode, "0") + formattedEntry := fmt.Sprintf("%s %s\000", mode, entry.Path) - oid := entry.OID + oidBytes, err := entry.OID.Bytes() + if err != nil { + return "", err + } - formattedEntry := fmt.Sprintf("%s %s %s\t%s\000", entry.Mode, fromEnum(entryType), oid.String(), entry.Path) if _, err := tree.WriteString(formattedEntry); err != nil { return "", err } + + if _, err := tree.Write(oidBytes); err != nil { + return "", err + } } options := []git.Option{ - git.Flag{Name: "-z"}, - git.Flag{Name: "--missing"}, + git.ValueFlag{Name: "-t", Value: "tree"}, + git.Flag{Name: "-w"}, + git.Flag{Name: "--stdin"}, } var stdout, stderr bytes.Buffer if err := repo.ExecAndWait(ctx, git.Command{ - Name: "mktree", + Name: "hash-object", Flags: options, }, git.WithStdout(&stdout), git.WithStderr(&stderr), git.WithStdin(&tree), ); err != nil { - return "", err + return "", structerr.New("%w", err).WithMetadata("stderr", stderr.String()) } objectHash, err := repo.ObjectHash(ctx) diff --git a/internal/git/localrepo/tree_test.go b/internal/git/localrepo/tree_test.go index c2bb1f897..92dd71575 100644 --- a/internal/git/localrepo/tree_test.go +++ b/internal/git/localrepo/tree_test.go @@ -4,11 +4,14 @@ import ( "bytes" "os" "path/filepath" + "sort" + "strings" "testing" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v16/internal/helper/text" + "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper/testcfg" ) @@ -17,6 +20,9 @@ func TestWriteTree(t *testing.T) { cfg := testcfg.Build(t) ctx := testhelper.Context(t) + gitVersion, err := gittest.NewCommandFactory(t, cfg).GitVersion(ctx) + require.NoError(t, err) + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ SkipCreationViaService: true, }) @@ -28,7 +34,7 @@ func TestWriteTree(t *testing.T) { blobID, err := repo.WriteBlob(ctx, "file", bytes.NewBufferString("foobar\n")) require.NoError(t, err) - treeID, err := repo.WriteTree(ctx, []TreeEntry{ + treeID, err := repo.WriteTree(ctx, []*TreeEntry{ { OID: blobID, Mode: "100644", @@ -44,13 +50,14 @@ func TestWriteTree(t *testing.T) { require.NoError(t, os.Remove(nonExistentBlobPath)) for _, tc := range []struct { - desc string - entries []TreeEntry - expectedEntries []TreeEntry + desc string + entries []*TreeEntry + expectedEntries []TreeEntry + expectedErrString string }{ { desc: "entry with blob OID", - entries: []TreeEntry{ + entries: []*TreeEntry{ { OID: blobID, Mode: "100644", @@ -67,7 +74,7 @@ func TestWriteTree(t *testing.T) { }, { desc: "entry with tree OID", - entries: []TreeEntry{ + entries: []*TreeEntry{ { OID: treeID, Mode: "040000", @@ -84,7 +91,7 @@ func TestWriteTree(t *testing.T) { }, { desc: "mixed tree and blob entries", - entries: []TreeEntry{ + entries: []*TreeEntry{ { OID: treeID, Mode: "040000", @@ -121,7 +128,7 @@ func TestWriteTree(t *testing.T) { }, { desc: "entry with nonexistent object", - entries: []TreeEntry{ + entries: []*TreeEntry{ { OID: nonExistentBlobID, Mode: "100644", @@ -136,12 +143,63 @@ func TestWriteTree(t *testing.T) { }, }, }, + { + desc: "entry with duplicate file", + entries: []*TreeEntry{ + { + OID: blobID, + Mode: "100644", + Path: "file", + }, + { + OID: nonExistentBlobID, + Mode: "100644", + Path: "file", + }, + }, + expectedErrString: "duplicateEntries: contains duplicate file entries", + }, + { + desc: "entry with malformed mode", + entries: []*TreeEntry{ + { + OID: blobID, + Mode: "1006442", + Path: "file", + }, + }, + expectedErrString: "badFilemode: contains bad file modes", + }, + { + desc: "tries to write .git file", + entries: []*TreeEntry{ + { + OID: blobID, + Mode: "040000", + Path: ".git", + }, + }, + expectedErrString: "hasDotgit: contains '.git'", + }, } { tc := tc t.Run(tc.desc, func(t *testing.T) { t.Parallel() oid, err := repo.WriteTree(ctx, tc.entries) + if tc.expectedErrString != "" { + if gitVersion.HashObjectFsck() { + switch e := err.(type) { + case structerr.Error: + stderr := e.Metadata()["stderr"].(string) + strings.Contains(stderr, tc.expectedErrString) + default: + strings.Contains(err.Error(), tc.expectedErrString) + } + } + return + } + require.NoError(t, err) output := text.ChompBytes(gittest.Exec(t, cfg, "-C", repoPath, "ls-tree", "-r", string(oid))) @@ -179,3 +237,134 @@ func TestWriteTree(t *testing.T) { }) } } + +func TestTreeEntryByPath(t *testing.T) { + t.Parallel() + + testCases := []struct { + desc string + input []*TreeEntry + expected []*TreeEntry + }{ + { + desc: "all blobs", + input: []*TreeEntry{ + { + Type: Blob, + Path: "abc", + }, + { + Type: Blob, + Path: "ab", + }, + { + Type: Blob, + Path: "a", + }, + }, + expected: []*TreeEntry{ + { + Type: Blob, + Path: "a", + }, + { + Type: Blob, + Path: "ab", + }, + { + Type: Blob, + Path: "abc", + }, + }, + }, + { + desc: "blobs and trees", + input: []*TreeEntry{ + { + Type: Blob, + Path: "abc", + }, + { + Type: Tree, + Path: "ab", + }, + { + Type: Blob, + Path: "a", + }, + }, + expected: []*TreeEntry{ + { + Type: Blob, + Path: "a", + }, + { + Type: Tree, + Path: "ab", + }, + { + Type: Blob, + Path: "abc", + }, + }, + }, + { + desc: "trees get sorted with / appended", + input: []*TreeEntry{ + { + Type: Tree, + Path: "a", + }, + { + Type: Blob, + Path: "a+", + }, + }, + expected: []*TreeEntry{ + { + Type: Blob, + Path: "a+", + }, + { + Type: Tree, + Path: "a", + }, + }, + }, + { + desc: "blobs get sorted without / appended", + input: []*TreeEntry{ + { + Type: Blob, + Path: "a", + }, + { + Type: Blob, + Path: "a+", + }, + }, + expected: []*TreeEntry{ + { + Type: Blob, + Path: "a", + }, + { + Type: Blob, + Path: "a+", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + sort.Stable(TreeEntriesByPath(tc.input)) + + require.Equal( + t, + tc.expected, + tc.input, + ) + }) + } +} diff --git a/internal/git/version.go b/internal/git/version.go index 5048007fa..43cff0475 100644 --- a/internal/git/version.go +++ b/internal/git/version.go @@ -75,6 +75,14 @@ func (v Version) IsSupported() bool { return !v.LessThan(minimumVersion) } +// HashObjectFsck detects whether or not the given Git version will do fsck +// checks when git-hash-object writes objects. +func (v Version) HashObjectFsck() bool { + return !v.LessThan(Version{ + major: 2, minor: 40, patch: 0, + }) +} + // PatchIDRespectsBinaries detects whether the given Git version correctly handles binary diffs when // computing a patch ID. Previous to Git v2.39.0, git-patch-id(1) just completely ignored any binary // diffs and thus would consider two diffs the same even if a binary changed. diff --git a/internal/gitaly/service/operations/submodules.go b/internal/gitaly/service/operations/submodules.go index f5daad03c..ba249bcde 100644 --- a/internal/gitaly/service/operations/submodules.go +++ b/internal/gitaly/service/operations/submodules.go @@ -86,14 +86,14 @@ func (s *Server) updateSubmodule(ctx context.Context, quarantineRepo *localrepo. return "", fmt.Errorf("error reading tree: %w", err) } - var newEntries []localrepo.TreeEntry + var newEntries []*localrepo.TreeEntry var newTreeID git.ObjectID for _, entry := range entries { // If the entry's path does not match, then we simply // want to retain this tree entry. if entry.Path != base { - newEntries = append(newEntries, *entry) + newEntries = append(newEntries, entry) continue } @@ -119,7 +119,7 @@ func (s *Server) updateSubmodule(ctx context.Context, quarantineRepo *localrepo. // Otherwise, create a new tree entry submoduleFound = true - newEntries = append(newEntries, localrepo.TreeEntry{ + newEntries = append(newEntries, &localrepo.TreeEntry{ Mode: entry.Mode, Path: entry.Path, OID: replaceWith, |