From 3ac7e70322684cde7ca1283b4cc71c806273fc1e Mon Sep 17 00:00:00 2001 From: Pavlo Strokov Date: Wed, 22 Jan 2020 15:42:12 +0000 Subject: Git object extraction improvements Omit extracting core object information before extracting actual object. If extracted object has type different from expected the NotFound error returns. Each extracted object represented as instance of Object struct with info and the reader inside. Based on the info reader could be consumed and properly parsed. Moved from pointer to value for ObjectInfo (reduce work for GC). Relates to: https://gitlab.com/gitlab-org/gitaly/issues/2255 --- internal/git/catfile/batch.go | 15 ++++---- internal/git/catfile/catfile.go | 2 +- internal/git/catfile/object.go | 13 +++++++ internal/git/log/commit.go | 21 +++++++++-- internal/git/log/commit_test.go | 77 +++++++++++++++++++++++++++++++++++++++++ internal/git/log/last_commit.go | 2 +- internal/git/log/log.go | 4 ++- internal/git/log/tag.go | 15 ++++---- internal/git/log/tag_test.go | 2 +- 9 files changed, 132 insertions(+), 19 deletions(-) create mode 100644 internal/git/catfile/object.go (limited to 'internal/git') diff --git a/internal/git/catfile/batch.go b/internal/git/catfile/batch.go index 0099ef6dd..aec8fa508 100644 --- a/internal/git/catfile/batch.go +++ b/internal/git/catfile/batch.go @@ -69,7 +69,7 @@ func newBatchProcess(ctx context.Context, repo repository.GitRepo) (*batchProces return b, nil } -func (b *batchProcess) reader(revspec string, expectedType string) (io.Reader, error) { +func (b *batchProcess) reader(revspec string, expectedType string) (*Object, error) { b.Lock() defer b.Unlock() @@ -82,7 +82,7 @@ func (b *batchProcess) reader(revspec string, expectedType string) (io.Reader, e } if b.n != 0 { - return nil, fmt.Errorf("cannot create new reader: batch contains %d unread bytes", b.n) + return nil, fmt.Errorf("cannot create new Object: batch contains %d unread bytes", b.n) } if _, err := fmt.Fprintln(b.w, revspec); err != nil { @@ -104,12 +104,15 @@ func (b *batchProcess) reader(revspec string, expectedType string) (io.Reader, e } b.n = 0 - return nil, fmt.Errorf("expected %s to be a %s, got %s", oi.Oid, expectedType, oi.Type) + return nil, NotFoundError{error: fmt.Errorf("expected %s to be a %s, got %s", oi.Oid, expectedType, oi.Type)} } - return &batchReader{ - batchProcess: b, - r: io.LimitReader(b.r, oi.Size), + return &Object{ + ObjectInfo: *oi, + Reader: &batchReader{ + batchProcess: b, + r: io.LimitReader(b.r, oi.Size), + }, }, nil } diff --git a/internal/git/catfile/catfile.go b/internal/git/catfile/catfile.go index 1226971bb..a1ce1fbb6 100644 --- a/internal/git/catfile/catfile.go +++ b/internal/git/catfile/catfile.go @@ -89,7 +89,7 @@ func (c *Batch) Tree(revspec string) (io.Reader, error) { // point to a commit. To prevent this first use Info to resolve the revspec // and check the object type. Caller must consume the Reader before // making another call on C. -func (c *Batch) Commit(revspec string) (io.Reader, error) { +func (c *Batch) Commit(revspec string) (*Object, error) { catfileLookupCounter.WithLabelValues("commit").Inc() return c.batchProcess.reader(revspec, "commit") } diff --git a/internal/git/catfile/object.go b/internal/git/catfile/object.go new file mode 100644 index 000000000..04363ad21 --- /dev/null +++ b/internal/git/catfile/object.go @@ -0,0 +1,13 @@ +package catfile + +import ( + "io" +) + +// Object represents data returned by `git cat-file --batch` +type Object struct { + // ObjectInfo represents main information about object + ObjectInfo + // Reader provides raw data about object. It differs for each type of object(tag, commit, tree, log, etc.) + io.Reader +} diff --git a/internal/git/log/commit.go b/internal/git/log/commit.go index 421bfd59c..282a54404 100644 --- a/internal/git/log/commit.go +++ b/internal/git/log/commit.go @@ -13,6 +13,7 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/git" "gitlab.com/gitlab-org/gitaly/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/internal/helper" + "gitlab.com/gitlab-org/gitaly/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" ) @@ -24,11 +25,18 @@ func GetCommit(ctx context.Context, repo *gitalypb.Repository, revision string) return nil, err } - return GetCommitCatfile(c, revision) + return GetCommitCatfile(ctx, c, revision) } // GetCommitCatfile looks up a commit by revision using an existing *catfile.Batch instance. -func GetCommitCatfile(c *catfile.Batch, revision string) (*gitalypb.GitCommit, error) { +func GetCommitCatfile(ctx context.Context, c *catfile.Batch, revision string) (*gitalypb.GitCommit, error) { + if featureflag.IsEnabled(ctx, featureflag.CommitWithoutBatchCheck) { + return getCommitCatfileNew(c, revision) + } + return getCommitCatfileOld(c, revision) +} + +func getCommitCatfileOld(c *catfile.Batch, revision string) (*gitalypb.GitCommit, error) { info, err := c.Info(revision + "^{commit}") if err != nil { return nil, err @@ -42,6 +50,15 @@ func GetCommitCatfile(c *catfile.Batch, revision string) (*gitalypb.GitCommit, e return parseRawCommit(r, info) } +func getCommitCatfileNew(c *catfile.Batch, revision string) (*gitalypb.GitCommit, error) { + obj, err := c.Commit(revision + "^{commit}") + if err != nil { + return nil, err + } + + return parseRawCommit(obj.Reader, &obj.ObjectInfo) +} + // GetCommitMessage looks up a commit message and returns it in its entirety. func GetCommitMessage(c *catfile.Batch, repo *gitalypb.Repository, revision string) ([]byte, error) { info, err := c.Info(revision + "^{commit}") diff --git a/internal/git/log/commit_test.go b/internal/git/log/commit_test.go index 120604859..8ca3c1ec4 100644 --- a/internal/git/log/commit_test.go +++ b/internal/git/log/commit_test.go @@ -7,7 +7,10 @@ import ( "github.com/golang/protobuf/ptypes/timestamp" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/internal/git/catfile" + "gitlab.com/gitlab-org/gitaly/internal/metadata/featureflag" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" + "google.golang.org/grpc/metadata" ) func TestParseRawCommit(t *testing.T) { @@ -102,3 +105,77 @@ func TestParseRawCommit(t *testing.T) { }) } } + +func TestGetCommitCatfile(t *testing.T) { + ctx, cancel := testhelper.Context() + ctx = metadata.NewIncomingContext(ctx, metadata.MD{}) + defer cancel() + + testRepo, _, cleanup := testhelper.NewTestRepo(t) + defer cleanup() + + const commitSha = "2d1db523e11e777e49377cfb22d368deec3f0793" + const commitMsg = "Correct test_env.rb path for adding branch\n" + const blobSha = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" + + testCases := []struct { + desc string + revision string + featureOn bool + errStr string + }{ + { + desc: "feature off | commit", + revision: commitSha, + featureOn: false, + }, + { + desc: "feature on | commit", + revision: commitSha, + featureOn: true, + }, + { + desc: "feature off | not existing commit", + revision: "not existing revision", + featureOn: false, + errStr: "object not found", + }, + { + desc: "feature on | not existing commit", + revision: "not existing revision", + featureOn: true, + errStr: "object not found", + }, + { + desc: "feature off | blob sha", + revision: blobSha, + featureOn: false, + errStr: "object not found", + }, + { + desc: "feature on | blob sha", + revision: blobSha, + featureOn: true, + errStr: "object not found", + }, + } + + c, err := catfile.New(ctx, testRepo) + require.NoError(t, err) + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + if tc.featureOn { + ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CommitWithoutBatchCheck) + } + + c, err := GetCommitCatfile(ctx, c, tc.revision) + + if tc.errStr == "" { + require.NoError(t, err) + require.Equal(t, commitMsg, string(c.Body)) + } else { + require.EqualError(t, err, tc.errStr) + } + }) + } +} diff --git a/internal/git/log/last_commit.go b/internal/git/log/last_commit.go index 76b3c9915..763033f30 100644 --- a/internal/git/log/last_commit.go +++ b/internal/git/log/last_commit.go @@ -27,7 +27,7 @@ func LastCommitForPath(ctx context.Context, batch *catfile.Batch, repo *gitalypb return nil, err } - return GetCommitCatfile(batch, text.ChompBytes(commitID)) + return GetCommitCatfile(ctx, batch, text.ChompBytes(commitID)) } // GitLogCommand returns a Command that executes git log with the given the arguments diff --git a/internal/git/log/log.go b/internal/git/log/log.go index c591f36bf..e6de9c37f 100644 --- a/internal/git/log/log.go +++ b/internal/git/log/log.go @@ -19,6 +19,7 @@ type Parser struct { currentCommit *gitalypb.GitCommit err error c *catfile.Batch + ctx context.Context } // NewLogParser returns a new Parser @@ -31,6 +32,7 @@ func NewLogParser(ctx context.Context, repo *gitalypb.Repository, src io.Reader) parser := &Parser{ scanner: bufio.NewScanner(src), c: c, + ctx: ctx, } return parser, nil @@ -46,7 +48,7 @@ func (parser *Parser) Parse() bool { commitID := parser.scanner.Text() - commit, err := GetCommitCatfile(parser.c, commitID) + commit, err := GetCommitCatfile(parser.ctx, parser.c, commitID) if err != nil { parser.err = err return false diff --git a/internal/git/log/tag.go b/internal/git/log/tag.go index 6e78cb0db..ae82afd74 100644 --- a/internal/git/log/tag.go +++ b/internal/git/log/tag.go @@ -3,6 +3,7 @@ package log import ( "bufio" "bytes" + "context" "fmt" "io" "io/ioutil" @@ -23,7 +24,7 @@ const ( // When 'trimRightNewLine' is 'true', the tag message will be trimmed to remove all '\n' characters from right. // note: we pass in the tagName because the tag name from refs/tags may be different // than the name found in the actual tag object. We want to use the tagName found in refs/tags -func GetTagCatfile(c *catfile.Batch, tagID, tagName string, trimLen, trimRightNewLine bool) (*gitalypb.Tag, error) { +func GetTagCatfile(ctx context.Context, c *catfile.Batch, tagID, tagName string, trimLen, trimRightNewLine bool) (*gitalypb.Tag, error) { r, err := c.Tag(tagID) if err != nil { return nil, err @@ -35,7 +36,7 @@ func GetTagCatfile(c *catfile.Batch, tagID, tagName string, trimLen, trimRightNe } // the tagID is the oid of the tag object - tag, err := buildAnnotatedTag(c, tagID, tagName, header, body, trimLen, trimRightNewLine) + tag, err := buildAnnotatedTag(ctx, c, tagID, tagName, header, body, trimLen, trimRightNewLine) if err != nil { return nil, err } @@ -92,7 +93,7 @@ func splitRawTag(r io.Reader, trimRightNewLine bool) (*tagHeader, []byte, error) return &header, body, nil } -func buildAnnotatedTag(b *catfile.Batch, tagID, name string, header *tagHeader, body []byte, trimLen, trimRightNewLine bool) (*gitalypb.Tag, error) { +func buildAnnotatedTag(ctx context.Context, b *catfile.Batch, tagID, name string, header *tagHeader, body []byte, trimLen, trimRightNewLine bool) (*gitalypb.Tag, error) { tag := &gitalypb.Tag{ Id: tagID, Name: []byte(name), @@ -107,13 +108,13 @@ func buildAnnotatedTag(b *catfile.Batch, tagID, name string, header *tagHeader, var err error switch header.tagType { case "commit": - tag.TargetCommit, err = GetCommitCatfile(b, header.oid) + tag.TargetCommit, err = GetCommitCatfile(ctx, b, header.oid) if err != nil { return nil, fmt.Errorf("buildAnnotatedTag error when getting target commit: %v", err) } case "tag": - tag.TargetCommit, err = dereferenceTag(b, header.oid) + tag.TargetCommit, err = dereferenceTag(ctx, b, header.oid) if err != nil { return nil, fmt.Errorf("buildAnnotatedTag error when dereferencing tag: %v", err) } @@ -137,7 +138,7 @@ func buildAnnotatedTag(b *catfile.Batch, tagID, name string, header *tagHeader, // we also protect against circular tag references. Even though this is not possible in git, // we still want to protect against an infinite looop -func dereferenceTag(b *catfile.Batch, Oid string) (*gitalypb.GitCommit, error) { +func dereferenceTag(ctx context.Context, b *catfile.Batch, Oid string) (*gitalypb.GitCommit, error) { for depth := 0; depth < MaxTagReferenceDepth; depth++ { i, err := b.Info(Oid) if err != nil { @@ -159,7 +160,7 @@ func dereferenceTag(b *catfile.Batch, Oid string) (*gitalypb.GitCommit, error) { Oid = header.oid continue case "commit": - return GetCommitCatfile(b, Oid) + return GetCommitCatfile(ctx, b, Oid) default: // This current tag points to a tree or a blob return nil, nil } diff --git a/internal/git/log/tag_test.go b/internal/git/log/tag_test.go index 2aadb8511..ab975ee25 100644 --- a/internal/git/log/tag_test.go +++ b/internal/git/log/tag_test.go @@ -52,7 +52,7 @@ func TestGetTag(t *testing.T) { t.Run(testCase.tagName, func(t *testing.T) { tagID := testhelper.CreateTag(t, testRepoPath, testCase.tagName, testCase.rev, &testhelper.CreateTagOpts{Message: testCase.message}) - tag, err := GetTagCatfile(c, tagID, testCase.tagName, testCase.trim, true) + tag, err := GetTagCatfile(ctx, c, tagID, testCase.tagName, testCase.trim, true) require.NoError(t, err) if testCase.trim && len(testCase.message) >= helper.MaxCommitOrTagMessageSize { testCase.message = testCase.message[:helper.MaxCommitOrTagMessageSize] -- cgit v1.2.3