Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2021-09-21 08:33:52 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2021-10-06 13:39:32 +0300
commit2ff20d772203555ce7e44e3b37db7f3e9803d988 (patch)
tree8e2b31a0ff8bb29827c6ec2db86eebf0263c7168
parentc01aed4046e1fc06a4487b505be45970814f21ac (diff)
catfile: Allow reading objects with unknown type
The `objectReader` structure currently requires the caller to know about the object's type before it is actually being read: if the type does not match, then the object's data is discarded and an error is returned. This forces callers to first learn about the object type, e.g. via `git cat-file --batch-info`, which is additional overhead that is often not necessary at all. Refactor the interface to not require a type anymore. Like this, callers can use the interface without knowing about the object type beforehand. Instead, callers may still discard the object in case they realize it is not the type they expected.
-rw-r--r--internal/git/catfile/batch.go29
-rw-r--r--internal/git/catfile/object_reader.go22
-rw-r--r--internal/git/catfile/object_reader_test.go35
3 files changed, 39 insertions, 47 deletions
diff --git a/internal/git/catfile/batch.go b/internal/git/catfile/batch.go
index fa1dd8440..221277c87 100644
--- a/internal/git/catfile/batch.go
+++ b/internal/git/catfile/batch.go
@@ -2,6 +2,8 @@ package catfile
import (
"context"
+ "fmt"
+ "io"
"sync"
"github.com/opentracing/opentracing-go"
@@ -98,7 +100,7 @@ func (c *batch) Info(ctx context.Context, revision git.Revision) (*ObjectInfo, e
// the object type. Caller must consume the Reader before making another call
// on C.
func (c *batch) Tree(ctx context.Context, revision git.Revision) (*Object, error) {
- return c.objectReader.reader(ctx, revision, "tree")
+ return c.typedObjectReader(ctx, revision, "tree")
}
// Commit returns a raw commit object. It is an error if the revision does not
@@ -106,7 +108,7 @@ func (c *batch) Tree(ctx context.Context, revision git.Revision) (*Object, error
// check the object type. Caller must consume the Reader before making another
// call on C.
func (c *batch) Commit(ctx context.Context, revision git.Revision) (*Object, error) {
- return c.objectReader.reader(ctx, revision, "commit")
+ return c.typedObjectReader(ctx, revision, "commit")
}
// Blob returns a reader for the requested blob. The entire blob must be
@@ -115,11 +117,30 @@ func (c *batch) Commit(ctx context.Context, revision git.Revision) (*Object, err
// It is an error if the revision does not point to a blob. To prevent this,
// use Info to resolve the revision and check the object type.
func (c *batch) Blob(ctx context.Context, revision git.Revision) (*Object, error) {
- return c.objectReader.reader(ctx, revision, "blob")
+ return c.typedObjectReader(ctx, revision, "blob")
}
// Tag returns a raw tag object. Caller must consume the Reader before
// making another call on C.
func (c *batch) Tag(ctx context.Context, revision git.Revision) (*Object, error) {
- return c.objectReader.reader(ctx, revision, "tag")
+ return c.typedObjectReader(ctx, revision, "tag")
+}
+
+func (c *batch) typedObjectReader(ctx context.Context, revision git.Revision, expectedType string) (*Object, error) {
+ object, err := c.objectReader.reader(ctx, revision)
+ if err != nil {
+ return nil, err
+ }
+
+ if object.Type != expectedType {
+ if _, err := io.Copy(io.Discard, object); err != nil {
+ return nil, fmt.Errorf("discarding object: %w", err)
+ }
+
+ return nil, NotFoundError{
+ error: fmt.Errorf("expected %s to be a %s, got %s", object.Oid, expectedType, object.Type),
+ }
+ }
+
+ return object, nil
}
diff --git a/internal/git/catfile/object_reader.go b/internal/git/catfile/object_reader.go
index 14edf344a..024b423c4 100644
--- a/internal/git/catfile/object_reader.go
+++ b/internal/git/catfile/object_reader.go
@@ -90,15 +90,10 @@ func (o *objectReader) close() {
func (o *objectReader) reader(
ctx context.Context,
revision git.Revision,
- expectedType string,
) (*Object, error) {
- finish := startSpan(o.creationCtx, ctx, fmt.Sprintf("Batch.Object(%s)", expectedType), revision)
+ finish := startSpan(o.creationCtx, ctx, "Batch.Object", revision)
defer finish()
- if o.counter != nil {
- o.counter.WithLabelValues(expectedType).Inc()
- }
-
o.Lock()
defer o.Unlock()
@@ -123,19 +118,12 @@ func (o *objectReader) reader(
return nil, err
}
- o.n = oi.Size + 1
-
- if oi.Type != expectedType {
- // This is a programmer error and it should never happen. But if it does,
- // we need to leave the cat-file process in a good state
- if _, err := io.CopyN(io.Discard, o.stdout, o.n); err != nil {
- return nil, err
- }
- o.n = 0
-
- return nil, NotFoundError{error: fmt.Errorf("expected %s to be a %s, got %s", oi.Oid, expectedType, oi.Type)}
+ if o.counter != nil {
+ o.counter.WithLabelValues(oi.Type).Inc()
}
+ o.n = oi.Size + 1
+
return &Object{
ObjectInfo: *oi,
Reader: &objectDataReader{
diff --git a/internal/git/catfile/object_reader_test.go b/internal/git/catfile/object_reader_test.go
index 50f07c4e2..bc092aa0d 100644
--- a/internal/git/catfile/object_reader_test.go
+++ b/internal/git/catfile/object_reader_test.go
@@ -29,7 +29,7 @@ func TestObjectReader_reader(t *testing.T) {
reader, err := newObjectReader(ctx, newRepoExecutor(t, cfg, repoProto), nil)
require.NoError(t, err)
- object, err := reader.reader(ctx, "refs/heads/master", "commit")
+ object, err := reader.reader(ctx, "refs/heads/master")
require.NoError(t, err)
data, err := io.ReadAll(object)
@@ -41,7 +41,7 @@ func TestObjectReader_reader(t *testing.T) {
reader, err := newObjectReader(ctx, newRepoExecutor(t, cfg, repoProto), nil)
require.NoError(t, err)
- object, err := reader.reader(ctx, commitID.Revision(), "commit")
+ object, err := reader.reader(ctx, commitID.Revision())
require.NoError(t, err)
data, err := io.ReadAll(object)
@@ -50,32 +50,15 @@ func TestObjectReader_reader(t *testing.T) {
require.Contains(t, string(data), "Merge branch 'cherry-pick-ce369011' into 'master'\n")
})
- t.Run("read commit with wrong type", func(t *testing.T) {
- reader, err := newObjectReader(ctx, newRepoExecutor(t, cfg, repoProto), nil)
- require.NoError(t, err)
-
- _, err = reader.reader(ctx, commitID.Revision(), "tag")
- require.EqualError(t, err, fmt.Sprintf("expected %s to be a tag, got commit", commitID))
-
- // Verify that we're still able to read a commit after the previous read has failed.
- object, err := reader.reader(ctx, commitID.Revision(), "commit")
- require.NoError(t, err)
-
- data, err := io.ReadAll(object)
- require.NoError(t, err)
-
- require.Equal(t, commitContents, data)
- })
-
t.Run("read missing ref", func(t *testing.T) {
reader, err := newObjectReader(ctx, newRepoExecutor(t, cfg, repoProto), nil)
require.NoError(t, err)
- _, err = reader.reader(ctx, "refs/heads/does-not-exist", "commit")
+ _, err = reader.reader(ctx, "refs/heads/does-not-exist")
require.EqualError(t, err, "object not found")
// Verify that we're still able to read a commit after the previous read has failed.
- object, err := reader.reader(ctx, commitID.Revision(), "commit")
+ object, err := reader.reader(ctx, commitID.Revision())
require.NoError(t, err)
data, err := io.ReadAll(object)
@@ -88,11 +71,11 @@ func TestObjectReader_reader(t *testing.T) {
reader, err := newObjectReader(ctx, newRepoExecutor(t, cfg, repoProto), nil)
require.NoError(t, err)
- _, err = reader.reader(ctx, commitID.Revision(), "commit")
+ _, err = reader.reader(ctx, commitID.Revision())
require.NoError(t, err)
// We haven't yet consumed the previous object, so this must now fail.
- _, err = reader.reader(ctx, commitID.Revision(), "commit")
+ _, err = reader.reader(ctx, commitID.Revision())
require.EqualError(t, err, fmt.Sprintf("cannot create new Object: batch contains %d unread bytes", len(commitContents)+1))
})
@@ -100,14 +83,14 @@ func TestObjectReader_reader(t *testing.T) {
reader, err := newObjectReader(ctx, newRepoExecutor(t, cfg, repoProto), nil)
require.NoError(t, err)
- object, err := reader.reader(ctx, commitID.Revision(), "commit")
+ object, err := reader.reader(ctx, commitID.Revision())
require.NoError(t, err)
_, err = io.CopyN(io.Discard, object, 100)
require.NoError(t, err)
// We haven't yet consumed the previous object, so this must now fail.
- _, err = reader.reader(ctx, commitID.Revision(), "commit")
+ _, err = reader.reader(ctx, commitID.Revision())
require.EqualError(t, err, fmt.Sprintf("cannot create new Object: batch contains %d unread bytes", len(commitContents)-100+1))
})
@@ -125,7 +108,7 @@ func TestObjectReader_reader(t *testing.T) {
} {
require.Equal(t, float64(0), testutil.ToFloat64(counter.WithLabelValues(objectType)))
- object, err := reader.reader(ctx, revision, objectType)
+ object, err := reader.reader(ctx, revision)
require.NoError(t, err)
require.Equal(t, float64(1), testutil.ToFloat64(counter.WithLabelValues(objectType)))