diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-09-20 14:04:45 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-10-06 13:39:32 +0300 |
commit | 44f8b3e88731ed371d9484b90a3cf39173e11f9f (patch) | |
tree | ae5401e8414cf680ef3c79a074577259ece8151f | |
parent | 02951427e90b7a2194a4006c307b64364af17b59 (diff) |
blob: Convert `GetBlob()` to use object reader
The `GetBlob()` RPC retrieves a single blob by object ID from the
repository's object database. This is implemented via the catfile cache
by first retrieving object info via `Batch.Info()`, and then the object
data is retrieved via another call to `Batch.Blob()` in case the info
says it is indeed a blob. This is inefficient: we spawn two processes
and have two round trips to those processes. The only upside is that we
avoid reading objects which are not a blob, but given that non-blob
objects are typically not large-ish, it is debatable whether this really
has much of an impact at all.
Refactor the code to use our new cached ObjectReader interface and just
read the object directly. This only requires a single process and avoids
one round trip by just reading the object directly.
Changelog: performance
-rw-r--r-- | internal/gitaly/service/blob/get_blob.go | 25 |
1 files changed, 12 insertions, 13 deletions
diff --git a/internal/gitaly/service/blob/get_blob.go b/internal/gitaly/service/blob/get_blob.go index 177a9756a..4e6068d0c 100644 --- a/internal/gitaly/service/blob/get_blob.go +++ b/internal/gitaly/service/blob/get_blob.go @@ -20,37 +20,36 @@ func (s *server) GetBlob(in *gitalypb.GetBlobRequest, stream gitalypb.BlobServic return helper.ErrInvalidArgumentf("GetBlob: %v", err) } - c, err := s.catfileCache.BatchProcess(stream.Context(), repo) + objectReader, err := s.catfileCache.ObjectReader(stream.Context(), repo) if err != nil { return helper.ErrInternalf("GetBlob: %v", err) } - objectInfo, err := c.Info(ctx, git.Revision(in.Oid)) - if err != nil && !catfile.IsNotFound(err) { + blob, err := objectReader.Object(ctx, git.Revision(in.Oid)) + if err != nil { + if catfile.IsNotFound(err) { + return helper.ErrUnavailable(stream.Send(&gitalypb.GetBlobResponse{})) + } return helper.ErrInternalf("GetBlob: %v", err) } - if catfile.IsNotFound(err) || objectInfo.Type != "blob" { + + if blob.Type != "blob" { return helper.ErrUnavailable(stream.Send(&gitalypb.GetBlobResponse{})) } - readLimit := objectInfo.Size + readLimit := blob.Size if in.Limit >= 0 && in.Limit < readLimit { readLimit = in.Limit } firstMessage := &gitalypb.GetBlobResponse{ - Size: objectInfo.Size, - Oid: objectInfo.Oid.String(), + Size: blob.Size, + Oid: blob.Oid.String(), } if readLimit == 0 { return helper.ErrUnavailable(stream.Send(firstMessage)) } - blobObj, err := c.Blob(ctx, git.Revision(objectInfo.Oid)) - if err != nil { - return helper.ErrInternalf("GetBlob: %v", err) - } - sw := streamio.NewWriter(func(p []byte) error { msg := &gitalypb.GetBlobResponse{} if firstMessage != nil { @@ -61,7 +60,7 @@ func (s *server) GetBlob(in *gitalypb.GetBlobRequest, stream gitalypb.BlobServic return stream.Send(msg) }) - _, err = io.CopyN(sw, blobObj.Reader, readLimit) + _, err = io.CopyN(sw, blob.Reader, readLimit) if err != nil { return helper.ErrUnavailablef("GetBlob: send: %v", err) } |