Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2021-02-22 13:44:23 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2021-02-24 16:43:30 +0300
commit22ab115f6e0fc17c24e7923e871402cae6be3de3 (patch)
tree2f35c2296bade6853ba82cefc639c680f5d277af
parentf9603bfd2da6488628524000cdbb082afef5b921 (diff)
blob: Implement function to find LFS pointers
We're about to port the implementation of the blob service's LFS-related RPCs to Go. The most important building block for this is to be able to identify LFS pointers, either by walking a revision graph or by reading blob IDs directly. This commit introduces two new functions which implement this with `findLFSPointersByRevisions()` and `readLFSPointers()`. While the former one searches the revision graph for LFS pointers, the latter one accepts a list of potential LFS pointers and returns all which in fact are valid LFS pointers. Except for the newly added tests, both functions are not yet used.
-rw-r--r--internal/gitaly/service/blob/lfs_pointers.go159
-rw-r--r--internal/gitaly/service/blob/lfs_pointers_test.go185
2 files changed, 334 insertions, 10 deletions
diff --git a/internal/gitaly/service/blob/lfs_pointers.go b/internal/gitaly/service/blob/lfs_pointers.go
index 96ec4531f..517ae3d9f 100644
--- a/internal/gitaly/service/blob/lfs_pointers.go
+++ b/internal/gitaly/service/blob/lfs_pointers.go
@@ -1,10 +1,19 @@
package blob
import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
"fmt"
+ "io"
+ "io/ioutil"
+ "strings"
- "gitlab.com/gitlab-org/gitaly/internal/errors"
+ gitaly_errors "gitlab.com/gitlab-org/gitaly/internal/errors"
"gitlab.com/gitlab-org/gitaly/internal/git"
+ "gitlab.com/gitlab-org/gitaly/internal/git/catfile"
+ "gitlab.com/gitlab-org/gitaly/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/rubyserver"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
"google.golang.org/grpc/codes"
@@ -12,14 +21,14 @@ import (
)
const (
- // These limits are used as a heuristic to ignore files which can't be LFS
- // pointers. The format of these is described in
- // https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
-
- // LfsPointerMinSize is the minimum size for an lfs pointer text blob
- LfsPointerMinSize = 120
- // LfsPointerMaxSize is the maximum size for an lfs pointer text blob
- LfsPointerMaxSize = 200
+ // lfsPointerMaxSize is the maximum size for an lfs pointer text blob. This limit is used
+ // as a heuristic to filter blobs which can't be LFS pointers. The format of these pointers
+ // is described in https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer.
+ lfsPointerMaxSize = 200
+)
+
+var (
+ errInvalidRevision = errors.New("invalid revision")
)
type getLFSPointerByRevisionRequest interface {
@@ -62,7 +71,7 @@ func (s *server) GetLFSPointers(req *gitalypb.GetLFSPointersRequest, stream gita
func validateGetLFSPointersRequest(req *gitalypb.GetLFSPointersRequest) error {
if req.GetRepository() == nil {
- return errors.ErrEmptyRepository
+ return gitaly_errors.ErrEmptyRepository
}
if len(req.GetBlobIds()) == 0 {
@@ -152,3 +161,133 @@ func validateGetAllLFSPointersRequest(in *gitalypb.GetAllLFSPointersRequest) err
}
return nil
}
+
+// findLFSPointersByRevisions will return all LFS objects reachable via the given set of revisions.
+// Revisions accept all syntax supported by git-rev-list(1). This function also accepts a set of
+// options accepted by git-rev-list(1). Note that because git.Commands do not accept dashed
+// positional arguments, it is currently not possible to mix options and revisions (e.g. "git
+// rev-list master --not feature").
+func findLFSPointersByRevisions(
+ ctx context.Context,
+ repo *localrepo.Repo,
+ gitCmdFactory git.CommandFactory,
+ opts []git.Option,
+ revisions ...string,
+) (_ []*gitalypb.LFSPointer, returnErr error) {
+ for _, revision := range revisions {
+ if strings.HasPrefix(revision, "-") && revision != "--all" && revision != "--not" {
+ return nil, fmt.Errorf("%w: %q", errInvalidRevision, revision)
+ }
+ }
+
+ // git-rev-list(1) currently does not have any way to list all reachable objects of a
+ // certain type. As an optimization, we thus call it with `--object-names`, which will
+ // print an associated name for a given revision, if there is any. This allows us to skip
+ // at least some objects, namely all commits.
+ //
+ // It is questionable whether this optimization helps at all: by including object names,
+ // git cannot make use of bitmap indices. We thus pessimize git-rev-list(1) and optimize
+ // git-cat-file(1). And given that there's going to be a lot more blobs and trees (which
+ // both _do_ have an object name) than commits, it's probably an optimization which even
+ // slows down execution. Still, we keep this to stay compatible with the Ruby
+ // implementation.
+ var revListStderr bytes.Buffer
+ revlist, err := repo.Exec(ctx, nil, git.SubCmd{
+ Name: "rev-list",
+ Flags: append([]git.Option{
+ git.Flag{Name: "--in-commit-order"},
+ git.Flag{Name: "--objects"},
+ git.Flag{Name: "--object-names"},
+ git.Flag{Name: fmt.Sprintf("--filter=blob:limit=%d", lfsPointerMaxSize)},
+ }, opts...),
+ Args: revisions,
+ }, git.WithStderr(&revListStderr))
+ if err != nil {
+ return nil, fmt.Errorf("could not execute rev-list: %w", err)
+ }
+ defer func() {
+ if err := revlist.Wait(); err != nil && returnErr == nil {
+ returnErr = fmt.Errorf("rev-list failed: %w, stderr: %q",
+ err, revListStderr.String())
+ }
+ }()
+
+ return readLFSPointers(ctx, repo, gitCmdFactory, revlist, true)
+}
+
+// readLFSPointers reads object IDs of potential LFS pointers from the given reader and for each of
+// them, it will determine whether the referenced object is an LFS pointer. Objects which are not a
+// valid LFS pointer will be ignored. Objects which do not exist result in an error.
+//
+// If filterByObjectName is set to true, only IDs which have an associated object name will be
+// read. This is helpful to pass output of git-rev-list(1) with `--object-names` directly to this
+// function.
+func readLFSPointers(
+ ctx context.Context,
+ repo *localrepo.Repo,
+ gitCmdFactory git.CommandFactory,
+ objectIDReader io.Reader,
+ filterByObjectName bool,
+) ([]*gitalypb.LFSPointer, error) {
+ catfileBatch, err := catfile.New(ctx, gitCmdFactory, repo)
+ if err != nil {
+ return nil, fmt.Errorf("could not execute cat-file: %w", err)
+ }
+
+ var lfsPointers []*gitalypb.LFSPointer
+ scanner := bufio.NewScanner(objectIDReader)
+
+ for scanner.Scan() {
+ revision := git.Revision(scanner.Bytes())
+ if filterByObjectName {
+ revAndPath := strings.SplitN(revision.String(), " ", 2)
+ if len(revAndPath) != 2 {
+ continue
+ }
+ revision = git.Revision(revAndPath[0])
+ }
+
+ objectInfo, err := catfileBatch.Info(ctx, revision)
+ if err != nil {
+ return nil, fmt.Errorf("could not get LFS pointer info: %w", err)
+ }
+
+ if objectInfo.Type != "blob" {
+ continue
+ }
+
+ blob, err := catfileBatch.Blob(ctx, revision)
+ if err != nil {
+ return nil, fmt.Errorf("could not retrieve LFS pointer: %w", err)
+ }
+
+ data, err := ioutil.ReadAll(blob.Reader)
+ if err != nil {
+ return nil, fmt.Errorf("could not read LFS pointer: %w", err)
+ }
+
+ if !isLFSPointer(data) {
+ continue
+ }
+
+ lfsPointers = append(lfsPointers, &gitalypb.LFSPointer{
+ Data: data,
+ Size: int64(len(data)),
+ Oid: revision.String(),
+ })
+ }
+
+ if err := scanner.Err(); err != nil {
+ return nil, fmt.Errorf("scanning LFS pointers failed: %w", err)
+ }
+
+ return lfsPointers, nil
+}
+
+// isLFSPointer determines whether the given blob contents are an LFS pointer or not.
+func isLFSPointer(data []byte) bool {
+ // TODO: this is incomplete as it does not recognize pre-release version of LFS blobs with
+ // the "https://hawser.github.com/spec/v1" version. For compatibility with the Ruby RPC, we
+ // leave this as-is for now though.
+ return bytes.HasPrefix(data, []byte("version https://git-lfs.github.com/spec"))
+}
diff --git a/internal/gitaly/service/blob/lfs_pointers_test.go b/internal/gitaly/service/blob/lfs_pointers_test.go
index a51bdd9e1..413ab37f6 100644
--- a/internal/gitaly/service/blob/lfs_pointers_test.go
+++ b/internal/gitaly/service/blob/lfs_pointers_test.go
@@ -1,12 +1,16 @@
package blob
import (
+ "errors"
+ "fmt"
"io"
"os/exec"
"strings"
"testing"
"github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/internal/git"
+ "gitlab.com/gitlab-org/gitaly/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
@@ -492,3 +496,184 @@ func refHasPtr(t *testing.T, repoPath, ref string, lfsPtr *gitalypb.LFSPointer)
return strings.Contains(objects, lfsPtr.Oid)
}
+
+func TestFindLFSPointersByRevisions(t *testing.T) {
+ gitCmdFactory := git.NewExecCommandFactory(config.Config)
+
+ repoProto, _, cleanup := testhelper.NewTestRepo(t)
+ defer cleanup()
+ repo := localrepo.New(gitCmdFactory, repoProto, config.Config)
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ for _, tc := range []struct {
+ desc string
+ opts []git.Option
+ revs []string
+ expectedErr error
+ expectedLFSPointers []*gitalypb.LFSPointer
+ }{
+ {
+ desc: "--all",
+ opts: []git.Option{
+ git.Flag{Name: "--all"},
+ },
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ lfsPointers[lfsPointer1],
+ lfsPointers[lfsPointer2],
+ lfsPointers[lfsPointer3],
+ lfsPointers[lfsPointer4],
+ lfsPointers[lfsPointer5],
+ lfsPointers[lfsPointer6],
+ },
+ },
+ {
+ desc: "--not --all",
+ opts: []git.Option{
+ git.Flag{Name: "--not"},
+ git.Flag{Name: "--all"},
+ },
+ },
+ {
+ desc: "initial commit",
+ revs: []string{"1a0b36b3cdad1d2ee32457c102a8c0b7056fa863"},
+ },
+ {
+ desc: "master",
+ revs: []string{"master"},
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ lfsPointers[lfsPointer1],
+ },
+ },
+ {
+ desc: "multiple revisions",
+ revs: []string{"master", "moar-lfs-ptrs"},
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ lfsPointers[lfsPointer1],
+ lfsPointers[lfsPointer2],
+ lfsPointers[lfsPointer3],
+ },
+ },
+ {
+ desc: "invalid dashed option",
+ revs: []string{"master", "--foobar"},
+ expectedErr: fmt.Errorf("invalid revision: \"--foobar\""),
+ },
+ {
+ desc: "invalid revision",
+ revs: []string{"does-not-exist"},
+ expectedErr: fmt.Errorf("fatal: ambiguous argument 'does-not-exist'"),
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ actualLFSPointers, err := findLFSPointersByRevisions(
+ ctx, repo, gitCmdFactory, tc.opts, tc.revs...)
+ if tc.expectedErr == nil {
+ require.NoError(t, err)
+ } else {
+ require.Contains(t, err.Error(), tc.expectedErr.Error())
+ }
+ require.ElementsMatch(t, tc.expectedLFSPointers, actualLFSPointers)
+ })
+ }
+}
+
+func TestReadLFSPointers(t *testing.T) {
+ gitCmdFactory := git.NewExecCommandFactory(config.Config)
+
+ repoProto, _, cleanup := testhelper.NewTestRepo(t)
+ defer cleanup()
+ repo := localrepo.New(gitCmdFactory, repoProto, config.Config)
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ for _, tc := range []struct {
+ desc string
+ input string
+ filterByObjectName bool
+ expectedErr error
+ expectedLFSPointers []*gitalypb.LFSPointer
+ }{
+ {
+ desc: "single object ID",
+ input: strings.Join([]string{lfsPointer1}, "\n"),
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ lfsPointers[lfsPointer1],
+ },
+ },
+ {
+ desc: "multiple object IDs",
+ input: strings.Join([]string{
+ lfsPointer1,
+ lfsPointer2,
+ lfsPointer3,
+ lfsPointer4,
+ lfsPointer5,
+ lfsPointer6,
+ }, "\n"),
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ lfsPointers[lfsPointer1],
+ lfsPointers[lfsPointer2],
+ lfsPointers[lfsPointer3],
+ lfsPointers[lfsPointer4],
+ lfsPointers[lfsPointer5],
+ lfsPointers[lfsPointer6],
+ },
+ },
+ {
+ desc: "multiple object IDs with name filter",
+ input: strings.Join([]string{
+ lfsPointer1,
+ lfsPointer2,
+ lfsPointer3 + " x",
+ lfsPointer4,
+ lfsPointer5 + " z",
+ lfsPointer6 + " a",
+ }, "\n"),
+ filterByObjectName: true,
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ lfsPointers[lfsPointer3],
+ lfsPointers[lfsPointer5],
+ lfsPointers[lfsPointer6],
+ },
+ },
+ {
+ desc: "non-pointer object",
+ input: strings.Join([]string{
+ "60ecb67744cb56576c30214ff52294f8ce2def98",
+ }, "\n"),
+ },
+ {
+ desc: "mixed objects",
+ input: strings.Join([]string{
+ "60ecb67744cb56576c30214ff52294f8ce2def98",
+ lfsPointer2,
+ }, "\n"),
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ lfsPointers[lfsPointer2],
+ },
+ },
+ {
+ desc: "missing object",
+ input: strings.Join([]string{
+ "0101010101010101010101010101010101010101",
+ }, "\n"),
+ expectedErr: errors.New("object not found"),
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ reader := strings.NewReader(tc.input)
+
+ actualLFSPointers, err := readLFSPointers(
+ ctx, repo, gitCmdFactory, reader, tc.filterByObjectName)
+ if tc.expectedErr == nil {
+ require.NoError(t, err)
+ } else {
+ require.Contains(t, err.Error(), tc.expectedErr.Error())
+ }
+ require.ElementsMatch(t, tc.expectedLFSPointers, actualLFSPointers)
+ })
+ }
+}