diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-02-22 17:44:16 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-02-23 09:30:41 +0300 |
commit | 6c8fe7ca03be869f58ac9ef1e2b744770c6b8144 (patch) | |
tree | f6c199d31396b66c09c10780442c8229d5a451e5 | |
parent | 3c73c0df32d7aa933aa66d52f7ac19751e1a3e62 (diff) |
blob: Fix limiting of new LFS pointers
When retrieving new LFS pointers, we allow callers to pass in a limit to
only fetch the first `n` blobs. But the way this limit currently works
is completely unintuitive: we take the first `n` _candidates_ for LFS
pointers and filter those down to actual LFS pointers. Which effectively
means that even if there are new pointers, we may just return none at
all because git-rev-list(1) decided to output different objects first.
This behaviour is so weird that it cannot be the desired behaviour: if
less objects than the provided limit are returned to the caller, then
there is no way to determine whether there actually were any LFS
pointers or not. It's thus trivial to smuggle in LFS pointers which we
wouldn't detect.
Fix this issue by limiting actual LFS pointers, not candidate LFS
pointers.
-rw-r--r-- | changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml | 5 | ||||
-rw-r--r-- | internal/gitaly/service/blob/lfs_pointers_test.go | 35 | ||||
-rw-r--r-- | ruby/lib/gitlab/git/blob.rb | 15 | ||||
-rw-r--r-- | ruby/lib/gitlab/git/lfs_changes.rb | 4 |
4 files changed, 42 insertions, 17 deletions
diff --git a/changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml b/changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml new file mode 100644 index 000000000..2299a1020 --- /dev/null +++ b/changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml @@ -0,0 +1,5 @@ +--- +title: 'blob: Fix filtering of new LFS pointers' +merge_request: 3175 +author: +type: fixed diff --git a/internal/gitaly/service/blob/lfs_pointers_test.go b/internal/gitaly/service/blob/lfs_pointers_test.go index 8845454df..ddceb9cc8 100644 --- a/internal/gitaly/service/blob/lfs_pointers_test.go +++ b/internal/gitaly/service/blob/lfs_pointers_test.go @@ -203,17 +203,36 @@ func TestSuccessfulGetNewLFSPointersRequest(t *testing.T) { }, }, { - desc: "request with limit", + desc: "request with non-exceeding limit", request: &gitalypb.GetNewLFSPointersRequest{ Repository: testRepo, Revision: revision, - // This is limiting the amount of lines processed from the - // output of rev-list. For example, for this revision's output - // there's an LFS object id in line 4 and another in line 14, so - // any limit in [0, 3] will yield no pointers, [4,13] will yield - // one, and [14,] will yield two. This is weird but it's the - // way the current implementation works ¯\_(ツ)_/¯ - Limit: 19, + Limit: 9000, + }, + expectedLFSPointers: []*gitalypb.LFSPointer{ + { + Size: 133, + Data: []byte("version https://git-lfs.github.com/spec/v1\noid sha256:91eff75a492a3ed0dfcb544d7f31326bc4014c8551849c192fd1e48d4dd2c897\nsize 1575078\n\n"), + Oid: "0c304a93cb8430108629bbbcaa27db3343299bc0", + }, + { + Size: 127, + Data: []byte("version https://git-lfs.github.com/spec/v1\noid sha256:bad71f905b60729f502ca339f7c9f001281a3d12c68a5da7f15de8009f4bd63d\nsize 18\n"), + Oid: "bab31d249f78fba464d1b75799aad496cc07fa3b", + }, + { + Size: 127, + Data: []byte("version https://git-lfs.github.com/spec/v1\noid sha256:f2b0a1e7550e9b718dafc9b525a04879a766de62e4fbdfc46593d47f7ab74636\nsize 20\n"), + Oid: "f78df813119a79bfbe0442ab92540a61d3ab7ff3", + }, + }, + }, + { + desc: "request with smaller limit", + request: &gitalypb.GetNewLFSPointersRequest{ + Repository: testRepo, + Revision: revision, + Limit: 2, }, expectedLFSPointers: []*gitalypb.LFSPointer{ { diff --git a/ruby/lib/gitlab/git/blob.rb b/ruby/lib/gitlab/git/blob.rb index 70a835927..9cc7bb6eb 100644 --- a/ruby/lib/gitlab/git/blob.rb +++ b/ruby/lib/gitlab/git/blob.rb @@ -58,12 +58,15 @@ module Gitlab # Find LFS blobs given an array of sha ids # Returns array of Gitlab::Git::Blob # Does not guarantee blob data will be set - def batch_lfs_pointers(repository, blob_ids) - blob_ids.lazy - .select { |sha| possible_lfs_blob?(repository, sha) } - .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) } - .select(&:lfs_pointer?) - .force + def batch_lfs_pointers(repository, blob_ids, limit: nil) + selector = blob_ids.lazy + .select { |sha| possible_lfs_blob?(repository, sha) } + .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) } + .select(&:lfs_pointer?) + + return selector.first(limit) if limit + + selector.force end def binary?(data) diff --git a/ruby/lib/gitlab/git/lfs_changes.rb b/ruby/lib/gitlab/git/lfs_changes.rb index 3e1a55465..dcac05f71 100644 --- a/ruby/lib/gitlab/git/lfs_changes.rb +++ b/ruby/lib/gitlab/git/lfs_changes.rb @@ -23,9 +23,7 @@ module Gitlab } rev_list.new_objects(**rev_list_params(params)) do |object_ids| - object_ids = object_ids.take(object_limit) if object_limit - - Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids) + Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids, limit: object_limit) end end |