Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2021-02-22 17:44:16 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2021-02-23 09:30:41 +0300
commit6c8fe7ca03be869f58ac9ef1e2b744770c6b8144 (patch)
treef6c199d31396b66c09c10780442c8229d5a451e5
parent3c73c0df32d7aa933aa66d52f7ac19751e1a3e62 (diff)
blob: Fix limiting of new LFS pointers
When retrieving new LFS pointers, we allow callers to pass in a limit to only fetch the first `n` blobs. But the way this limit currently works is completely unintuitive: we take the first `n` _candidates_ for LFS pointers and filter those down to actual LFS pointers. Which effectively means that even if there are new pointers, we may just return none at all because git-rev-list(1) decided to output different objects first. This behaviour is so weird that it cannot be the desired behaviour: if less objects than the provided limit are returned to the caller, then there is no way to determine whether there actually were any LFS pointers or not. It's thus trivial to smuggle in LFS pointers which we wouldn't detect. Fix this issue by limiting actual LFS pointers, not candidate LFS pointers.
-rw-r--r--changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml5
-rw-r--r--internal/gitaly/service/blob/lfs_pointers_test.go35
-rw-r--r--ruby/lib/gitlab/git/blob.rb15
-rw-r--r--ruby/lib/gitlab/git/lfs_changes.rb4
4 files changed, 42 insertions, 17 deletions
diff --git a/changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml b/changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml
new file mode 100644
index 000000000..2299a1020
--- /dev/null
+++ b/changelogs/unreleased/pks-blob-get-new-lfs-pointers-filtering.yml
@@ -0,0 +1,5 @@
+---
+title: 'blob: Fix filtering of new LFS pointers'
+merge_request: 3175
+author:
+type: fixed
diff --git a/internal/gitaly/service/blob/lfs_pointers_test.go b/internal/gitaly/service/blob/lfs_pointers_test.go
index 8845454df..ddceb9cc8 100644
--- a/internal/gitaly/service/blob/lfs_pointers_test.go
+++ b/internal/gitaly/service/blob/lfs_pointers_test.go
@@ -203,17 +203,36 @@ func TestSuccessfulGetNewLFSPointersRequest(t *testing.T) {
},
},
{
- desc: "request with limit",
+ desc: "request with non-exceeding limit",
request: &gitalypb.GetNewLFSPointersRequest{
Repository: testRepo,
Revision: revision,
- // This is limiting the amount of lines processed from the
- // output of rev-list. For example, for this revision's output
- // there's an LFS object id in line 4 and another in line 14, so
- // any limit in [0, 3] will yield no pointers, [4,13] will yield
- // one, and [14,] will yield two. This is weird but it's the
- // way the current implementation works ¯\_(ツ)_/¯
- Limit: 19,
+ Limit: 9000,
+ },
+ expectedLFSPointers: []*gitalypb.LFSPointer{
+ {
+ Size: 133,
+ Data: []byte("version https://git-lfs.github.com/spec/v1\noid sha256:91eff75a492a3ed0dfcb544d7f31326bc4014c8551849c192fd1e48d4dd2c897\nsize 1575078\n\n"),
+ Oid: "0c304a93cb8430108629bbbcaa27db3343299bc0",
+ },
+ {
+ Size: 127,
+ Data: []byte("version https://git-lfs.github.com/spec/v1\noid sha256:bad71f905b60729f502ca339f7c9f001281a3d12c68a5da7f15de8009f4bd63d\nsize 18\n"),
+ Oid: "bab31d249f78fba464d1b75799aad496cc07fa3b",
+ },
+ {
+ Size: 127,
+ Data: []byte("version https://git-lfs.github.com/spec/v1\noid sha256:f2b0a1e7550e9b718dafc9b525a04879a766de62e4fbdfc46593d47f7ab74636\nsize 20\n"),
+ Oid: "f78df813119a79bfbe0442ab92540a61d3ab7ff3",
+ },
+ },
+ },
+ {
+ desc: "request with smaller limit",
+ request: &gitalypb.GetNewLFSPointersRequest{
+ Repository: testRepo,
+ Revision: revision,
+ Limit: 2,
},
expectedLFSPointers: []*gitalypb.LFSPointer{
{
diff --git a/ruby/lib/gitlab/git/blob.rb b/ruby/lib/gitlab/git/blob.rb
index 70a835927..9cc7bb6eb 100644
--- a/ruby/lib/gitlab/git/blob.rb
+++ b/ruby/lib/gitlab/git/blob.rb
@@ -58,12 +58,15 @@ module Gitlab
# Find LFS blobs given an array of sha ids
# Returns array of Gitlab::Git::Blob
# Does not guarantee blob data will be set
- def batch_lfs_pointers(repository, blob_ids)
- blob_ids.lazy
- .select { |sha| possible_lfs_blob?(repository, sha) }
- .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) }
- .select(&:lfs_pointer?)
- .force
+ def batch_lfs_pointers(repository, blob_ids, limit: nil)
+ selector = blob_ids.lazy
+ .select { |sha| possible_lfs_blob?(repository, sha) }
+ .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) }
+ .select(&:lfs_pointer?)
+
+ return selector.first(limit) if limit
+
+ selector.force
end
def binary?(data)
diff --git a/ruby/lib/gitlab/git/lfs_changes.rb b/ruby/lib/gitlab/git/lfs_changes.rb
index 3e1a55465..dcac05f71 100644
--- a/ruby/lib/gitlab/git/lfs_changes.rb
+++ b/ruby/lib/gitlab/git/lfs_changes.rb
@@ -23,9 +23,7 @@ module Gitlab
}
rev_list.new_objects(**rev_list_params(params)) do |object_ids|
- object_ids = object_ids.take(object_limit) if object_limit
-
- Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids)
+ Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids, limit: object_limit)
end
end