gitlab.com/gitlab-org/gitlab-foss.git
Diffstat (limited to 'lib/gitlab/git/blob.rb')
-rw-r--r--  lib/gitlab/git/blob.rb  231
1 file changed, 170 insertions(+), 61 deletions(-)
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index a7aceab4c14..eabcf46cf58 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -1,3 +1,5 @@
+# Gitaly note: JV: seems to be completely migrated (behind feature flags).
+
 module Gitlab
   module Git
     class Blob
@@ -10,6 +12,12 @@ module Gitlab
       # blob data should use load_all_data!.
       MAX_DATA_DISPLAY_SIZE = 10.megabytes
 
+      # These limits are used as a heuristic to ignore files which can't be LFS
+      # pointers. The format of these is described in
+      # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
+      LFS_POINTER_MIN_SIZE = 120.bytes
+      LFS_POINTER_MAX_SIZE = 200.bytes
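+
+      # For illustration, an LFS pointer blob is a small text file of the
+      # following shape (the oid and size values here are made up):
+      #
+      #   version https://git-lfs.github.com/spec/v1
+      #   oid sha256:4665a5ea423c2713d436b5ee50593a9640e0018c1550b5a0002f74190d6f1a83
+      #   size 12345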
+
       attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary
 
       class << self
@@ -18,18 +26,132 @@ module Gitlab
             if is_enabled
               find_by_gitaly(repository, sha, path)
             else
-              find_by_rugged(repository, sha, path)
+              find_by_rugged(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
+            end
+          end
+        end
+
+        def raw(repository, sha)
+          Gitlab::GitalyClient.migrate(:git_blob_raw) do |is_enabled|
+            if is_enabled
+              repository.gitaly_blob_client.get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE)
+            else
+              rugged_raw(repository, sha, limit: MAX_DATA_DISPLAY_SIZE)
+            end
+          end
+        end
+
+        # Returns an array of Blob instances, specified in blob_references as
+        # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0,
+        # the full blob contents are returned. If blob_size_limit >= 0, each
+        # blob's data attribute will contain no more than blob_size_limit bytes.
+        #
+        # Keep in mind that this method may allocate a lot of memory. It is up
+        # to the caller to limit the number of blobs and blob_size_limit.
+        #
+        # Gitaly migration issue: https://gitlab.com/gitlab-org/gitaly/issues/798
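+        #
+        # Illustrative call (the sha and paths are hypothetical):
+        #
+        #   Gitlab::Git::Blob.batch(repository, [[sha, 'README.md'], [sha, 'VERSION']],
+        #                           blob_size_limit: 1.kilobyte)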
+        def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
+          Gitlab::GitalyClient.migrate(:list_blobs_by_sha_path) do |is_enabled|
+            if is_enabled
+              repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a
+            else
+              blob_references.map do |sha, path|
+                find_by_rugged(repository, sha, path, limit: blob_size_limit)
+              end
             end
           end
         end
 
-        def find_by_gitaly(repository, sha, path)
-          path = path.sub(/\A\/*/, '')
+        # Find LFS blobs given an array of sha ids
+        # Returns array of Gitlab::Git::Blob
+        # Does not guarantee blob data will be set
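+        #
+        # Illustrative call (lfs_oids is a hypothetical array of blob shas):
+        #
+        #   Gitlab::Git::Blob.batch_lfs_pointers(repository, lfs_oids)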
+        def batch_lfs_pointers(repository, blob_ids)
+          repository.gitaly_migrate(:batch_lfs_pointers) do |is_enabled|
+            if is_enabled
+              repository.gitaly_blob_client.batch_lfs_pointers(blob_ids.to_a)
+            else
+              blob_ids.lazy
+                      .select { |sha| possible_lfs_blob?(repository, sha) }
+                      .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) }
+                      .select(&:lfs_pointer?)
+                      .force
+            end
+          end
+        end
+
+        def binary?(data)
+          EncodingHelper.detect_libgit2_binary?(data)
+        end
+
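+        # Note that between? is inclusive on both ends, so for instance
+        # size_could_be_lfs?(120) and size_could_be_lfs?(200) are true, while
+        # size_could_be_lfs?(10.kilobytes) is false.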
+        def size_could_be_lfs?(size)
+          size.between?(LFS_POINTER_MIN_SIZE, LFS_POINTER_MAX_SIZE)
+        end
+
+        private
+
+        # Recursive search of blob id by path
+        #
+        # Ex.
+        #   blog/            # oid: 1a
+        #     app/           # oid: 2a
+        #       models/      # oid: 3a
+        #       file.rb      # oid: 4a
+        #
+        #
+        # Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => '4a'
+        #
+        def find_entry_by_path(repository, root_id, path)
+          root_tree = repository.lookup(root_id)
+          # Strip leading slashes
+          path[%r{^/*}] = ''
+          path_arr = path.split('/')
+
+          entry = root_tree.find do |entry|
+            entry[:name] == path_arr[0]
+          end
+
+          return nil unless entry
+
+          if path_arr.size > 1
+            return nil unless entry[:type] == :tree
+
+            path_arr.shift
+            find_entry_by_path(repository, entry[:oid], path_arr.join('/'))
+          else
+            [:blob, :commit].include?(entry[:type]) ? entry : nil
+          end
+        end
+
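+        # Submodule entries carry no blob content in this repository, so they
+        # are represented as empty, zero-size blobs pointing at the submodule
+        # commit sha.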
+        def submodule_blob(blob_entry, path, sha)
+          new(
+            id: blob_entry[:oid],
+            name: blob_entry[:name],
+            size: 0,
+            data: '',
+            path: path,
+            commit_id: sha
+          )
+        end
+
+        def find_by_gitaly(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
+          return unless path
+
+          path = path.sub(%r{\A/*}, '')
           path = '/' if path.empty?
           name = File.basename(path)
-          entry = Gitlab::GitalyClient::Commit.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE)
+
+          # To Gitaly, a limit of 0 means "unlimited", but callers pass 0 when
+          # they only want the blob's metadata. So we request a single byte
+          # instead, then clear that byte below: to the outside world it looks
+          # as if the limit really was 0.
+          req_limit = limit == 0 ? 1 : limit
+
+          entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, req_limit)
           return unless entry
 
+          entry.data = "" if limit == 0
+
           case entry.type
           when :COMMIT
             new(
@@ -41,10 +163,6 @@ module Gitlab
               commit_id: sha
             )
           when :BLOB
-            # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
-            # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
-            # which is what we use below to keep a consistent behavior.
-            detect = CharlockHolmes::EncodingDetector.new(8000).detect(entry.data)
             new(
               id: entry.oid,
               name: name,
@@ -53,14 +171,16 @@ module Gitlab
               mode: entry.mode.to_s(8),
               path: path,
               commit_id: sha,
-              binary: detect && detect[:type] == :binary
+              binary: binary?(entry.data)
             )
           end
         end
 
-        def find_by_rugged(repository, sha, path)
-          commit = repository.lookup(sha)
-          root_tree = commit.tree
+        def find_by_rugged(repository, sha, path, limit:)
+          return unless path
+
+          rugged_commit = repository.lookup(sha)
+          root_tree = rugged_commit.tree
+
           blob_entry = find_entry_by_path(repository, root_tree.oid, path)
@@ -76,7 +196,8 @@ module Gitlab
                id: blob.oid,
                name: blob_entry[:name],
                size: blob.size,
-               data: blob.content(MAX_DATA_DISPLAY_SIZE),
+               # Rugged::Blob#content is expensive; don't call it if we don't have to.
+               data: limit.zero? ? '' : blob.content(limit),
                mode: blob_entry[:filemode].to_s(8),
                path: path,
                commit_id: sha,
@@ -84,71 +205,42 @@ module Gitlab
              )
            end
          end
+        rescue Rugged::ReferenceError
+          nil
         end
 
-        def raw(repository, sha)
+        def rugged_raw(repository, sha, limit:)
           blob = repository.lookup(sha)
+          return unless blob.is_a?(Rugged::Blob)
+
           new(
             id: blob.oid,
             size: blob.size,
-            data: blob.content(MAX_DATA_DISPLAY_SIZE),
+            data: blob.content(limit),
             binary: blob.binary?
           )
         end
 
-        # Recursive search of blob id by path
-        #
-        # Ex.
-        #   blog/            # oid: 1a
-        #     app/           # oid: 2a
-        #       models/      # oid: 3a
-        #       file.rb      # oid: 4a
-        #
-        #
-        # Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => '4a'
-        #
-        def find_entry_by_path(repository, root_id, path)
-          root_tree = repository.lookup(root_id)
-          # Strip leading slashes
-          path[/^\/*/] = ''
-          path_arr = path.split('/')
-
-          entry = root_tree.find do |entry|
-            entry[:name] == path_arr[0]
-          end
-
-          return nil unless entry
-
-          if path_arr.size > 1
-            return nil unless entry[:type] == :tree
-            path_arr.shift
-            find_entry_by_path(repository, entry[:oid], path_arr.join('/'))
-          else
-            [:blob, :commit].include?(entry[:type]) ? entry : nil
-          end
-        end
+        # Efficient lookup to determine if object size
+        # and type make it a possible LFS blob without loading
+        # blob content into memory with repository.lookup(sha)
+        def possible_lfs_blob?(repository, sha)
+          object_header = repository.rugged.read_header(sha)
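+          # (Rugged's read_header reads only the object's metadata; as used
+          # below, it yields a hash with :type and :len keys.)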
-        def submodule_blob(blob_entry, path, sha)
-          new(
-            id: blob_entry[:oid],
-            name: blob_entry[:name],
-            size: 0,
-            data: '',
-            path: path,
-            commit_id: sha
-          )
+          object_header[:type] == :blob &&
+            size_could_be_lfs?(object_header[:len])
         end
       end
 
       def initialize(options)
         %w(id name path size data mode commit_id binary).each do |key|
-          self.send("#{key}=", options[key.to_sym])
+          self.__send__("#{key}=", options[key.to_sym]) # rubocop:disable GitlabSecurity/PublicSend
         end
-        @loaded_all_data = false
+
         # Retain the actual size before it is encoded
         @loaded_size = @data.bytesize if @data
+        @loaded_all_data = @loaded_size == size
       end
 
       def binary?
@@ -163,18 +255,35 @@ module Gitlab
       # memory as a Ruby string.
       def load_all_data!(repository)
         return if @data == '' # don't mess with submodule blobs
-        return @data if @loaded_all_data
+
+        # Even if we return early, recalculate whether this blob is binary, in
+        # case it was initialized as text but the full data is not.
+        @binary = nil
+
+        return if @loaded_all_data
+
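+        # Below, limit: -1 follows the "negative limit means the entire blob"
+        # convention documented on .batch above.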
+        @data = Gitlab::GitalyClient.migrate(:git_blob_load_all_data) do |is_enabled|
+          begin
+            if is_enabled
+              repository.gitaly_blob_client.get_blob(oid: id, limit: -1).data
+            else
+              repository.lookup(id).content
+            end
+          end
+        end
+
         @loaded_all_data = true
-        @data = repository.lookup(id).content
         @loaded_size = @data.bytesize
-        @binary = nil
       end
 
       def name
         encode! @name
       end
 
+      def path
+        encode! @path
+      end
+
       def truncated?
         size && (size > loaded_size)
       end
@@ -185,7 +294,7 @@ module Gitlab
       # size
       # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
       def lfs_pointer?
-        has_lfs_version_key? && lfs_oid.present? && lfs_size.present?
+        self.class.size_could_be_lfs?(size) && has_lfs_version_key? && lfs_oid.present? && lfs_size.present?
       end
 
       def lfs_oid