diff options
author | Jacob Vosmaer (GitLab) <jacob@gitlab.com> | 2018-07-12 17:44:29 +0300 |
---|---|---|
committer | Alejandro Rodríguez <alejorro70@gmail.com> | 2018-07-12 17:44:29 +0300 |
commit | 93edef5be244a5368dca99a5c9b1336c9450911d (patch) | |
tree | 7209a4639a873b394936cda84d7a85ee68341198 | |
parent | 8f5108a6837ca5e92dd28d7928818cdb4f815ece (diff) |
Vendor gitlab_git at 555afe8971c9ab6f9
-rwxr-xr-x | _support/vendor-gitlab-git | 2 | ||||
-rw-r--r-- | changelogs/unreleased/vendor-gitlab-git-20180712-jv.yml | 5 | ||||
-rw-r--r-- | ruby/lib/gitlab/git/blob.rb | 189 | ||||
-rw-r--r-- | ruby/lib/gitlab/git/rev_list.rb (renamed from ruby/vendor/gitlab_git/lib/gitlab/git/rev_list.rb) | 0 | ||||
-rw-r--r-- | ruby/spec/lib/gitlab/git/rev_list_spec.rb | 98 | ||||
-rw-r--r-- | ruby/spec/test_repo_helper.rb | 10 | ||||
-rw-r--r-- | ruby/vendor/gitlab_git/REVISION | 2 | ||||
-rw-r--r-- | ruby/vendor/gitlab_git/lib/gitlab/git/blob.rb | 263 | ||||
-rw-r--r-- | ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb | 21 |
9 files changed, 315 insertions, 275 deletions
diff --git a/_support/vendor-gitlab-git b/_support/vendor-gitlab-git index 7e7510bab..94bf92e91 100755 --- a/_support/vendor-gitlab-git +++ b/_support/vendor-gitlab-git @@ -11,12 +11,14 @@ FILE_LIST = %w[ # We have (already) stopped vendoring these files. EXCLUDE = %w[ + lib/gitlab/git/blob.rb lib/gitlab/git/blob_snippet.rb lib/gitlab/git/commit_stats.rb lib/gitlab/git/compare.rb lib/gitlab/git/conflict/resolver.rb lib/gitlab/git/diff_collection.rb lib/gitlab/git/gitmodules_parser.rb + lib/gitlab/git/rev_list.rb lib/gitlab/git/storage/ lib/gitlab/git/storage.rb lib/gitlab/git/tree.rb diff --git a/changelogs/unreleased/vendor-gitlab-git-20180712-jv.yml b/changelogs/unreleased/vendor-gitlab-git-20180712-jv.yml new file mode 100644 index 000000000..aa3411c61 --- /dev/null +++ b/changelogs/unreleased/vendor-gitlab-git-20180712-jv.yml @@ -0,0 +1,5 @@ +--- +title: Vendor gitlab_git at 555afe8971c9ab6f9 +merge_request: 803 +author: +type: other diff --git a/ruby/lib/gitlab/git/blob.rb b/ruby/lib/gitlab/git/blob.rb index 82619b1dc..5c164fc35 100644 --- a/ruby/lib/gitlab/git/blob.rb +++ b/ruby/lib/gitlab/git/blob.rb @@ -1,6 +1,23 @@ module Gitlab module Git class Blob + include Linguist::BlobHelper + include Gitlab::EncodingHelper + + # This number is the maximum amount of data that we want to display to + # the user. We load as much as we can for encoding detection + # (Linguist) and LFS pointer parsing. All other cases where we need full + # blob data should use load_all_data!. + MAX_DATA_DISPLAY_SIZE = 10.megabytes + + # These limits are used as a heuristic to ignore files which can't be LFS + # pointers. The format of these is described in + # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer + LFS_POINTER_MIN_SIZE = 120.bytes + LFS_POINTER_MAX_SIZE = 200.bytes + + attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary + class << self def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE) return unless path @@ -37,6 +54,178 @@ module Gitlab rescue Rugged::ReferenceError nil end + + # Find LFS blobs given an array of sha ids + # Returns array of Gitlab::Git::Blob + # Does not guarantee blob data will be set + def batch_lfs_pointers(repository, blob_ids) + blob_ids.lazy + .select { |sha| possible_lfs_blob?(repository, sha) } + .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) } + .select(&:lfs_pointer?) + .force + end + + def binary?(data) + EncodingHelper.detect_libgit2_binary?(data) + end + + def size_could_be_lfs?(size) + size.between?(LFS_POINTER_MIN_SIZE, LFS_POINTER_MAX_SIZE) + end + + private + + # Recursive search of blob id by path + # + # Ex. + # blog/ # oid: 1a + # app/ # oid: 2a + # models/ # oid: 3a + # file.rb # oid: 4a + # + # + # Blob.find_entry_by_path(repo, '1a', 'blog', 'app', 'file.rb') # => '4a' + # + def find_entry_by_path(repository, root_id, *path_parts) + root_tree = repository.lookup(root_id) + + entry = root_tree.find do |entry| + entry[:name] == path_parts[0] + end + + return nil unless entry + + if path_parts.size > 1 + return nil unless entry[:type] == :tree + + path_parts.shift + find_entry_by_path(repository, entry[:oid], *path_parts) + else + [:blob, :commit].include?(entry[:type]) ? entry : nil + end + end + + def submodule_blob(blob_entry, path, sha) + new( + id: blob_entry[:oid], + name: blob_entry[:name], + size: 0, + data: '', + path: path, + commit_id: sha + ) + end + + def rugged_raw(repository, sha, limit:) + blob = repository.lookup(sha) + + return unless blob.is_a?(Rugged::Blob) + + new( + id: blob.oid, + size: blob.size, + data: blob.content(limit), + binary: blob.binary? + ) + end + + # Efficient lookup to determine if object size + # and type make it a possible LFS blob without loading + # blob content into memory with repository.lookup(sha) + def possible_lfs_blob?(repository, sha) + object_header = repository.rugged.read_header(sha) + + object_header[:type] == :blob && + size_could_be_lfs?(object_header[:len]) + end + end + + def initialize(options) + %w(id name path size data mode commit_id binary).each do |key| + self.__send__("#{key}=", options[key.to_sym]) # rubocop:disable GitlabSecurity/PublicSend + end + + # Retain the actual size before it is encoded + @loaded_size = @data.bytesize if @data + @loaded_all_data = @loaded_size == size + end + + def binary? + @binary.nil? ? super : @binary == true + end + + def data + encode! @data + end + + # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into + # memory as a Ruby string. + def load_all_data!(repository) + return if @data == '' # don't mess with submodule blobs + + # Even if we return early, recalculate wether this blob is binary in + # case a blob was initialized as text but the full data isn't + @binary = nil + + return if @loaded_all_data + + @data = repository.gitaly_blob_client.get_blob(oid: id, limit: -1).data + @loaded_all_data = true + @loaded_size = @data.bytesize + end + + def name + encode! @name + end + + def path + encode! @path + end + + def truncated? + size && (size > loaded_size) + end + + # Valid LFS object pointer is a text file consisting of + # version + # oid + # size + # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer + def lfs_pointer? + self.class.size_could_be_lfs?(size) && has_lfs_version_key? && lfs_oid.present? && lfs_size.present? + end + + def lfs_oid + if has_lfs_version_key? + oid = data.match(/(?<=sha256:)([0-9a-f]{64})/) + return oid[1] if oid + end + + nil + end + + def lfs_size + if has_lfs_version_key? + size = data.match(/(?<=size )([0-9]+)/) + return size[1].to_i if size + end + + nil + end + + def external_storage + return unless lfs_pointer? + + :lfs + end + + alias_method :external_size, :lfs_size + + private + + def has_lfs_version_key? + !empty? && text? && data.start_with?("version https://git-lfs.github.com/spec") end end end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/rev_list.rb b/ruby/lib/gitlab/git/rev_list.rb index 5fdad077e..5fdad077e 100644 --- a/ruby/vendor/gitlab_git/lib/gitlab/git/rev_list.rb +++ b/ruby/lib/gitlab/git/rev_list.rb diff --git a/ruby/spec/lib/gitlab/git/rev_list_spec.rb b/ruby/spec/lib/gitlab/git/rev_list_spec.rb new file mode 100644 index 000000000..c6601afb9 --- /dev/null +++ b/ruby/spec/lib/gitlab/git/rev_list_spec.rb @@ -0,0 +1,98 @@ +require 'spec_helper' + +describe Gitlab::Git::RevList do + include TestRepo + + let(:repository) { gitlab_git_from_gitaly(test_repo_read_only) } + let(:rev_list) { described_class.new(repository, newrev: 'newrev') } + + def args_for_popen(args_list) + [Gitlab.config.git.bin_path, 'rev-list', *args_list] + end + + def stub_popen_rev_list(*additional_args, with_lazy_block: true, output:) + repo_path = repository.path + + params = [ + args_for_popen(additional_args), + repo_path, + {}, + hash_including(lazy_block: with_lazy_block ? anything : nil) + ] + + expect(repository).to receive(:popen).with(*params) do |*_, lazy_block:| + output = lazy_block.call(output.lines.lazy.map(&:chomp)) if with_lazy_block + + [output, 0] + end + end + + context "#new_refs" do + it 'calls out to `popen`' do + stub_popen_rev_list('newrev', '--not', '--all', with_lazy_block: false, output: "sha1\nsha2") + + expect(rev_list.new_refs).to eq(%w[sha1 sha2]) + end + end + + context '#new_objects' do + it 'fetches list of newly pushed objects using rev-list' do + stub_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2") + + expect { |b| rev_list.new_objects(&b) }.to yield_with_args(%w[sha1 sha2]) + end + + it 'can skip pathless objects' do + stub_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2 path/to/file") + + expect { |b| rev_list.new_objects(require_path: true, &b) }.to yield_with_args(%w[sha2]) + end + + it 'can handle non utf-8 paths' do + non_utf_char = [0x89].pack("c*").force_encoding("UTF-8") + stub_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha2 πå†h/†ø/ƒîlé#{non_utf_char}\nsha1") + + rev_list.new_objects(require_path: true) do |object_ids| + expect(object_ids.force).to eq(%w[sha2]) + end + end + + it 'can yield a lazy enumerator' do + stub_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2") + + rev_list.new_objects do |object_ids| + expect(object_ids).to be_a Enumerator::Lazy + end + end + + it 'returns the result of the block when given' do + stub_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2") + + objects = rev_list.new_objects do |object_ids| + object_ids.first + end + + expect(objects).to eq 'sha1' + end + + it 'can accept list of references to exclude' do + stub_popen_rev_list('newrev', '--not', 'master', '--objects', output: "sha1\nsha2") + + expect { |b| rev_list.new_objects(not_in: ['master'], &b) }.to yield_with_args(%w[sha1 sha2]) + end + + it 'handles empty list of references to exclude as listing all known objects' do + stub_popen_rev_list('newrev', '--objects', output: "sha1\nsha2") + + expect { |b| rev_list.new_objects(not_in: [], &b) }.to yield_with_args(%w[sha1 sha2]) + end + end + + context '#all_objects' do + it 'fetches list of all pushed objects using rev-list' do + stub_popen_rev_list('--all', '--objects', output: "sha1\nsha2") + + expect { |b| rev_list.all_objects(&b) }.to yield_with_args(%w[sha1 sha2]) + end + end +end diff --git a/ruby/spec/test_repo_helper.rb b/ruby/spec/test_repo_helper.rb index 53cdfe536..afcddf9ca 100644 --- a/ruby/spec/test_repo_helper.rb +++ b/ruby/spec/test_repo_helper.rb @@ -37,6 +37,16 @@ module TestRepo File.join(DEFAULT_STORAGE_DIR, gitaly_repo.relative_path) end + def gitlab_git_from_gitaly(gitaly_repo) + Gitlab::Git::Repository.new( + gitaly_repo, + repo_path_from_gitaly(gitaly_repo), + '', + nil, + '' + ) + end + def self.clone_new_repo!(destination) return if system(*%W[git clone --quiet --bare #{TEST_REPO_ORIGIN} #{destination}]) abort "Failed to clone test repo. Try running 'make prepare-tests' and try again." diff --git a/ruby/vendor/gitlab_git/REVISION b/ruby/vendor/gitlab_git/REVISION index 67a9ef530..a3e915a34 100644 --- a/ruby/vendor/gitlab_git/REVISION +++ b/ruby/vendor/gitlab_git/REVISION @@ -1 +1 @@ -49d7f92fd7476b4fb10e44ff92f36be99de0df49 +555afe8971c9ab6f907cfc248842e268109ce367 diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/blob.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/blob.rb deleted file mode 100644 index 96fa94d57..000000000 --- a/ruby/vendor/gitlab_git/lib/gitlab/git/blob.rb +++ /dev/null @@ -1,263 +0,0 @@ -# Gitaly note: JV: seems to be completely migrated (behind feature flags). - -module Gitlab - module Git - class Blob - include Linguist::BlobHelper - include Gitlab::EncodingHelper - - # This number is the maximum amount of data that we want to display to - # the user. We load as much as we can for encoding detection - # (Linguist) and LFS pointer parsing. All other cases where we need full - # blob data should use load_all_data!. - MAX_DATA_DISPLAY_SIZE = 10.megabytes - - # These limits are used as a heuristic to ignore files which can't be LFS - # pointers. The format of these is described in - # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer - LFS_POINTER_MIN_SIZE = 120.bytes - LFS_POINTER_MAX_SIZE = 200.bytes - - attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary - - class << self - def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE) - return unless path - - path = path.sub(%r{\A/*}, '') - path = '/' if path.empty? - name = File.basename(path) - - # Gitaly will think that setting the limit to 0 means unlimited, while - # the client might only need the metadata and thus set the limit to 0. - # In this method we'll then set the limit to 1, but clear the byte of data - # that we got back so for the outside world it looks like the limit was - # actually 0. - req_limit = limit == 0 ? 1 : limit - - entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, req_limit) - return unless entry - - entry.data = "" if limit == 0 - - case entry.type - when :COMMIT - new(id: entry.oid, name: name, size: 0, data: '', path: path, commit_id: sha) - when :BLOB - new(id: entry.oid, name: name, size: entry.size, data: entry.data.dup, mode: entry.mode.to_s(8), - path: path, commit_id: sha, binary: binary?(entry.data)) - end - end - - def raw(repository, sha) - repository.gitaly_blob_client.get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE) - end - - # Returns an array of Blob instances, specified in blob_references as - # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the - # full blob contents are returned. If blob_size_limit >= 0 then each blob will - # contain no more than limit bytes in its data attribute. - # - # Keep in mind that this method may allocate a lot of memory. It is up - # to the caller to limit the number of blobs and blob_size_limit. - # - # Gitaly migration issue: https://gitlab.com/gitlab-org/gitaly/issues/798 - def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE) - Gitlab::GitalyClient.migrate(:list_blobs_by_sha_path) do |is_enabled| - if is_enabled - repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a - else - blob_references.map do |sha, path| - find(repository, sha, path, limit: blob_size_limit) - end - end - end - end - - # Returns an array of Blob instances just with the metadata, that means - # the data attribute has no content. - def batch_metadata(repository, blob_references) - batch(repository, blob_references, blob_size_limit: 0) - end - - # Find LFS blobs given an array of sha ids - # Returns array of Gitlab::Git::Blob - # Does not guarantee blob data will be set - def batch_lfs_pointers(repository, blob_ids) - repository.gitaly_migrate(:batch_lfs_pointers) do |is_enabled| - if is_enabled - repository.gitaly_blob_client.batch_lfs_pointers(blob_ids.to_a) - else - blob_ids.lazy - .select { |sha| possible_lfs_blob?(repository, sha) } - .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) } - .select(&:lfs_pointer?) - .force - end - end - end - - def binary?(data) - EncodingHelper.detect_libgit2_binary?(data) - end - - def size_could_be_lfs?(size) - size.between?(LFS_POINTER_MIN_SIZE, LFS_POINTER_MAX_SIZE) - end - - private - - # Recursive search of blob id by path - # - # Ex. - # blog/ # oid: 1a - # app/ # oid: 2a - # models/ # oid: 3a - # file.rb # oid: 4a - # - # - # Blob.find_entry_by_path(repo, '1a', 'blog', 'app', 'file.rb') # => '4a' - # - def find_entry_by_path(repository, root_id, *path_parts) - root_tree = repository.lookup(root_id) - - entry = root_tree.find do |entry| - entry[:name] == path_parts[0] - end - - return nil unless entry - - if path_parts.size > 1 - return nil unless entry[:type] == :tree - - path_parts.shift - find_entry_by_path(repository, entry[:oid], *path_parts) - else - [:blob, :commit].include?(entry[:type]) ? entry : nil - end - end - - def submodule_blob(blob_entry, path, sha) - new( - id: blob_entry[:oid], - name: blob_entry[:name], - size: 0, - data: '', - path: path, - commit_id: sha - ) - end - - def rugged_raw(repository, sha, limit:) - blob = repository.lookup(sha) - - return unless blob.is_a?(Rugged::Blob) - - new( - id: blob.oid, - size: blob.size, - data: blob.content(limit), - binary: blob.binary? - ) - end - - # Efficient lookup to determine if object size - # and type make it a possible LFS blob without loading - # blob content into memory with repository.lookup(sha) - def possible_lfs_blob?(repository, sha) - object_header = repository.rugged.read_header(sha) - - object_header[:type] == :blob && - size_could_be_lfs?(object_header[:len]) - end - end - - def initialize(options) - %w(id name path size data mode commit_id binary).each do |key| - self.__send__("#{key}=", options[key.to_sym]) # rubocop:disable GitlabSecurity/PublicSend - end - - # Retain the actual size before it is encoded - @loaded_size = @data.bytesize if @data - @loaded_all_data = @loaded_size == size - end - - def binary? - @binary.nil? ? super : @binary == true - end - - def data - encode! @data - end - - # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into - # memory as a Ruby string. - def load_all_data!(repository) - return if @data == '' # don't mess with submodule blobs - - # Even if we return early, recalculate wether this blob is binary in - # case a blob was initialized as text but the full data isn't - @binary = nil - - return if @loaded_all_data - - @data = repository.gitaly_blob_client.get_blob(oid: id, limit: -1).data - @loaded_all_data = true - @loaded_size = @data.bytesize - end - - def name - encode! @name - end - - def path - encode! @path - end - - def truncated? - size && (size > loaded_size) - end - - # Valid LFS object pointer is a text file consisting of - # version - # oid - # size - # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer - def lfs_pointer? - self.class.size_could_be_lfs?(size) && has_lfs_version_key? && lfs_oid.present? && lfs_size.present? - end - - def lfs_oid - if has_lfs_version_key? - oid = data.match(/(?<=sha256:)([0-9a-f]{64})/) - return oid[1] if oid - end - - nil - end - - def lfs_size - if has_lfs_version_key? - size = data.match(/(?<=size )([0-9]+)/) - return size[1].to_i if size - end - - nil - end - - def external_storage - return unless lfs_pointer? - - :lfs - end - - alias_method :external_size, :lfs_size - - private - - def has_lfs_version_key? - !empty? && text? && data.start_with?("version https://git-lfs.github.com/spec") - end - end - end -end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb index a1a050647..d50ac2707 100644 --- a/ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb @@ -86,9 +86,6 @@ module Gitlab # Relative path of repo attr_reader :relative_path - # Rugged repo object - attr_reader :rugged - attr_reader :gitlab_projects, :storage, :gl_repository, :relative_path # This initializer method is only used on the client side (gitlab-ce). @@ -112,8 +109,9 @@ module Gitlab [storage, relative_path] == [other.storage, other.relative_path] end + # This method will be removed when Gitaly reaches v1.1. def path - @path ||= File.join( + File.join( Gitlab.config.repositories.storages[@storage].legacy_disk_path, @relative_path ) end @@ -127,8 +125,9 @@ module Gitlab raise Gitlab::Git::CommandError.new(e.message) end + # This method will be removed when Gitaly reaches v1.1. def rugged - @rugged ||= circuit_breaker.perform do + circuit_breaker.perform do Rugged::Repository.new(path, alternates: alternate_object_directories) end rescue Rugged::RepositoryError, Rugged::OSError @@ -713,12 +712,6 @@ module Gitlab Gitlab::Git.committer_hash(email: user.email, name: user.name) end - def create_commit(params = {}) - params[:message].delete!("\r") - - Rugged::Commit.create(rugged, params) - end - # Delete the specified branch from the repository def delete_branch(branch_name) gitaly_migrate(:delete_branch, status: Gitlab::GitalyClient::MigrationStatus::OPT_OUT) do |is_enabled| @@ -1758,6 +1751,12 @@ module Gitlab def sha_from_ref(ref) rev_parse_target(ref).oid end + + def create_commit(params = {}) + params[:message].delete!("\r") + + Rugged::Commit.create(rugged, params) + end end end end |