diff options
author | Jacob Vosmaer (GitLab) <jacob@gitlab.com> | 2017-08-14 19:00:32 +0300 |
---|---|---|
committer | Jacob Vosmaer (GitLab) <jacob@gitlab.com> | 2017-08-14 19:00:32 +0300 |
commit | 48d4566794eba53ce4066aeaeb968949e9f18888 (patch) | |
tree | 7284fd5a43e47d1d42b505757c6e15d21a241d04 | |
parent | ef3ef407eaf7a5676987bcdaf1e4d26c6ff3672e (diff) | |
parent | 5d0dc144b500b33dff45a67bf000a31f3f63e86d (diff) |
Merge branch 'find-commits' into 'master'
Implement CommitService.FindCommits
Closes #448
See merge request !266
44 files changed, 4086 insertions, 30 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 390f15a4f..8df3e955b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Gitaly changelog +UNRELEASED + +- Implement CommitService.FindCommits + https://gitlab.com/gitlab-org/gitaly/merge_requests/266 + v0.30.0 - Add a middleware for handling Git object dir attributes diff --git a/_support/vendor-gitlab-git b/_support/vendor-gitlab-git new file mode 100755 index 000000000..de424fa55 --- /dev/null +++ b/_support/vendor-gitlab-git @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby + +# These files and directories of gitlab-ce will be vendored +FILE_LIST = %w[lib/gitlab/git.rb lib/gitlab/git lib/gitlab/encoding_helper.rb] + +REMOTE = 'https://gitlab.com/gitlab-org/gitlab-ce' + +# This directory in Gitaly will be the 'root' of gitlab-ce +VENDOR_DIR = 'ruby/vendor/gitlab_git' + +require_relative 'run.rb' +require 'tempfile' + +def main + if ARGV.count != 1 + abort "usage: #{$0} BRANCH_OR_TAG" + end + + revision = ARGV.first + revision_sha = nil + + Dir.mktmpdir do |dir| + gitlab_dir = File.join(dir, 'gitlab') + run!(%W[git clone --depth=1 -b #{revision} #{REMOTE}.git #{gitlab_dir}]) + revision_sha = capture!(%W[git rev-parse HEAD], gitlab_dir).chomp + + FileUtils.rm_rf(VENDOR_DIR) + FileUtils.mkdir_p(VENDOR_DIR) + run!(%W[rsync -avR] + FILE_LIST + %W[#{File.join(Dir.pwd, VENDOR_DIR)}/], gitlab_dir) + end + + File.write(File.join(VENDOR_DIR, 'REVISION'), "#{revision_sha}\n") + File.write(File.join(VENDOR_DIR, 'ORIGIN'), "Cloned from #{REMOTE}.\n") +end + +main diff --git a/internal/helper/error.go b/internal/helper/error.go index 1d7016f04..604be7764 100644 --- a/internal/helper/error.go +++ b/internal/helper/error.go @@ -5,6 +5,9 @@ import ( "google.golang.org/grpc/codes" ) +// Unimplemented is a Go error with gRPC error code 'Unimplemented' +var Unimplemented = grpc.Errorf(codes.Unimplemented, "this rpc is not implemented") + // DecorateError unless it's already a grpc error. // If given nil it will return nil. 
func DecorateError(code codes.Code, err error) error { diff --git a/internal/rubyserver/rubyserver.go b/internal/rubyserver/rubyserver.go index 558ce174c..0859d10aa 100644 --- a/internal/rubyserver/rubyserver.go +++ b/internal/rubyserver/rubyserver.go @@ -2,6 +2,7 @@ package rubyserver import ( "fmt" + "io" "io/ioutil" "net" "os" @@ -61,7 +62,8 @@ func Start() (*supervisor.Process, error) { lazyInit.Do(prepareSocketPath) args := []string{"bundle", "exec", "bin/gitaly-ruby", fmt.Sprintf("%d", os.Getpid()), socketPath} - return supervisor.New(nil, args, config.Config.Ruby.Dir) + env := append(os.Environ(), "GITALY_RUBY_GIT_BIN_PATH="+helper.GitPath()) + return supervisor.New(env, args, config.Config.Ruby.Dir) } // CommitServiceClient returns a CommitServiceClient instance that is @@ -98,3 +100,16 @@ func SetHeaders(ctx context.Context, repo *pb.Repository) (context.Context, erro newCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(repoPathHeader, repoPath)) return newCtx, nil } + +// Proxy calls recvSend until it receives an error. The error is returned +// to the caller unless it is io.EOF. +func Proxy(recvSend func() error) (err error) { + for err == nil { + err = recvSend() + } + + if err == io.EOF { + err = nil + } + return err +} diff --git a/internal/service/commit/find_commits.go b/internal/service/commit/find_commits.go new file mode 100644 index 000000000..b4c43bf5b --- /dev/null +++ b/internal/service/commit/find_commits.go @@ -0,0 +1,52 @@ +package commit + +import ( + "gitlab.com/gitlab-org/gitaly/internal/helper" + "gitlab.com/gitlab-org/gitaly/internal/rubyserver" + + pb "gitlab.com/gitlab-org/gitaly-proto/go" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" +) + +func (*server) FindCommits(req *pb.FindCommitsRequest, stream pb.CommitService_FindCommitsServer) error { + ctx := stream.Context() + + // Use Gitaly's default branch lookup function because that is already + // migrated. 
+ if revision := req.Revision; len(revision) == 0 { + repoPath, err := helper.GetRepoPath(req.Repository) + if err != nil { + return err + } + + req.Revision, err = defaultBranchName(ctx, repoPath) + if err != nil { + return grpc.Errorf(codes.Internal, "defaultBranchName: %v", err) + } + } + + client, err := rubyserver.CommitServiceClient(ctx) + if err != nil { + return err + } + + clientCtx, err := rubyserver.SetHeaders(ctx, req.GetRepository()) + if err != nil { + return err + } + + rubyStream, err := client.FindCommits(clientCtx, req) + if err != nil { + return err + } + + return rubyserver.Proxy(func() error { + resp, err := rubyStream.Recv() + if err != nil { + return err + } + return stream.Send(resp) + }) +} diff --git a/internal/service/commit/find_commits_test.go b/internal/service/commit/find_commits_test.go new file mode 100644 index 000000000..97310a252 --- /dev/null +++ b/internal/service/commit/find_commits_test.go @@ -0,0 +1,213 @@ +package commit + +import ( + "context" + "io" + "testing" + + "gitlab.com/gitlab-org/gitaly/internal/testhelper" + + pb "gitlab.com/gitlab-org/gitaly-proto/go" + + "github.com/golang/protobuf/ptypes/timestamp" + "github.com/stretchr/testify/require" +) + +func TestFindCommitsFields(t *testing.T) { + service, ruby, serverSocketPath := startTestServices(t) + defer stopTestServices(service, ruby) + + client := newCommitServiceClient(t, serverSocketPath) + + expectedCommit := &pb.GitCommit{ + Id: "b83d6e391c22777fca1ed3012fce84f633d7fed0", + Subject: []byte("Merge branch 'branch-merged' into 'master'"), + Body: []byte("Merge branch 'branch-merged' into 'master'\r\n\r\nadds bar folder and branch-test text file to check Repository merged_to_root_ref method\r\n\r\n\r\n\r\nSee merge request !12"), + Author: &pb.CommitAuthor{ + Name: []byte("Job van der Voort"), + Email: []byte("job@gitlab.com"), + Date: &timestamp.Timestamp{Seconds: 1474987066}, + }, + Committer: &pb.CommitAuthor{ + Name: []byte("Job van der Voort"), + Email:
[]byte("job@gitlab.com"), + Date: &timestamp.Timestamp{Seconds: 1474987066}, + }, + ParentIds: []string{ + "1b12f15a11fc6e62177bef08f47bc7b5ce50b141", + "498214de67004b1da3d820901307bed2a68a8ef6", + }, + } + request := &pb.FindCommitsRequest{ + Repository: testRepo, + Revision: []byte(expectedCommit.Id), + Limit: 1, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + stream, err := client.FindCommits(ctx, request) + require.NoError(t, err) + + resp, err := stream.Recv() + require.NoError(t, err) + + require.Equal(t, 1, len(resp.Commits), "expected exactly one commit in the first message") + firstCommit := resp.Commits[0] + + require.True(t, testhelper.CommitsEqual(expectedCommit, firstCommit), "expected %v, got %v", expectedCommit, firstCommit) +} + +func TestSuccessfulFindCommitsRequest(t *testing.T) { + service, ruby, serverSocketPath := startTestServices(t) + defer stopTestServices(service, ruby) + + client := newCommitServiceClient(t, serverSocketPath) + + testCases := []struct { + desc string + request *pb.FindCommitsRequest + // Use 'ids' if you know the exact commits id's that should be returned + ids []string + // Use minCommits if you don't know the exact commit id's + minCommits int + }{ + { + desc: "only revision, limit commits", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Revision: []byte("0031876facac3f2b2702a0e53a26e89939a42209"), + Limit: 3, + }, + ids: []string{ + "0031876facac3f2b2702a0e53a26e89939a42209", + "bf6e164cac2dc32b1f391ca4290badcbe4ffc5fb", + "48ca272b947f49eee601639d743784a176574a09", + }, + }, + { + desc: "revision, default commit limit", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Revision: []byte("0031876facac3f2b2702a0e53a26e89939a42209"), + }, + ids: []string{ + "0031876facac3f2b2702a0e53a26e89939a42209", + "bf6e164cac2dc32b1f391ca4290badcbe4ffc5fb", + "48ca272b947f49eee601639d743784a176574a09", + "9d526f87b82e2b2fd231ca44c95508e5e85624ca", +
"335bc94d5b7369b10251e612158da2e4a4aaa2a5", + "1039376155a0d507eba0ea95c29f8f5b983ea34b", + "54188278422b1fa877c2e71c4e37fc6640a58ad1", + "8b9270332688d58e25206601900ee5618fab2390", + "f9220df47bce1530e90c189064d301bfc8ceb5ab", + "40d408f89c1fd26b7d02e891568f880afe06a9f8", + "df914c609a1e16d7d68e4a61777ff5d6f6b6fde3", + "6762605237fc246ae146ac64ecb467f71d609120", + "79b06233d3dc769921576771a4e8bee4b439595d", + "1a0b36b3cdad1d2ee32457c102a8c0b7056fa863", + }, + }, + { + desc: "revision, default commit limit, bypassing rugged walk", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Revision: []byte("0031876facac3f2b2702a0e53a26e89939a42209"), + DisableWalk: true, + }, + }, { + desc: "revision and paths", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Revision: []byte("0031876facac3f2b2702a0e53a26e89939a42209"), + Paths: [][]byte{[]byte("LICENSE")}, + Limit: 10, + }, + ids: []string{"1a0b36b3cdad1d2ee32457c102a8c0b7056fa863"}, + }, + { + desc: "empty revision", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Limit: 35, + }, + minCommits: 35, + }, + { + desc: "before and after", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Before: &timestamp.Timestamp{Seconds: 1483225200}, + After: &timestamp.Timestamp{Seconds: 1472680800}, + Limit: 10, + }, + ids: []string{ + "b83d6e391c22777fca1ed3012fce84f633d7fed0", + "498214de67004b1da3d820901307bed2a68a8ef6", + }, + }, + { + desc: "no merges", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Revision: []byte("e63f41fe459e62e1228fcef60d7189127aeba95a"), + SkipMerges: true, + Limit: 10, + }, + ids: []string{ + "4a24d82dbca5c11c61556f3b35ca472b7463187e", + "498214de67004b1da3d820901307bed2a68a8ef6", + "38008cb17ce1466d8fec2dfa6f6ab8dcfe5cf49e", + "c347ca2e140aa667b968e51ed0ffe055501fe4f4", + "d59c60028b053793cecfb4022de34602e1a9218e", + "a5391128b0ef5d21df5dd23d98557f4ef12fae20", + "54fcc214b94e78d7a41a9a8fe6d87a5e59500e51", + "048721d90c449b244b7b4c53a9186b04330174ec", +
"5f923865dde3436854e9ceb9cdb7815618d4e849", + "2ea1f3dec713d940208fb5ce4a38765ecb5d3f73", + }, + }, + { + desc: "following renames", + request: &pb.FindCommitsRequest{ + Repository: testRepo, + Revision: []byte("94bb47ca1297b7b3731ff2a36923640991e9236f"), + Paths: [][]byte{[]byte("CHANGELOG.md")}, + Follow: true, + Limit: 10, + }, + ids: []string{ + "94bb47ca1297b7b3731ff2a36923640991e9236f", + "5f923865dde3436854e9ceb9cdb7815618d4e849", + "913c66a37b4a45b9769037c55c2d238bd0942d2e", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + stream, err := client.FindCommits(context.Background(), tc.request) + require.NoError(t, err) + + var ids []string + for err == nil { + var resp *pb.FindCommitsResponse + resp, err = stream.Recv() + for _, c := range resp.GetCommits() { + ids = append(ids, c.Id) + } + } + require.Equal(t, io.EOF, err) + + if tc.minCommits > 0 { + require.True(t, len(ids) >= tc.minCommits, "expected at least %d commits, got %d", tc.minCommits, len(ids)) + return + } + + require.Equal(t, len(tc.ids), len(ids)) + for i, id := range tc.ids { + require.Equal(t, id, ids[i]) + } + }) + } +} diff --git a/internal/service/commit/server.go b/internal/service/commit/server.go index 4476cd543..d812e6da6 100644 --- a/internal/service/commit/server.go +++ b/internal/service/commit/server.go @@ -14,7 +14,3 @@ var defaultBranchName = ref.DefaultBranchName func NewServer() pb.CommitServiceServer { return &server{} } - -func (s *server) FindCommits(in *pb.FindCommitsRequest, stream pb.CommitService_FindCommitsServer) error { - return nil -} diff --git a/ruby/Gemfile b/ruby/Gemfile index cb634a49a..239d1217e 100644 --- a/ruby/Gemfile +++ b/ruby/Gemfile @@ -1,5 +1,5 @@ source 'https://rubygems.org' gem 'github-linguist', '~> 4.7.0', require: 'linguist' - -gem 'gitaly', '~> 0.19.0' +gem 'gitaly', '~> 0.27.0' +gem 'activesupport' diff --git a/ruby/Gemfile.lock b/ruby/Gemfile.lock index 6912ce0da..0d03be285 100644 --- a/ruby/Gemfile.lock 
+++ b/ruby/Gemfile.lock @@ -1,13 +1,19 @@ GEM remote: https://rubygems.org/ specs: + activesupport (5.0.0.1) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (~> 0.7) + minitest (~> 5.1) + tzinfo (~> 1.1) addressable (2.5.1) public_suffix (~> 2.0, >= 2.0.2) charlock_holmes (0.7.3) + concurrent-ruby (1.0.5) escape_utils (1.1.1) - faraday (0.12.1) + faraday (0.12.2) multipart-post (>= 1.2, < 3) - gitaly (0.19.0) + gitaly (0.27.0) google-protobuf (~> 3.1) grpc (~> 1.0) github-linguist (4.7.6) @@ -16,8 +22,8 @@ GEM mime-types (>= 1.19) rugged (>= 0.23.0b) google-protobuf (3.3.0) - googleauth (0.5.1) - faraday (~> 0.9) + googleauth (0.5.3) + faraday (~> 0.12) jwt (~> 1.4) logging (~> 2.0) memoist (~> 0.12) @@ -27,6 +33,7 @@ GEM grpc (1.4.1) google-protobuf (~> 3.1) googleauth (~> 0.5.1) + i18n (0.8.1) jwt (1.5.6) little-plugger (1.1.4) logging (2.2.2) @@ -36,6 +43,7 @@ GEM mime-types (3.1) mime-types-data (~> 3.2015) mime-types-data (3.2016.0521) + minitest (5.9.1) multi_json (1.12.1) multipart-post (2.0.0) os (0.9.6) @@ -46,13 +54,17 @@ GEM faraday (~> 0.9) jwt (~> 1.5) multi_json (~> 1.10) + thread_safe (0.3.6) + tzinfo (1.2.2) + thread_safe (~> 0.1) PLATFORMS ruby DEPENDENCIES - gitaly (~> 0.19.0) + activesupport + gitaly (~> 0.27.0) github-linguist (~> 4.7.0) BUNDLED WITH - 1.15.0 + 1.15.3 diff --git a/ruby/README.md b/ruby/README.md index 4adf74208..8125f97bf 100644 --- a/ruby/README.md +++ b/ruby/README.md @@ -27,3 +27,13 @@ for two reasons. Firstly, testing through the parent proves that the Ruby code under test is reachable. Secondly, testing through the parent will make it easier to create a Go implementation in the parent if we ever want to do that. + +## Vendored copy of Gitlab::Git + +`gitaly-ruby` contains a vendored copy of `lib/gitlab/git` from +https://gitlab.com/gitlab-org/gitlab-ce. This allows us to share code +between gitlab-ce / gitlab-ee and `gitaly-ruby`. 
+ +To update the vendored copy of Gitlab::Git, run +`_support/vendor-gitlab-git COMMIT_ID` from the root of the Gitaly +repository. diff --git a/ruby/lib/gitaly_server.rb b/ruby/lib/gitaly_server.rb index de8c2da3e..c5142c6eb 100644 --- a/ruby/lib/gitaly_server.rb +++ b/ruby/lib/gitaly_server.rb @@ -1,5 +1,7 @@ require 'gitaly' +require_relative 'gitlab/git.rb' + require_relative 'gitaly_server/commit_service.rb' module GitalyServer diff --git a/ruby/lib/gitaly_server/commit_service.rb b/ruby/lib/gitaly_server/commit_service.rb index 8811dc614..32c865cd4 100644 --- a/ruby/lib/gitaly_server/commit_service.rb +++ b/ruby/lib/gitaly_server/commit_service.rb @@ -1,28 +1,67 @@ -require 'linguist' -require 'rugged' - module GitalyServer class CommitService < Gitaly::CommitService::Service def commit_languages(request, _call) - rugged_repo = Rugged::Repository.new(GitalyServer.repo_path(_call)) - revision = request.revision - revision = rugged_repo.head.target_id if revision.empty? - - languages = Linguist::Repository.new(rugged_repo, revision).languages + repo = Gitlab::Git::Repository.from_call(_call) + revision = request.revision unless request.revision.empty? - total = languages.values.inject(0, :+) - language_messages = languages.map do |name, share| + language_messages = repo.languages(revision).map do |language| Gitaly::CommitLanguagesResponse::Language.new( - name: name, - share: (share.to_f * 100 / total).round(2), - color: Linguist::Language[name].color || "##{Digest::SHA256.hexdigest(name)[0...6]}" - ) - end - language_messages.sort! 
do |x, y| - y.share <=> x.share + name: language[:label], + share: language[:value], + color: language[:color] + ) end Gitaly::CommitLanguagesResponse.new(languages: language_messages) end + + def find_commits(request, _call) + repository = Gitlab::Git::Repository.from_call(_call) + options = { + ref: request.revision, + limit: request.limit, + follow: request.follow, + skip_merges: request.skip_merges, + disable_walk: request.disable_walk, + offset: request.offset, + } + options[:path] = request.paths unless request.paths.empty? + + options[:before] = Time.at(request.before.seconds).to_datetime if request.before + options[:after] = Time.at(request.after.seconds).to_datetime if request.after + + Enumerator.new do |y| + # Send back 'pages' with 20 commits each + repository.raw_log(options).each_slice(20) do |rugged_commits| + commits = rugged_commits.map do |rugged_commit| + gitaly_commit_from_rugged(rugged_commit) + end + y.yield Gitaly::FindCommitsResponse.new(commits: commits) + end + end + end + + def gitaly_commit_from_rugged(rugged_commit) + Gitaly::GitCommit.new( + id: rugged_commit.oid, + subject: rugged_commit.message.split("\n", 2)[0].chomp, + body: rugged_commit.message, + parent_ids: rugged_commit.parent_ids, + author: gitaly_commit_author_from_rugged(rugged_commit.author), + committer: gitaly_commit_author_from_rugged(rugged_commit.committer), + ) + end + + def gitaly_commit_author_from_rugged(rugged_author) + Gitaly::CommitAuthor.new( + name: bytes!(rugged_author[:name]), + email: bytes!(rugged_author[:email]), + date: Google::Protobuf::Timestamp.new(seconds: rugged_author[:time].to_i) + ) + end + + def bytes!(string) + string.force_encoding('ASCII-8BIT') + end end end diff --git a/ruby/lib/gitlab/git.rb b/ruby/lib/gitlab/git.rb new file mode 100644 index 000000000..a2c50d0ff --- /dev/null +++ b/ruby/lib/gitlab/git.rb @@ -0,0 +1,65 @@ +# External dependencies of Gitlab::Git +require 'rugged' +require 'linguist' + +# Ruby on Rails mix-ins that 
GitLab::Git code relies on +require 'active_support/core_ext/object/blank' +require 'active_support/core_ext/numeric/bytes' +require 'active_support/core_ext/module/delegation' +require 'active_support/core_ext/enumerable' + +# We split our mock implementation of Gitlab::GitalyClient into a separate file +require_relative 'gitaly_client.rb' + +vendor_gitlab_git = '../../vendor/gitlab_git/' + +# Some later requires are order-sensitive. Manually require whatever we need. +require_relative File.join(vendor_gitlab_git, 'lib/gitlab/encoding_helper.rb') +require_relative File.join(vendor_gitlab_git, 'lib/gitlab/git.rb') +require_relative File.join(vendor_gitlab_git, 'lib/gitlab/git/ref.rb') + +# Require all .rb files we can find in the vendored gitlab/git directory +dir = File.expand_path(File.join('..', vendor_gitlab_git, 'lib/gitlab/'), __FILE__) +Dir["#{dir}/git/**/*.rb"].each do |ruby_file| + require_relative ruby_file.sub(dir, File.join(vendor_gitlab_git, 'lib/gitlab/')).sub(%r{^/*}, '') +end + +module Gitlab + # Config lets Gitlab::Git do mock config lookups. + class Config + class Git + def bin_path + ENV['GITALY_RUBY_GIT_BIN_PATH'] + end + end + + def git + Git.new + end + end + + def self.config + Config.new + end +end + +module Gitlab + module Git + class Repository + def self.from_call(_call) + new(GitalyServer.repo_path(_call)) + end + + def initialize(path) + @path = path + @rugged = Rugged::Repository.new(path) + @attributes = Gitlab::Git::Attributes.new(path) + end + + # Bypass the CircuitBreaker class which needs Redis + def rugged + @rugged + end + end + end +end diff --git a/ruby/lib/gitlab/gitaly_client.rb b/ruby/lib/gitlab/gitaly_client.rb new file mode 100644 index 000000000..25c021dd6 --- /dev/null +++ b/ruby/lib/gitlab/gitaly_client.rb @@ -0,0 +1,11 @@ +module Gitlab + module GitalyClient + class << self + # In case we hit a method that tries to do a Gitaly RPC, prevent this. + # We also don't want to instrument the block. 
+ def migrate(*args) + yield false # 'false' means 'don't use gitaly for this block' + end + end + end +end diff --git a/ruby/vendor/gitlab_git/ORIGIN b/ruby/vendor/gitlab_git/ORIGIN new file mode 100644 index 000000000..14c24eb7c --- /dev/null +++ b/ruby/vendor/gitlab_git/ORIGIN @@ -0,0 +1 @@ +Cloned from https://gitlab.com/gitlab-org/gitlab-ce. diff --git a/ruby/vendor/gitlab_git/REVISION b/ruby/vendor/gitlab_git/REVISION new file mode 100644 index 000000000..b3c602eb2 --- /dev/null +++ b/ruby/vendor/gitlab_git/REVISION @@ -0,0 +1 @@ +b21539cc57148c68aa99ac9ec705d2b1ff2a7b04 diff --git a/ruby/vendor/gitlab_git/lib/gitlab/encoding_helper.rb b/ruby/vendor/gitlab_git/lib/gitlab/encoding_helper.rb new file mode 100644 index 000000000..8ddc91e34 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/encoding_helper.rb @@ -0,0 +1,62 @@ +module Gitlab + module EncodingHelper + extend self + + # This threshold is carefully tweaked to prevent usage of encodings detected + # by CharlockHolmes with low confidence. If CharlockHolmes confidence is low, + # we're better off sticking with utf8 encoding. + # Reason: git diff can return strings with invalid utf8 byte sequences if it + # truncates a diff in the middle of a multibyte character. In this case + # CharlockHolmes will try to guess the encoding and will likely suggest an + # obscure encoding with low confidence. + # There is a lot more info with this merge request: + # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193 + ENCODING_CONFIDENCE_THRESHOLD = 50 + + def encode!(message) + return nil unless message.respond_to? :force_encoding + + # if message is utf-8 encoding, just return it + message.force_encoding("UTF-8") + return message if message.valid_encoding? 
+ + # return message if message type is binary + detect = CharlockHolmes::EncodingDetector.detect(message) + return message.force_encoding("BINARY") if detect && detect[:type] == :binary + + # force detected encoding if we have sufficient confidence. + if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD + message.force_encoding(detect[:encoding]) + end + + # encode and clean the bad chars + message.replace clean(message) + rescue + encoding = detect ? detect[:encoding] : "unknown" + "--broken encoding: #{encoding}" + end + + def encode_utf8(message) + detect = CharlockHolmes::EncodingDetector.detect(message) + if detect && detect[:encoding] + begin + CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8') + rescue ArgumentError => e + Rails.logger.warn("Ignoring error converting #{detect[:encoding]} into UTF8: #{e.message}") + + '' + end + else + clean(message) + end + end + + private + + def clean(message) + message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "") + .encode("UTF-8") + .gsub("\0".encode("UTF-8"), "") + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git.rb b/ruby/vendor/gitlab_git/lib/gitlab/git.rb new file mode 100644 index 000000000..b6449f270 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git.rb @@ -0,0 +1,61 @@ +module Gitlab + module Git + BLANK_SHA = ('0' * 40).freeze + TAG_REF_PREFIX = "refs/tags/".freeze + BRANCH_REF_PREFIX = "refs/heads/".freeze + + CommandError = Class.new(StandardError) + + class << self + include Gitlab::EncodingHelper + + def ref_name(ref) + encode! ref.sub(/\Arefs\/(tags|heads|remotes)\//, '') + end + + def branch_name(ref) + ref = ref.to_s + if self.branch_ref?(ref) + self.ref_name(ref) + else + nil + end + end + + def committer_hash(email:, name:) + return if email.nil? || name.nil? 
+ + { + email: email, + name: name, + time: Time.now + } + end + + def tag_name(ref) + ref = ref.to_s + if self.tag_ref?(ref) + self.ref_name(ref) + else + nil + end + end + + def tag_ref?(ref) + ref.start_with?(TAG_REF_PREFIX) + end + + def branch_ref?(ref) + ref.start_with?(BRANCH_REF_PREFIX) + end + + def blank_ref?(ref) + ref == BLANK_SHA + end + + def version + Gitlab::VersionInfo.parse(Gitlab::Popen.popen(%W(#{Gitlab.config.git.bin_path} --version)).first) + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/attributes.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/attributes.rb new file mode 100644 index 000000000..2d20cd473 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/attributes.rb @@ -0,0 +1,136 @@ +# Gitaly note: JV: not sure what to make of this class. Why does it use +# the full disk path of the repository to look up attributes This is +# problematic in Gitaly, because Gitaly hides the full disk path to the +# repository from gitlab-ce. + +module Gitlab + module Git + # Class for parsing Git attribute files and extracting the attributes for + # file patterns. + # + # Unlike Rugged this parser only needs a single IO call (a call to `open`), + # vastly reducing the time spent in extracting attributes. + # + # This class _only_ supports parsing the attributes file located at + # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files + # (`.gitattributes` is copied to this particular path). + # + # Basic usage: + # + # attributes = Gitlab::Git::Attributes.new(some_repo.path) + # + # attributes.attributes('README.md') # => { "eol" => "lf } + class Attributes + # path - The path to the Git repository. + def initialize(path) + @path = File.expand_path(path) + @patterns = nil + end + + # Returns all the Git attributes for the given path. + # + # path - A path to a file for which to get the attributes. + # + # Returns a Hash. 
+ def attributes(path) + full_path = File.join(@path, path) + + patterns.each do |pattern, attrs| + return attrs if File.fnmatch?(pattern, full_path) + end + + {} + end + + # Returns a Hash containing the file patterns and their attributes. + def patterns + @patterns ||= parse_file + end + + # Parses an attribute string. + # + # These strings can be in the following formats: + # + # text # => { "text" => true } + # -text # => { "text" => false } + # key=value # => { "key" => "value" } + # + # string - The string to parse. + # + # Returns a Hash containing the attributes and their values. + def parse_attributes(string) + values = {} + dash = '-' + equal = '=' + binary = 'binary' + + string.split(/\s+/).each do |chunk| + # Data such as "foo = bar" should be treated as "foo" and "bar" being + # separate boolean attributes. + next if chunk == equal + + key = chunk + + # Input: "-foo" + if chunk.start_with?(dash) + key = chunk.byteslice(1, chunk.length - 1) + value = false + + # Input: "foo=bar" + elsif chunk.include?(equal) + key, value = chunk.split(equal, 2) + + # Input: "foo" + else + value = true + end + + values[key] = value + + # When the "binary" option is set the "diff" option should be set to + # the inverse. If "diff" is later set it should overwrite the + # automatically set value. + values['diff'] = false if key == binary && value + end + + values + end + + # Iterates over every line in the attributes file. + def each_line + full_path = File.join(@path, 'info/attributes') + + return unless File.exist?(full_path) + + File.open(full_path, 'r') do |handle| + handle.each_line do |line| + break unless line.valid_encoding? + + yield line.strip + end + end + end + + private + + # Parses the Git attributes file. + def parse_file + pairs = [] + comment = '#' + + each_line do |line| + next if line.start_with?(comment) || line.empty? + + pattern, attrs = line.split(/\s+/, 2) + + parsed = attrs ? 
parse_attributes(attrs) : {} + + pairs << [File.join(@path, pattern), parsed] + end + + # Newer entries take precedence over older entries. + pairs.reverse.to_h + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/blame.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/blame.rb new file mode 100644 index 000000000..31effdba2 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/blame.rb @@ -0,0 +1,90 @@ +module Gitlab + module Git + class Blame + include Gitlab::EncodingHelper + + attr_reader :lines, :blames + + def initialize(repository, sha, path) + @repo = repository + @sha = sha + @path = path + @lines = [] + @blames = load_blame + end + + def each + @blames.each do |blame| + yield( + Gitlab::Git::Commit.new(@repo, blame.commit), + blame.line + ) + end + end + + private + + def load_blame + raw_output = @repo.gitaly_migrate(:blame) do |is_enabled| + if is_enabled + load_blame_by_gitaly + else + load_blame_by_shelling_out + end + end + + output = encode_utf8(raw_output) + process_raw_blame output + end + + def load_blame_by_gitaly + @repo.gitaly_commit_client.raw_blame(@sha, @path) + end + + def load_blame_by_shelling_out + cmd = %W(#{Gitlab.config.git.bin_path} --git-dir=#{@repo.path} blame -p #{@sha} -- #{@path}) + # Read in binary mode to ensure ASCII-8BIT + IO.popen(cmd, 'rb') {|io| io.read } + end + + def process_raw_blame(output) + lines, final = [], [] + info, commits = {}, {} + + # process the output + output.split("\n").each do |line| + if line[0, 1] == "\t" + lines << line[1, line.size] + elsif m = /^(\w{40}) (\d+) (\d+)/.match(line) + commit_id, old_lineno, lineno = m[1], m[2].to_i, m[3].to_i + commits[commit_id] = nil unless commits.key?(commit_id) + info[lineno] = [commit_id, old_lineno] + end + end + + # load all commits in single call + commits.keys.each do |key| + commits[key] = @repo.lookup(key) + end + + # get it together + info.sort.each do |lineno, (commit_id, old_lineno)| + commit = commits[commit_id] + final << 
BlameLine.new(lineno, old_lineno, commit, lines[lineno - 1]) + end + + @lines = final + end + end + + class BlameLine + attr_accessor :lineno, :oldlineno, :commit, :line + def initialize(lineno, oldlineno, commit, line) + @lineno = lineno + @oldlineno = oldlineno + @commit = commit + @line = line + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/blob.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/blob.rb new file mode 100644 index 000000000..77b81d2d4 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/blob.rb @@ -0,0 +1,267 @@ +# Gitaly note: JV: seems to be completely migrated (behind feature flags). + +module Gitlab + module Git + class Blob + include Linguist::BlobHelper + include Gitlab::EncodingHelper + + # This number is the maximum amount of data that we want to display to + # the user. We load as much as we can for encoding detection + # (Linguist) and LFS pointer parsing. All other cases where we need full + # blob data should use load_all_data!. + MAX_DATA_DISPLAY_SIZE = 10.megabytes + + attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary + + class << self + def find(repository, sha, path) + Gitlab::GitalyClient.migrate(:project_raw_show) do |is_enabled| + if is_enabled + find_by_gitaly(repository, sha, path) + else + find_by_rugged(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE) + end + end + end + + def raw(repository, sha) + Gitlab::GitalyClient.migrate(:git_blob_raw) do |is_enabled| + if is_enabled + Gitlab::GitalyClient::BlobService.new(repository).get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE) + else + blob = repository.lookup(sha) + + new( + id: blob.oid, + size: blob.size, + data: blob.content(MAX_DATA_DISPLAY_SIZE), + binary: blob.binary? 
+ ) + end + end + end + + def binary?(data) + # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks + # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15), + # which is what we use below to keep a consistent behavior. + detect = CharlockHolmes::EncodingDetector.new(8000).detect(data) + detect && detect[:type] == :binary + end + + # Returns an array of Blob instances, specified in blob_references as + # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the + # full blob contents are returned. If blob_size_limit >= 0 then each blob will + # contain no more than limit bytes in its data attribute. + # + # Keep in mind that this method may allocate a lot of memory. It is up + # to the caller to limit the number of blobs and blob_size_limit. + # + def batch(repository, blob_references, blob_size_limit: nil) + blob_size_limit ||= MAX_DATA_DISPLAY_SIZE + blob_references.map do |sha, path| + find_by_rugged(repository, sha, path, limit: blob_size_limit) + end + end + + private + + # Recursive search of blob id by path + # + # Ex. + # blog/ # oid: 1a + # app/ # oid: 2a + # models/ # oid: 3a + # file.rb # oid: 4a + # + # + # Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => '4a' + # + def find_entry_by_path(repository, root_id, path) + root_tree = repository.lookup(root_id) + # Strip leading slashes + path[/^\/*/] = '' + path_arr = path.split('/') + + entry = root_tree.find do |entry| + entry[:name] == path_arr[0] + end + + return nil unless entry + + if path_arr.size > 1 + return nil unless entry[:type] == :tree + path_arr.shift + find_entry_by_path(repository, entry[:oid], path_arr.join('/')) + else + [:blob, :commit].include?(entry[:type]) ? 
entry : nil + end + end + + def submodule_blob(blob_entry, path, sha) + new( + id: blob_entry[:oid], + name: blob_entry[:name], + size: 0, + data: '', + path: path, + commit_id: sha + ) + end + + def find_by_gitaly(repository, sha, path) + path = path.sub(/\A\/*/, '') + path = '/' if path.empty? + name = File.basename(path) + entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE) + return unless entry + + case entry.type + when :COMMIT + new( + id: entry.oid, + name: name, + size: 0, + data: '', + path: path, + commit_id: sha + ) + when :BLOB + new( + id: entry.oid, + name: name, + size: entry.size, + data: entry.data.dup, + mode: entry.mode.to_s(8), + path: path, + commit_id: sha, + binary: binary?(entry.data) + ) + end + end + + def find_by_rugged(repository, sha, path, limit:) + commit = repository.lookup(sha) + root_tree = commit.tree + + blob_entry = find_entry_by_path(repository, root_tree.oid, path) + + return nil unless blob_entry + + if blob_entry[:type] == :commit + submodule_blob(blob_entry, path, sha) + else + blob = repository.lookup(blob_entry[:oid]) + + if blob + new( + id: blob.oid, + name: blob_entry[:name], + size: blob.size, + # Rugged::Blob#content is expensive; don't call it if we don't have to. + data: limit.zero? ? '' : blob.content(limit), + mode: blob_entry[:filemode].to_s(8), + path: path, + commit_id: sha, + binary: blob.binary? + ) + end + end + end + end + + def initialize(options) + %w(id name path size data mode commit_id binary).each do |key| + self.send("#{key}=", options[key.to_sym]) + end + + @loaded_all_data = false + # Retain the actual size before it is encoded + @loaded_size = @data.bytesize if @data + end + + def binary? + @binary.nil? ? super : @binary == true + end + + def data + encode! @data + end + + # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into + # memory as a Ruby string. 
+ def load_all_data!(repository) + return if @data == '' # don't mess with submodule blobs + return @data if @loaded_all_data + + Gitlab::GitalyClient.migrate(:git_blob_load_all_data) do |is_enabled| + @data = begin + if is_enabled + Gitlab::GitalyClient::BlobService.new(repository).get_blob(oid: id, limit: -1).data + else + repository.lookup(id).content + end + end + end + + @loaded_all_data = true + @loaded_size = @data.bytesize + @binary = nil + end + + def name + encode! @name + end + + def path + encode! @path + end + + def truncated? + size && (size > loaded_size) + end + + # Valid LFS object pointer is a text file consisting of + # version + # oid + # size + # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer + def lfs_pointer? + has_lfs_version_key? && lfs_oid.present? && lfs_size.present? + end + + def lfs_oid + if has_lfs_version_key? + oid = data.match(/(?<=sha256:)([0-9a-f]{64})/) + return oid[1] if oid + end + + nil + end + + def lfs_size + if has_lfs_version_key? + size = data.match(/(?<=size )([0-9]+)/) + return size[1].to_i if size + end + + nil + end + + def external_storage + return unless lfs_pointer? + + :lfs + end + + alias_method :external_size, :lfs_size + + private + + def has_lfs_version_key? + !empty? && text? && data.start_with?("version https://git-lfs.github.com/spec") + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/blob_snippet.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/blob_snippet.rb new file mode 100644 index 000000000..68116e775 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/blob_snippet.rb @@ -0,0 +1,34 @@ +# Gitaly note: JV: no RPC's here. 
+ +module Gitlab + module Git + class BlobSnippet + include Linguist::BlobHelper + + attr_accessor :ref + attr_accessor :lines + attr_accessor :filename + attr_accessor :startline + + def initialize(ref, lines, startline, filename) + @ref, @lines, @startline, @filename = ref, lines, startline, filename + end + + def data + lines&.join("\n") + end + + def name + filename + end + + def size + data.length + end + + def mode + nil + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/branch.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/branch.rb new file mode 100644 index 000000000..c53882787 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/branch.rb @@ -0,0 +1,11 @@ +# Gitaly note: JV: no RPC's here. + +module Gitlab + module Git + class Branch < Ref + def initialize(repository, name, target, target_commit) + super(repository, name, target, target_commit) + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/commit.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/commit.rb new file mode 100644 index 000000000..fd4dfdb09 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/commit.rb @@ -0,0 +1,456 @@ +# Gitlab::Git::Commit is a wrapper around native Rugged::Commit object +module Gitlab + module Git + class Commit + include Gitlab::EncodingHelper + + attr_accessor :raw_commit, :head + + SERIALIZE_KEYS = [ + :id, :message, :parent_ids, + :authored_date, :author_name, :author_email, + :committed_date, :committer_name, :committer_email + ].freeze + + attr_accessor *SERIALIZE_KEYS # rubocop:disable Lint/AmbiguousOperator + + delegate :tree, to: :rugged_commit + + def ==(other) + return false unless other.is_a?(Gitlab::Git::Commit) + + id && id == other.id + end + + class << self + # Get commits collection + # + # Ex. 
+ # Commit.where( + # repo: repo, + # ref: 'master', + # path: 'app/models', + # limit: 10, + # offset: 5, + # ) + # + def where(options) + repo = options.delete(:repo) + raise 'Gitlab::Git::Repository is required' unless repo.respond_to?(:log) + + repo.log(options) + end + + # Get single commit + # + # Ex. + # Commit.find(repo, '29eda46b') + # + # Commit.find(repo, 'master') + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/321 + def find(repo, commit_id = "HEAD") + # Already a commit? + return commit_id if commit_id.is_a?(Gitlab::Git::Commit) + + # A rugged reference? + commit_id = Gitlab::Git::Ref.dereference_object(commit_id) + return decorate(repo, commit_id) if commit_id.is_a?(Rugged::Commit) + + # Some weird thing? + return nil unless commit_id.is_a?(String) + + commit = repo.gitaly_migrate(:find_commit) do |is_enabled| + if is_enabled + repo.gitaly_commit_client.find_commit(commit_id) + else + obj = repo.rev_parse_target(commit_id) + + obj.is_a?(Rugged::Commit) ? obj : nil + end + end + + decorate(repo, commit) if commit + rescue Rugged::ReferenceError, Rugged::InvalidError, Rugged::ObjectError, + Gitlab::Git::CommandError, Gitlab::Git::Repository::NoRepository + nil + end + + # Get last commit for HEAD + # + # Ex. + # Commit.last(repo) + # + def last(repo) + find(repo) + end + + # Get last commit for specified path and ref + # + # Ex. + # Commit.last_for_path(repo, '29eda46b', 'app/models') + # + # Commit.last_for_path(repo, 'master', 'Gemfile') + # + def last_for_path(repo, ref, path = nil) + where( + repo: repo, + ref: ref, + path: path, + limit: 1 + ).first + end + + # Get commits between two revspecs + # See also #repository.commits_between + # + # Ex. 
+ # Commit.between(repo, '29eda46b', 'master') + # + def between(repo, base, head) + Gitlab::GitalyClient.migrate(:commits_between) do |is_enabled| + if is_enabled + repo.gitaly_commit_client.between(base, head) + else + repo.rugged_commits_between(base, head).map { |c| decorate(repo, c) } + end + end + rescue Rugged::ReferenceError + [] + end + + # Returns commits collection + # + # Ex. + # Commit.find_all( + # repo, + # ref: 'master', + # max_count: 10, + # skip: 5, + # order: :date + # ) + # + # +options+ is a Hash of optional arguments to git + # :ref is the ref from which to begin (SHA1 or name) + # :max_count is the maximum number of commits to fetch + # :skip is the number of commits to skip + # :order is the commits order and allowed value is :none (default), :date, + # :topo, or any combination of them (in an array). Commit ordering types + # are documented here: + # http://www.rubydoc.info/github/libgit2/rugged/Rugged#SORT_NONE-constant) + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/326 + def find_all(repo, options = {}) + Gitlab::GitalyClient.migrate(:find_all_commits) do |is_enabled| + if is_enabled + find_all_by_gitaly(repo, options) + else + find_all_by_rugged(repo, options) + end + end + end + + def find_all_by_rugged(repo, options = {}) + actual_options = options.dup + + allowed_options = [:ref, :max_count, :skip, :order] + + actual_options.keep_if do |key| + allowed_options.include?(key) + end + + default_options = { skip: 0 } + actual_options = default_options.merge(actual_options) + + rugged = repo.rugged + walker = Rugged::Walker.new(rugged) + + if actual_options[:ref] + walker.push(rugged.rev_parse_oid(actual_options[:ref])) + else + rugged.references.each("refs/heads/*") do |ref| + walker.push(ref.target_id) + end + end + + walker.sorting(rugged_sort_type(actual_options[:order])) + + commits = [] + offset = actual_options[:skip] + limit = actual_options[:max_count] + walker.each(offset: offset, limit: limit) do |commit| 
+ commits.push(decorate(repo, commit)) + end + + walker.reset + + commits + rescue Rugged::OdbError + [] + end + + def find_all_by_gitaly(repo, options = {}) + Gitlab::GitalyClient::CommitService.new(repo).find_all_commits(options) + end + + def decorate(repository, commit, ref = nil) + Gitlab::Git::Commit.new(repository, commit, ref) + end + + # Returns the `Rugged` sorting type constant for one or more given + # sort types. Valid keys are `:none`, `:topo`, and `:date`, or an array + # containing more than one of them. `:date` uses a combination of date and + # topological sorting to closer mimic git's native ordering. + def rugged_sort_type(sort_type) + @rugged_sort_types ||= { + none: Rugged::SORT_NONE, + topo: Rugged::SORT_TOPO, + date: Rugged::SORT_DATE | Rugged::SORT_TOPO + } + + @rugged_sort_types.fetch(sort_type, Rugged::SORT_NONE) + end + end + + def initialize(repository, raw_commit, head = nil) + raise "Nil as raw commit passed" unless raw_commit + + case raw_commit + when Hash + init_from_hash(raw_commit) + when Rugged::Commit + init_from_rugged(raw_commit) + when Gitaly::GitCommit + init_from_gitaly(raw_commit) + else + raise "Invalid raw commit type: #{raw_commit.class}" + end + + @repository = repository + @head = head + end + + def sha + id + end + + def short_id(length = 10) + id.to_s[0..length] + end + + def safe_message + @safe_message ||= message + end + + def created_at + committed_date + end + + # Was this commit committed by a different person than the original author? + def different_committer? + author_name != committer_name || author_email != committer_email + end + + def parent_id + parent_ids.first + end + + # Shows the diff between the commit's parent and the commit. + # + # Cuts out the header and stats from #to_patch and returns only the diff. 
+ # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/324 + def to_diff + rugged_diff_from_parent.patch + end + + # Returns a diff object for the changes from this commit's first parent. + # If there is no parent, then the diff is between this commit and an + # empty repo. See Repository#diff for keys allowed in the +options+ + # hash. + def diff_from_parent(options = {}) + Gitlab::GitalyClient.migrate(:commit_raw_diffs) do |is_enabled| + if is_enabled + @repository.gitaly_commit_client.diff_from_parent(self, options) + else + rugged_diff_from_parent(options) + end + end + end + + def rugged_diff_from_parent(options = {}) + options ||= {} + break_rewrites = options[:break_rewrites] + actual_options = Gitlab::Git::Diff.filter_diff_options(options) + + diff = if rugged_commit.parents.empty? + rugged_commit.diff(actual_options.merge(reverse: true)) + else + rugged_commit.parents[0].diff(rugged_commit, actual_options) + end + + diff.find_similar!(break_rewrites: break_rewrites) + diff + end + + def deltas + @deltas ||= begin + deltas = Gitlab::GitalyClient.migrate(:commit_deltas) do |is_enabled| + if is_enabled + @repository.gitaly_commit_client.commit_deltas(self) + else + rugged_diff_from_parent.each_delta + end + end + + deltas.map { |delta| Gitlab::Git::Diff.new(delta) } + end + end + + def has_zero_stats? + stats.total.zero? + rescue + true + end + + def no_commit_message + "--no commit message" + end + + def to_hash + serialize_keys.map.with_object({}) do |key, hash| + hash[key] = send(key) # rubocop:disable GitlabSecurity/PublicSend + end + end + + def date + committed_date + end + + def diffs(options = {}) + Gitlab::Git::DiffCollection.new(diff_from_parent(options), options) + end + + def parents + parent_ids.map { |oid| self.class.find(@repository, oid) }.compact + end + + # Get the gpg signature of this commit. + # + # Ex. 
+ # commit.signature(repo) + # + def signature(repo) + Rugged::Commit.extract_signature(repo.rugged, sha) + end + + def stats + Gitlab::Git::CommitStats.new(self) + end + + def to_patch(options = {}) + begin + rugged_commit.to_mbox(options) + rescue Rugged::InvalidError => ex + if ex.message =~ /commit \w+ is a merge commit/i + 'Patch format is not currently supported for merge commits.' + end + end + end + + # Get a collection of Rugged::Reference objects for this commit. + # + # Ex. + # commit.ref(repo) + # + def refs(repo) + repo.refs_hash[id] + end + + # Get ref names collection + # + # Ex. + # commit.ref_names(repo) + # + def ref_names(repo) + refs(repo).map do |ref| + ref.name.sub(%r{^refs/(heads|remotes|tags)/}, "") + end + end + + def message + encode! @message + end + + def author_name + encode! @author_name + end + + def author_email + encode! @author_email + end + + def committer_name + encode! @committer_name + end + + def committer_email + encode! @committer_email + end + + def rugged_commit + @rugged_commit ||= if raw_commit.is_a?(Rugged::Commit) + raw_commit + else + @repository.rev_parse_target(id) + end + end + + private + + def init_from_hash(hash) + raw_commit = hash.symbolize_keys + + serialize_keys.each do |key| + send("#{key}=", raw_commit[key]) # rubocop:disable GitlabSecurity/PublicSend + end + end + + def init_from_rugged(commit) + author = commit.author + committer = commit.committer + + @raw_commit = commit + @id = commit.oid + @message = commit.message + @authored_date = author[:time] + @committed_date = committer[:time] + @author_name = author[:name] + @author_email = author[:email] + @committer_name = committer[:name] + @committer_email = committer[:email] + @parent_ids = commit.parents.map(&:oid) + end + + def init_from_gitaly(commit) + @raw_commit = commit + @id = commit.id + # TODO: Once gitaly "takes over" Rugged consider separating the + # subject from the message to make it clearer when there's one + # available but not the 
other. + @message = (commit.body.presence || commit.subject).dup + @authored_date = Time.at(commit.author.date.seconds).utc + @author_name = commit.author.name.dup + @author_email = commit.author.email.dup + @committed_date = Time.at(commit.committer.date.seconds).utc + @committer_name = commit.committer.name.dup + @committer_email = commit.committer.email.dup + @parent_ids = commit.parent_ids + end + + def serialize_keys + SERIALIZE_KEYS + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/commit_stats.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/commit_stats.rb new file mode 100644 index 000000000..00acb4763 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/commit_stats.rb @@ -0,0 +1,30 @@ +# Gitaly note: JV: 1 RPC, migration in progress. + +# Gitlab::Git::CommitStats counts the additions, deletions, and total changes +# in a commit. +module Gitlab + module Git + class CommitStats + attr_reader :id, :additions, :deletions, :total + + # Instantiate a CommitStats object + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/323 + def initialize(commit) + @id = commit.id + @additions = 0 + @deletions = 0 + @total = 0 + + diff = commit.rugged_diff_from_parent + + diff.each_patch do |p| + # TODO: Use the new Rugged convenience methods when they're released + @additions += p.stat[0] + @deletions += p.stat[1] + @total += p.changes + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/compare.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/compare.rb new file mode 100644 index 000000000..7cb842256 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/compare.rb @@ -0,0 +1,45 @@ +# Gitaly note: JV: no RPC's here. 
+ +module Gitlab + module Git + class Compare + attr_reader :head, :base, :straight + + def initialize(repository, base, head, straight: false) + @repository = repository + @straight = straight + + unless base && head + @commits = [] + return + end + + @base = Gitlab::Git::Commit.find(repository, base.try(:strip)) + @head = Gitlab::Git::Commit.find(repository, head.try(:strip)) + + @commits = [] unless @base && @head + @commits = [] if same + end + + def same + @base && @head && @base.id == @head.id + end + + def commits + return @commits if defined?(@commits) + + @commits = Gitlab::Git::Commit.between(@repository, @base.id, @head.id) + end + + def diffs(options = {}) + unless @head && @base + return Gitlab::Git::DiffCollection.new([]) + end + + paths = options.delete(:paths) || [] + options[:straight] = @straight + Gitlab::Git::Diff.between(@repository, @head.id, @base.id, options, *paths) + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/diff.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/diff.rb new file mode 100644 index 000000000..ce3d65062 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/diff.rb @@ -0,0 +1,291 @@ +# Gitaly note: JV: needs RPC for Gitlab::Git::Diff.between. + +# Gitlab::Git::Diff is a wrapper around native Rugged::Diff object +module Gitlab + module Git + class Diff + TimeoutError = Class.new(StandardError) + include Gitlab::EncodingHelper + + # Diff properties + attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff + + # Stats properties + attr_accessor :new_file, :renamed_file, :deleted_file + + alias_method :new_file?, :new_file + alias_method :deleted_file?, :deleted_file + alias_method :renamed_file?, :renamed_file + + attr_accessor :expanded + attr_writer :too_large + + alias_method :expanded?, :expanded + + SERIALIZE_KEYS = %i(diff new_path old_path a_mode b_mode new_file renamed_file deleted_file too_large).freeze + + class << self + # The maximum size of a diff to display. 
+ def size_limit + if RequestStore.active? + RequestStore['gitlab_git_diff_size_limit'] ||= find_size_limit + else + find_size_limit + end + end + + # The maximum size before a diff is collapsed. + def collapse_limit + if RequestStore.active? + RequestStore['gitlab_git_diff_collapse_limit'] ||= find_collapse_limit + else + find_collapse_limit + end + end + + def find_size_limit + if Feature.enabled?('gitlab_git_diff_size_limit_increase') + 200.kilobytes + else + 100.kilobytes + end + end + + def find_collapse_limit + if Feature.enabled?('gitlab_git_diff_size_limit_increase') + 100.kilobytes + else + 10.kilobytes + end + end + + def between(repo, head, base, options = {}, *paths) + straight = options.delete(:straight) || false + + common_commit = if straight + base + else + # Only show what is new in the source branch + # compared to the target branch, not the other way + # around. The linex below with merge_base is + # equivalent to diff with three dots (git diff + # branch1...branch2) From the git documentation: + # "git diff A...B" is equivalent to "git diff + # $(git-merge-base A B) B" + repo.merge_base_commit(head, base) + end + + options ||= {} + actual_options = filter_diff_options(options) + repo.diff(common_commit, head, actual_options, *paths) + end + + # Return a copy of the +options+ hash containing only keys that can be + # passed to Rugged. Allowed options are: + # + # :ignore_whitespace_change :: + # If true, changes in amount of whitespace will be ignored. + # + # :disable_pathspec_match :: + # If true, the given +*paths+ will be applied as exact matches, + # instead of as fnmatch patterns. 
+ # + def filter_diff_options(options, default_options = {}) + allowed_options = [:ignore_whitespace_change, + :disable_pathspec_match, :paths, + :max_files, :max_lines, :limits, :expanded] + + if default_options + actual_defaults = default_options.dup + actual_defaults.keep_if do |key| + allowed_options.include?(key) + end + else + actual_defaults = {} + end + + if options + filtered_opts = options.dup + filtered_opts.keep_if do |key| + allowed_options.include?(key) + end + filtered_opts = actual_defaults.merge(filtered_opts) + else + filtered_opts = actual_defaults + end + + filtered_opts + end + end + + def initialize(raw_diff, expanded: true) + @expanded = expanded + + case raw_diff + when Hash + init_from_hash(raw_diff) + prune_diff_if_eligible + when Rugged::Patch, Rugged::Diff::Delta + init_from_rugged(raw_diff) + when Gitlab::GitalyClient::Diff + init_from_gitaly(raw_diff) + prune_diff_if_eligible + when Gitaly::CommitDelta + init_from_gitaly(raw_diff) + when nil + raise "Nil as raw diff passed" + else + raise "Invalid raw diff type: #{raw_diff.class}" + end + end + + def to_hash + hash = {} + + SERIALIZE_KEYS.each do |key| + hash[key] = send(key) # rubocop:disable GitlabSecurity/PublicSend + end + + hash + end + + def mode_changed? + a_mode && b_mode && a_mode != b_mode + end + + def submodule? + a_mode == '160000' || b_mode == '160000' + end + + def line_count + @line_count ||= Util.count_lines(@diff) + end + + def too_large? + if @too_large.nil? + @too_large = @diff.bytesize >= self.class.size_limit + else + @too_large + end + end + + # This is used by `to_hash` and `init_from_hash`. + alias_method :too_large, :too_large? + + def too_large! + @diff = '' + @line_count = 0 + @too_large = true + end + + def collapsed? + return @collapsed if defined?(@collapsed) + + @collapsed = !expanded && @diff.bytesize >= self.class.collapse_limit + end + + def collapse! 
+ @diff = '' + @line_count = 0 + @collapsed = true + end + + private + + def init_from_rugged(rugged) + if rugged.is_a?(Rugged::Patch) + init_from_rugged_patch(rugged) + d = rugged.delta + else + d = rugged + end + + @new_path = encode!(d.new_file[:path]) + @old_path = encode!(d.old_file[:path]) + @a_mode = d.old_file[:mode].to_s(8) + @b_mode = d.new_file[:mode].to_s(8) + @new_file = d.added? + @renamed_file = d.renamed? + @deleted_file = d.deleted? + end + + def init_from_rugged_patch(patch) + # Don't bother initializing diffs that are too large. If a diff is + # binary we're not going to display anything so we skip the size check. + return if !patch.delta.binary? && prune_large_patch(patch) + + @diff = encode!(strip_diff_headers(patch.to_s)) + end + + def init_from_hash(hash) + raw_diff = hash.symbolize_keys + + SERIALIZE_KEYS.each do |key| + send(:"#{key}=", raw_diff[key.to_sym]) # rubocop:disable GitlabSecurity/PublicSend + end + end + + def init_from_gitaly(diff) + @diff = encode!(diff.patch) if diff.respond_to?(:patch) + @new_path = encode!(diff.to_path.dup) + @old_path = encode!(diff.from_path.dup) + @a_mode = diff.old_mode.to_s(8) + @b_mode = diff.new_mode.to_s(8) + @new_file = diff.from_id == BLANK_SHA + @renamed_file = diff.from_path != diff.to_path + @deleted_file = diff.to_id == BLANK_SHA + + collapse! if diff.respond_to?(:collapsed) && diff.collapsed + end + + def prune_diff_if_eligible + if too_large? + too_large! + elsif collapsed? + collapse! + end + end + + # If the patch surpasses any of the diff limits it calls the appropiate + # prune method and returns true. Otherwise returns false. + def prune_large_patch(patch) + size = 0 + + patch.each_hunk do |hunk| + hunk.each_line do |line| + size += line.content.bytesize + + if size >= self.class.size_limit + too_large! + return true + end + end + end + + if !expanded && size >= self.class.collapse_limit + collapse! 
+ return true + end + + false + end + + # Strip out the information at the beginning of the patch's text to match + # Grit's output + def strip_diff_headers(diff_text) + # Delete everything up to the first line that starts with '---' or + # 'Binary' + diff_text.sub!(/\A.*?^(---|Binary)/m, '\1') + + if diff_text.start_with?('---', 'Binary') + diff_text + else + # If the diff_text did not contain a line starting with '---' or + # 'Binary', return the empty string. No idea why; we are just + # preserving behavior from before the refactor. + '' + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/diff_collection.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/diff_collection.rb new file mode 100644 index 000000000..6a601561c --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/diff_collection.rb @@ -0,0 +1,156 @@ +# Gitaly note: JV: no RPC's here. + +module Gitlab + module Git + class DiffCollection + include Enumerable + + DEFAULT_LIMITS = { max_files: 100, max_lines: 5000 }.freeze + + attr_reader :limits + + delegate :max_files, :max_lines, :max_bytes, :safe_max_files, :safe_max_lines, :safe_max_bytes, to: :limits + + def self.collection_limits(options = {}) + limits = {} + limits[:max_files] = options.fetch(:max_files, DEFAULT_LIMITS[:max_files]) + limits[:max_lines] = options.fetch(:max_lines, DEFAULT_LIMITS[:max_lines]) + limits[:max_bytes] = limits[:max_files] * 5.kilobytes # Average 5 KB per file + limits[:safe_max_files] = [limits[:max_files], DEFAULT_LIMITS[:max_files]].min + limits[:safe_max_lines] = [limits[:max_lines], DEFAULT_LIMITS[:max_lines]].min + limits[:safe_max_bytes] = limits[:safe_max_files] * 5.kilobytes # Average 5 KB per file + + OpenStruct.new(limits) + end + + def initialize(iterator, options = {}) + @iterator = iterator + @limits = self.class.collection_limits(options) + @enforce_limits = !!options.fetch(:limits, true) + @expanded = !!options.fetch(:expanded, true) + + @line_count = 0 + @byte_count = 0 + @overflow 
= false + @empty = true + @array = Array.new + end + + def each(&block) + @array.each(&block) + + return if @overflow + return if @iterator.nil? + + Gitlab::GitalyClient.migrate(:commit_raw_diffs) do |is_enabled| + if is_enabled && @iterator.is_a?(Gitlab::GitalyClient::DiffStitcher) + each_gitaly_patch(&block) + else + each_rugged_patch(&block) + end + end + + @populated = true + + # Allow iterator to be garbage-collected. It cannot be reused anyway. + @iterator = nil + end + + def empty? + any? # Make sure the iterator has been exercised + @empty + end + + def overflow? + populate! + !!@overflow + end + + def size + @size ||= count # forces a loop using each method + end + + def real_size + populate! + + if @overflow + "#{size}+" + else + size.to_s + end + end + + def decorate! + collection = each_with_index do |element, i| + @array[i] = yield(element) + end + collection + end + + alias_method :to_ary, :to_a + + private + + def populate! + return if @populated + + each { nil } # force a loop through all diffs + nil + end + + def over_safe_limits?(files) + files >= safe_max_files || @line_count > safe_max_lines || @byte_count >= safe_max_bytes + end + + def each_gitaly_patch + i = @array.length + + @iterator.each do |raw| + diff = Gitlab::Git::Diff.new(raw, expanded: !@enforce_limits || @expanded) + + if raw.overflow_marker + @overflow = true + break + end + + yield @array[i] = diff + i += 1 + end + end + + def each_rugged_patch + i = @array.length + + @iterator.each do |raw| + @empty = false + + if @enforce_limits && i >= max_files + @overflow = true + break + end + + expanded = !@enforce_limits || @expanded + + diff = Gitlab::Git::Diff.new(raw, expanded: expanded) + + if !expanded && over_safe_limits?(i) && diff.line_count > 0 + diff.collapse! + end + + @line_count += diff.line_count + @byte_count += diff.diff.bytesize + + if @enforce_limits && (@line_count >= max_lines || @byte_count >= max_bytes) + # This last Diff instance pushes us over the lines limit. 
We stop and + # discard it. + @overflow = true + break + end + + yield @array[i] = diff + i += 1 + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/env.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/env.rb new file mode 100644 index 000000000..f80193ac5 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/env.rb @@ -0,0 +1,40 @@ +# Gitaly note: JV: no RPC's here. + +module Gitlab + module Git + # Ephemeral (per request) storage for environment variables that some Git + # commands may need. + # + # For example, in pre-receive hooks, new objects are put in a temporary + # $GIT_OBJECT_DIRECTORY. Without it set, the new objects cannot be retrieved + # (this would break push rules for instance). + # + # This class is thread-safe via RequestStore. + class Env + WHITELISTED_GIT_VARIABLES = %w[ + GIT_OBJECT_DIRECTORY + GIT_ALTERNATE_OBJECT_DIRECTORIES + ].freeze + + def self.set(env) + return unless RequestStore.active? + + RequestStore.store[:gitlab_git_env] = whitelist_git_env(env) + end + + def self.all + return {} unless RequestStore.active? + + RequestStore.fetch(:gitlab_git_env) { {} } + end + + def self.[](key) + all[key] + end + + def self.whitelist_git_env(env) + env.select { |key, _| WHITELISTED_GIT_VARIABLES.include?(key.to_s) }.with_indifferent_access + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/gitmodules_parser.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/gitmodules_parser.rb new file mode 100644 index 000000000..4a43b9b44 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/gitmodules_parser.rb @@ -0,0 +1,79 @@ +# Gitaly note: JV: no RPC's here. + +module Gitlab + module Git + class GitmodulesParser + def initialize(content) + @content = content + end + + # Parses the contents of a .gitmodules file and returns a hash of + # submodule information, indexed by path. 
+ def parse + reindex_by_path(get_submodules_by_name) + end + + private + + class State + def initialize + @result = {} + @current_submodule = nil + end + + def start_section(section) + # In some .gitmodules files (e.g. nodegit's), a header + # with the same name appears multiple times; we want to + # accumulate the configs across these + @current_submodule = @result[section] || { 'name' => section } + @result[section] = @current_submodule + end + + def set_attribute(attr, value) + @current_submodule[attr] = value + end + + def section_started? + !@current_submodule.nil? + end + + def submodules_by_name + @result + end + end + + def get_submodules_by_name + iterator = State.new + + @content.split("\n").each_with_object(iterator) do |text, iterator| + next if text =~ /^\s*#/ + + if text =~ /\A\[submodule "(?<name>[^"]+)"\]\z/ + iterator.start_section($~[:name]) + else + next unless iterator.section_started? + + next unless text =~ /\A\s*(?<key>\w+)\s*=\s*(?<value>.*)\z/ + + value = $~[:value].chomp + iterator.set_attribute($~[:key], value) + end + end + + iterator.submodules_by_name + end + + def reindex_by_path(submodules_by_name) + # Convert from an indexed by name to an array indexed by path + # If a submodule doesn't have a path, it is considered bogus + # and is ignored + submodules_by_name.each_with_object({}) do |(name, data), results| + path = data.delete 'path' + next unless path + + results[path] = data + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/hook.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/hook.rb new file mode 100644 index 000000000..8f0c377ef --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/hook.rb @@ -0,0 +1,93 @@ +# Gitaly note: JV: looks like this is only used by GitHooksService in +# app/services. We shouldn't bother migrating this until we know how +# GitHooksService will be migrated. 
+ +module Gitlab + module Git + class Hook + GL_PROTOCOL = 'web'.freeze + attr_reader :name, :repo_path, :path + + def initialize(name, project) + @name = name + @project = project + @repo_path = project.repository.path + @path = File.join(repo_path.strip, 'hooks', name) + end + + def exists? + File.exist?(path) + end + + def trigger(gl_id, oldrev, newrev, ref) + return [true, nil] unless exists? + + Bundler.with_clean_env do + case name + when "pre-receive", "post-receive" + call_receive_hook(gl_id, oldrev, newrev, ref) + when "update" + call_update_hook(gl_id, oldrev, newrev, ref) + end + end + end + + private + + def call_receive_hook(gl_id, oldrev, newrev, ref) + changes = [oldrev, newrev, ref].join(" ") + + exit_status = false + exit_message = nil + + vars = { + 'GL_ID' => gl_id, + 'PWD' => repo_path, + 'GL_PROTOCOL' => GL_PROTOCOL, + 'GL_REPOSITORY' => Gitlab::GlRepository.gl_repository(@project, false) + } + + options = { + chdir: repo_path + } + + Open3.popen3(vars, path, options) do |stdin, stdout, stderr, wait_thr| + exit_status = true + stdin.sync = true + + # in git, pre- and post- receive hooks may just exit without + # reading stdin. We catch the exception to avoid a broken pipe + # warning + begin + # inject all the changes as stdin to the hook + changes.lines do |line| + stdin.puts line + end + rescue Errno::EPIPE + end + + stdin.close + + unless wait_thr.value == 0 + exit_status = false + exit_message = retrieve_error_message(stderr, stdout) + end + end + + [exit_status, exit_message] + end + + def call_update_hook(gl_id, oldrev, newrev, ref) + Dir.chdir(repo_path) do + stdout, stderr, status = Open3.capture3({ 'GL_ID' => gl_id }, path, ref, oldrev, newrev) + [status.success?, stderr.presence || stdout] + end + end + + def retrieve_error_message(stderr, stdout) + err_message = stderr.gets + err_message.blank? ? 
stdout.gets : err_message + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/index.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/index.rb new file mode 100644 index 000000000..db532600d --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/index.rb @@ -0,0 +1,145 @@ +# Gitaly note: JV: When the time comes I think we will want to copy this +# class into Gitaly. None of its methods look like they should be RPC's. +# The RPC's will be at a higher level. + +module Gitlab + module Git + class Index + IndexError = Class.new(StandardError) + + DEFAULT_MODE = 0o100644 + + ACTIONS = %w(create create_dir update move delete).freeze + + attr_reader :repository, :raw_index + + def initialize(repository) + @repository = repository + @raw_index = repository.rugged.index + end + + delegate :read_tree, :get, to: :raw_index + + def write_tree + raw_index.write_tree(repository.rugged) + end + + def dir_exists?(path) + raw_index.find { |entry| entry[:path].start_with?("#{path}/") } + end + + def create(options) + options = normalize_options(options) + + if get(options[:file_path]) + raise IndexError, "A file with this name already exists" + end + + add_blob(options) + end + + def create_dir(options) + options = normalize_options(options) + + if get(options[:file_path]) + raise IndexError, "A file with this name already exists" + end + + if dir_exists?(options[:file_path]) + raise IndexError, "A directory with this name already exists" + end + + options = options.dup + options[:file_path] += '/.gitkeep' + options[:content] = '' + + add_blob(options) + end + + def update(options) + options = normalize_options(options) + + file_entry = get(options[:file_path]) + unless file_entry + raise IndexError, "A file with this name doesn't exist" + end + + add_blob(options, mode: file_entry[:mode]) + end + + def move(options) + options = normalize_options(options) + + file_entry = get(options[:previous_path]) + unless file_entry + raise IndexError, "A file with this name 
doesn't exist" + end + + if get(options[:file_path]) + raise IndexError, "A file with this name already exists" + end + + raw_index.remove(options[:previous_path]) + + add_blob(options, mode: file_entry[:mode]) + end + + def delete(options) + options = normalize_options(options) + + unless get(options[:file_path]) + raise IndexError, "A file with this name doesn't exist" + end + + raw_index.remove(options[:file_path]) + end + + private + + def normalize_options(options) + options = options.dup + options[:file_path] = normalize_path(options[:file_path]) if options[:file_path] + options[:previous_path] = normalize_path(options[:previous_path]) if options[:previous_path] + options + end + + def normalize_path(path) + unless path + raise IndexError, "You must provide a file path" + end + + pathname = Gitlab::Git::PathHelper.normalize_path(path.dup) + + pathname.each_filename do |segment| + if segment == '..' + raise IndexError, 'Path cannot include directory traversal' + end + end + + pathname.to_s + end + + def add_blob(options, mode: nil) + content = options[:content] + unless content + raise IndexError, "You must provide content" + end + + content = Base64.decode64(content) if options[:encoding] == 'base64' + + detect = CharlockHolmes::EncodingDetector.new.detect(content) + unless detect && detect[:type] == :binary + # When writing to the repo directly as we are doing here, + # the `core.autocrlf` config isn't taken into account. 
+ content.gsub!("\r\n", "\n") if repository.autocrlf + end + + oid = repository.rugged.write(content, :blob) + + raw_index.add(path: options[:file_path], oid: oid, mode: mode || DEFAULT_MODE) + rescue Rugged::IndexError => e + raise IndexError, e.message + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/path_helper.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/path_helper.rb new file mode 100644 index 000000000..42c80aabd --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/path_helper.rb @@ -0,0 +1,18 @@ +# Gitaly note: JV: no RPC's here. + +module Gitlab + module Git + class PathHelper + class << self + def normalize_path(filename) + # Strip all leading slashes so that //foo -> foo + filename[/^\/*/] = '' + + # Expand relative paths (e.g. foo/../bar) + filename = Pathname.new(filename) + filename.relative_path_from(Pathname.new('')) + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/popen.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/popen.rb new file mode 100644 index 000000000..25fa62ce4 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/popen.rb @@ -0,0 +1,28 @@ +# Gitaly note: JV: no RPC's here. 
+ +require 'open3' + +module Gitlab + module Git + module Popen + def popen(cmd, path) + unless cmd.is_a?(Array) + raise "System commands must be given as an array of strings" + end + + vars = { "PWD" => path } + options = { chdir: path } + + @cmd_output = "" + @cmd_status = 0 + Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr| + @cmd_output << stdout.read + @cmd_output << stderr.read + @cmd_status = wait_thr.value.exitstatus + end + + [@cmd_output, @cmd_status] + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/ref.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/ref.rb new file mode 100644 index 000000000..372ce005b --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/ref.rb @@ -0,0 +1,51 @@ +# Gitaly note: JV: probably no RPC's here (just one interaction with Rugged). + +module Gitlab + module Git + class Ref + include Gitlab::EncodingHelper + + # Branch or tag name + # without "refs/tags|heads" prefix + attr_reader :name + + # Target sha. + # Usually it is commit sha but in case + # when tag reference on other tag it can be tag sha + attr_reader :target + + # Dereferenced target + # Commit object to which the Ref points to + attr_reader :dereferenced_target + + # Extract branch name from full ref path + # + # Ex. + # Ref.extract_branch_name('refs/heads/master') #=> 'master' + def self.extract_branch_name(str) + str.gsub(/\Arefs\/heads\//, '') + end + + # Gitaly: this method will probably be migrated indirectly via its call sites. + def self.dereference_object(object) + object = object.target while object.is_a?(Rugged::Tag::Annotation) + + object + end + + def initialize(repository, name, target, derefenced_target) + @name = Gitlab::Git.ref_name(name) + @dereferenced_target = derefenced_target + @target = if target.respond_to?(:oid) + target.oid + elsif target.respond_to?(:name) + target.name + elsif target.is_a? 
String + target + else + nil + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb new file mode 100644 index 000000000..7000b1730 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/repository.rb @@ -0,0 +1,979 @@ +# Gitlab::Git::Repository is a wrapper around native Rugged::Repository object +require 'tempfile' +require 'forwardable' +require "rubygems/package" + +module Gitlab + module Git + class Repository + include Gitlab::Git::Popen + + ALLOWED_OBJECT_DIRECTORIES_VARIABLES = %w[ + GIT_OBJECT_DIRECTORY + GIT_ALTERNATE_OBJECT_DIRECTORIES + ].freeze + SEARCH_CONTEXT_LINES = 3 + + NoRepository = Class.new(StandardError) + InvalidBlobName = Class.new(StandardError) + InvalidRef = Class.new(StandardError) + + # Full path to repo + attr_reader :path + + # Directory name of repo + attr_reader :name + + # Rugged repo object + attr_reader :rugged + + attr_reader :storage + + # 'path' must be the path to a _bare_ git repository, e.g. 
+ # /path/to/my-repo.git + def initialize(storage, relative_path) + @storage = storage + @relative_path = relative_path + + storage_path = Gitlab.config.repositories.storages[@storage]['path'] + @path = File.join(storage_path, @relative_path) + @name = @relative_path.split("/").last + @attributes = Gitlab::Git::Attributes.new(path) + end + + delegate :empty?, + :bare?, + to: :rugged + + delegate :exists?, to: :gitaly_repository_client + + # Default branch in the repository + def root_ref + @root_ref ||= gitaly_migrate(:root_ref) do |is_enabled| + if is_enabled + gitaly_ref_client.default_branch_name + else + discover_default_branch + end + end + end + + def rugged + @rugged ||= circuit_breaker.perform do + Rugged::Repository.new(path, alternates: alternate_object_directories) + end + rescue Rugged::RepositoryError, Rugged::OSError + raise NoRepository.new('no repository for such path') + end + + def circuit_breaker + @circuit_breaker ||= Gitlab::Git::Storage::CircuitBreaker.for_storage(storage) + end + + # Returns an Array of branch names + # sorted by name ASC + def branch_names + gitaly_migrate(:branch_names) do |is_enabled| + if is_enabled + gitaly_ref_client.branch_names + else + branches.map(&:name) + end + end + end + + # Returns an Array of Branches + def branches + gitaly_migrate(:branches) do |is_enabled| + if is_enabled + gitaly_ref_client.branches + else + branches_filter + end + end + end + + def reload_rugged + @rugged = nil + end + + # Directly find a branch with a simple name (e.g. master) + # + # force_reload causes a new Rugged repository to be instantiated + # + # This is to work around a bug in libgit2 that causes in-memory refs to + # be stale/invalid when packed-refs is changed. 
+ # See https://gitlab.com/gitlab-org/gitlab-ce/issues/15392#note_14538333 + def find_branch(name, force_reload = false) + reload_rugged if force_reload + + rugged_ref = rugged.branches[name] + if rugged_ref + target_commit = Gitlab::Git::Commit.find(self, rugged_ref.target) + Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target, target_commit) + end + end + + def local_branches(sort_by: nil) + gitaly_migrate(:local_branches) do |is_enabled| + if is_enabled + gitaly_ref_client.local_branches(sort_by: sort_by) + else + branches_filter(filter: :local, sort_by: sort_by) + end + end + end + + # Returns the number of valid branches + def branch_count + gitaly_migrate(:branch_names) do |is_enabled| + if is_enabled + gitaly_ref_client.count_branch_names + else + rugged.branches.count do |ref| + begin + ref.name && ref.target # ensures the branch is valid + + true + rescue Rugged::ReferenceError + false + end + end + end + end + end + + # Returns the number of valid tags + def tag_count + gitaly_migrate(:tag_names) do |is_enabled| + if is_enabled + gitaly_ref_client.count_tag_names + else + rugged.tags.count + end + end + end + + # Returns an Array of tag names + def tag_names + gitaly_migrate(:tag_names) do |is_enabled| + if is_enabled + gitaly_ref_client.tag_names + else + rugged.tags.map { |t| t.name } + end + end + end + + # Returns an Array of Tags + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/390 + def tags + gitaly_migrate(:tags) do |is_enabled| + if is_enabled + tags_from_gitaly + else + tags_from_rugged + end + end + end + + # Returns true if the given tag exists + # + # name - The name of the tag as a String. + def tag_exists?(name) + !!rugged.tags[name] + end + + # Returns true if the given branch exists + # + # name - The name of the branch as a String. + def branch_exists?(name) + rugged.branches.exists?(name) + + # If the branch name is invalid (e.g. ".foo") Rugged will raise an error. 
+ # Whatever code calls this method shouldn't have to deal with that so + # instead we just return `false` (which is true since a branch doesn't + # exist when it has an invalid name). + rescue Rugged::ReferenceError + false + end + + # Returns an Array of branch and tag names + def ref_names + branch_names + tag_names + end + + def has_commits? + !empty? + end + + # Discovers the default branch based on the repository's available branches + # + # - If no branches are present, returns nil + # - If one branch is present, returns its name + # - If two or more branches are present, returns current HEAD or master or first branch + def discover_default_branch + names = branch_names + + return if names.empty? + + return names[0] if names.length == 1 + + if rugged_head + extracted_name = Ref.extract_branch_name(rugged_head.name) + + return extracted_name if names.include?(extracted_name) + end + + if names.include?('master') + 'master' + else + names[0] + end + end + + def rugged_head + rugged.head + rescue Rugged::ReferenceError + nil + end + + def archive_prefix(ref, sha) + project_name = self.name.chomp('.git') + "#{project_name}-#{ref.tr('/', '-')}-#{sha}" + end + + def archive_metadata(ref, storage_path, format = "tar.gz") + ref ||= root_ref + commit = Gitlab::Git::Commit.find(self, ref) + return {} if commit.nil? 
+ + prefix = archive_prefix(ref, commit.id) + + { + 'RepoPath' => path, + 'ArchivePrefix' => prefix, + 'ArchivePath' => archive_file_path(prefix, storage_path, format), + 'CommitId' => commit.id + } + end + + def archive_file_path(name, storage_path, format = "tar.gz") + # Build file path + return nil unless name + + extension = + case format + when "tar.bz2", "tbz", "tbz2", "tb2", "bz2" + "tar.bz2" + when "tar" + "tar" + when "zip" + "zip" + else + # everything else should fall back to tar.gz + "tar.gz" + end + + file_name = "#{name}.#{extension}" + File.join(storage_path, self.name, file_name) + end + + # Return repo size in megabytes + def size + size = gitaly_migrate(:repository_size) do |is_enabled| + if is_enabled + size_by_gitaly + else + size_by_shelling_out + end + end + + (size.to_f / 1024).round(2) + end + + # Use the Rugged Walker API to build an array of commits. + # + # Usage. + # repo.log( + # ref: 'master', + # path: 'app/models', + # limit: 10, + # offset: 5, + # after: Time.new(2016, 4, 21, 14, 32, 10) + # ) + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/446 + def log(options) + default_options = { + limit: 10, + offset: 0, + path: nil, + follow: false, + skip_merges: false, + disable_walk: false, + after: nil, + before: nil + } + + options = default_options.merge(options) + options[:limit] ||= 0 + options[:offset] ||= 0 + + raw_log(options).map { |c| Commit.decorate(self, c) } + end + + # Used in gitaly-ruby + def raw_log(options) + actual_ref = options[:ref] || root_ref + begin + sha = sha_from_ref(actual_ref) + rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError + # Return an empty array if the ref wasn't found + return [] + end + + if log_using_shell?(options) + log_by_shell(sha, options) + else + log_by_walk(sha, options) + end + end + + def count_commits(options) + gitaly_migrate(:count_commits) do |is_enabled| + if is_enabled + count_commits_by_gitaly(options) + else + 
count_commits_by_shelling_out(options) + end + end + end + + def sha_from_ref(ref) + rev_parse_target(ref).oid + end + + # Return the object that +revspec+ points to. If +revspec+ is an + # annotated tag, then return the tag's target instead. + def rev_parse_target(revspec) + obj = rugged.rev_parse(revspec) + Ref.dereference_object(obj) + end + + # Return a collection of Rugged::Commits between the two revspec arguments. + # See http://git-scm.com/docs/git-rev-parse.html#_specifying_revisions for + # a detailed list of valid arguments. + # + # Gitaly note: JV: to be deprecated in favor of Commit.between + def rugged_commits_between(from, to) + walker = Rugged::Walker.new(rugged) + walker.sorting(Rugged::SORT_NONE | Rugged::SORT_REVERSE) + + sha_from = sha_from_ref(from) + sha_to = sha_from_ref(to) + + walker.push(sha_to) + walker.hide(sha_from) + + commits = walker.to_a + walker.reset + + commits + end + + # Counts the amount of commits between `from` and `to`. + def count_commits_between(from, to) + Commit.between(self, from, to).size + end + + # Returns the SHA of the most recent common ancestor of +from+ and +to+ + def merge_base_commit(from, to) + rugged.merge_base(from, to) + end + + # Gitaly note: JV: check gitlab-ee before removing this method. + def rugged_is_ancestor?(ancestor_id, descendant_id) + return false if ancestor_id.nil? || descendant_id.nil? + + merge_base_commit(ancestor_id, descendant_id) == ancestor_id + end + + # Returns true is +from+ is direct ancestor to +to+, otherwise false + def is_ancestor?(from, to) + gitaly_commit_client.is_ancestor(from, to) + end + + # Return an array of Diff objects that represent the diff + # between +from+ and +to+. See Diff::filter_diff_options for the allowed + # diff options. The +options+ hash can also include :break_rewrites to + # split larger rewrites into delete/add pairs. 
+ def diff(from, to, options = {}, *paths) + Gitlab::Git::DiffCollection.new(diff_patches(from, to, options, *paths), options) + end + + # Returns a RefName for a given SHA + def ref_name_for_sha(ref_path, sha) + raise ArgumentError, "sha can't be empty" unless sha.present? + + gitaly_migrate(:find_ref_name) do |is_enabled| + if is_enabled + gitaly_ref_client.find_ref_name(sha, ref_path) + else + args = %W(#{Gitlab.config.git.bin_path} for-each-ref --count=1 #{ref_path} --contains #{sha}) + + # Not found -> ["", 0] + # Found -> ["b8d95eb4969eefacb0a58f6a28f6803f8070e7b9 commit\trefs/environments/production/77\n", 0] + Gitlab::Popen.popen(args, @path).first.split.last + end + end + end + + # Returns branch names collection that contains the special commit(SHA1 + # or name) + # + # Ex. + # repo.branch_names_contains('master') + # + def branch_names_contains(commit) + branches_contains(commit).map { |c| c.name } + end + + # Returns branch collection that contains the special commit(SHA1 or name) + # + # Ex. + # repo.branch_names_contains('master') + # + def branches_contains(commit) + commit_obj = rugged.rev_parse(commit) + parent = commit_obj.parents.first unless commit_obj.parents.empty? + + walker = Rugged::Walker.new(rugged) + + rugged.branches.select do |branch| + walker.push(branch.target_id) + walker.hide(parent) if parent + result = walker.any? { |c| c.oid == commit_obj.oid } + walker.reset + + result + end + end + + # Get refs hash which key is SHA1 + # and value is a Rugged::Reference + def refs_hash + # Initialize only when first call + if @refs_hash.nil? 
+ @refs_hash = Hash.new { |h, k| h[k] = [] } + + rugged.references.each do |r| + # Symbolic/remote references may not have an OID; skip over them + target_oid = r.target.try(:oid) + if target_oid + sha = rev_parse_target(target_oid).oid + @refs_hash[sha] << r + end + end + end + @refs_hash + end + + # Lookup for rugged object by oid or ref name + def lookup(oid_or_ref_name) + rugged.rev_parse(oid_or_ref_name) + end + + # Returns url for submodule + # + # Ex. + # @repository.submodule_url_for('master', 'rack') + # # => git@localhost:rack.git + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/329 + def submodule_url_for(ref, path) + Gitlab::GitalyClient.migrate(:submodule_url_for) do |is_enabled| + if is_enabled + gitaly_submodule_url_for(ref, path) + else + if submodules(ref).any? + submodule = submodules(ref)[path] + submodule['url'] if submodule + end + end + end + end + + # Return total commits count accessible from passed ref + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/330 + def commit_count(ref) + gitaly_migrate(:commit_count) do |is_enabled| + if is_enabled + gitaly_commit_client.commit_count(ref) + else + walker = Rugged::Walker.new(rugged) + walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE) + oid = rugged.rev_parse_oid(ref) + walker.push(oid) + walker.count + end + end + end + + # Mimic the `git clean` command and recursively delete untracked files. + # Valid keys that can be passed in the +options+ hash are: + # + # :d - Remove untracked directories + # :f - Remove untracked directories that are managed by a different + # repository + # :x - Remove ignored files + # + # The value in +options+ must evaluate to true for an option to take + # effect. 
+ # + # Examples: + # + # repo.clean(d: true, f: true) # Enable the -d and -f options + # + # repo.clean(d: false, x: true) # -x is enabled, -d is not + def clean(options = {}) + strategies = [:remove_untracked] + strategies.push(:force) if options[:f] + strategies.push(:remove_ignored) if options[:x] + + # TODO: implement this method + end + + # Delete the specified branch from the repository + def delete_branch(branch_name) + rugged.branches.delete(branch_name) + end + + # Create a new branch named **ref+ based on **stat_point+, HEAD by default + # + # Examples: + # create_branch("feature") + # create_branch("other-feature", "master") + def create_branch(ref, start_point = "HEAD") + rugged_ref = rugged.branches.create(ref, start_point) + target_commit = Gitlab::Git::Commit.find(self, rugged_ref.target) + Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target, target_commit) + rescue Rugged::ReferenceError => e + raise InvalidRef.new("Branch #{ref} already exists") if e.to_s =~ /'refs\/heads\/#{ref}'/ + raise InvalidRef.new("Invalid reference #{start_point}") + end + + # Return an array of this repository's remote names + def remote_names + rugged.remotes.each_name.to_a + end + + # Delete the specified remote from this repository. + def remote_delete(remote_name) + rugged.remotes.delete(remote_name) + end + + # Add a new remote to this repository. Returns a Rugged::Remote object + def remote_add(remote_name, url) + rugged.remotes.create(remote_name, url) + end + + # Update the specified remote using the values in the +options+ hash + # + # Example + # repo.update_remote("origin", url: "path/to/repo") + def remote_update(remote_name, options = {}) + # TODO: Implement other remote options + rugged.remotes.set_url(remote_name, options[:url]) if options[:url] + end + + # Fetch the specified remote + def fetch(remote_name) + rugged.remotes[remote_name].fetch + end + + # Push +*refspecs+ to the remote identified by +remote_name+. 
+ def push(remote_name, *refspecs) + rugged.remotes[remote_name].push(refspecs) + end + + AUTOCRLF_VALUES = { + "true" => true, + "false" => false, + "input" => :input + }.freeze + + def autocrlf + AUTOCRLF_VALUES[rugged.config['core.autocrlf']] + end + + def autocrlf=(value) + rugged.config['core.autocrlf'] = AUTOCRLF_VALUES.invert[value] + end + + # Returns result like "git ls-files" , recursive and full file path + # + # Ex. + # repo.ls_files('master') + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/327 + def ls_files(ref) + actual_ref = ref || root_ref + + begin + sha_from_ref(actual_ref) + rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError + # Return an empty array if the ref wasn't found + return [] + end + + cmd = %W(#{Gitlab.config.git.bin_path} --git-dir=#{path} ls-tree) + cmd += %w(-r) + cmd += %w(--full-tree) + cmd += %w(--full-name) + cmd += %W(-- #{actual_ref}) + + raw_output = IO.popen(cmd, &:read).split("\n").map do |f| + stuff, path = f.split("\t") + _mode, type, _sha = stuff.split(" ") + path if type == "blob" + # Contain only blob type + end + + raw_output.compact + end + + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/328 + def copy_gitattributes(ref) + begin + commit = lookup(ref) + rescue Rugged::ReferenceError + raise InvalidRef.new("Ref #{ref} is invalid") + end + + # Create the paths + info_dir_path = File.join(path, 'info') + info_attributes_path = File.join(info_dir_path, 'attributes') + + begin + # Retrieve the contents of the blob + gitattributes_content = blob_content(commit, '.gitattributes') + rescue InvalidBlobName + # No .gitattributes found. 
Should now remove any info/attributes and return + File.delete(info_attributes_path) if File.exist?(info_attributes_path) + return + end + + # Create the info directory if needed + Dir.mkdir(info_dir_path) unless File.directory?(info_dir_path) + + # Write the contents of the .gitattributes file to info/attributes + # Use binary mode to prevent Rails from converting ASCII-8BIT to UTF-8 + File.open(info_attributes_path, "wb") do |file| + file.write(gitattributes_content) + end + end + + # Returns the Git attributes for the given file path. + # + # See `Gitlab::Git::Attributes` for more information. + def attributes(path) + @attributes.attributes(path) + end + + def languages(ref = nil) + Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled| + if is_enabled + gitaly_commit_client.languages(ref) + else + ref ||= rugged.head.target_id + languages = Linguist::Repository.new(rugged, ref).languages + total = languages.map(&:last).sum + + languages = languages.map do |language| + name, share = language + color = Linguist::Language[name].color || "##{Digest::SHA256.hexdigest(name)[0...6]}" + { + value: (share.to_f * 100 / total).round(2), + label: name, + color: color, + highlight: color + } + end + + languages.sort do |x, y| + y[:value] <=> x[:value] + end + end + end + end + + def gitaly_repository + Gitlab::GitalyClient::Util.repository(@storage, @relative_path) + end + + def gitaly_ref_client + @gitaly_ref_client ||= Gitlab::GitalyClient::RefService.new(self) + end + + def gitaly_commit_client + @gitaly_commit_client ||= Gitlab::GitalyClient::CommitService.new(self) + end + + def gitaly_repository_client + @gitaly_repository_client ||= Gitlab::GitalyClient::RepositoryService.new(self) + end + + def gitaly_migrate(method, &block) + Gitlab::GitalyClient.migrate(method, &block) + rescue GRPC::NotFound => e + raise NoRepository.new(e) + rescue GRPC::BadStatus => e + raise CommandError.new(e) + end + + private + + # Gitaly note: JV: Trying to get rid of the 'filter' 
option so we can implement this with 'git'. + def branches_filter(filter: nil, sort_by: nil) + branches = rugged.branches.each(filter).map do |rugged_ref| + begin + target_commit = Gitlab::Git::Commit.find(self, rugged_ref.target) + Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target, target_commit) + rescue Rugged::ReferenceError + # Omit invalid branch + end + end.compact + + sort_branches(branches, sort_by) + end + + def log_using_shell?(options) + options[:path].present? || + options[:disable_walk] || + options[:skip_merges] || + options[:after] || + options[:before] + end + + def log_by_walk(sha, options) + walk_options = { + show: sha, + sort: Rugged::SORT_NONE, + limit: options[:limit], + offset: options[:offset] + } + Rugged::Walker.walk(rugged, walk_options).to_a + end + + # Gitaly note: JV: although #log_by_shell shells out to Git I think the + # complexity is such that we should migrate it as Ruby before trying to + # do it in Go. + def log_by_shell(sha, options) + limit = options[:limit].to_i + offset = options[:offset].to_i + use_follow_flag = options[:follow] && options[:path].present? + + # We will perform the offset in Ruby because --follow doesn't play well with --skip. + # See: https://gitlab.com/gitlab-org/gitlab-ce/issues/3574#note_3040520 + offset_in_ruby = use_follow_flag && options[:offset].present? + limit += offset if offset_in_ruby + + cmd = %W[#{Gitlab.config.git.bin_path} --git-dir=#{path} log] + cmd << "--max-count=#{limit}" + cmd << '--format=%H' + cmd << "--skip=#{offset}" unless offset_in_ruby + cmd << '--follow' if use_follow_flag + cmd << '--no-merges' if options[:skip_merges] + cmd << "--after=#{options[:after].iso8601}" if options[:after] + cmd << "--before=#{options[:before].iso8601}" if options[:before] + cmd << sha + + # :path can be a string or an array of strings + if options[:path].present? 
+ cmd << '--' + cmd += Array(options[:path]) + end + + raw_output = IO.popen(cmd) { |io| io.read } + lines = offset_in_ruby ? raw_output.lines.drop(offset) : raw_output.lines + + lines.map! { |c| Rugged::Commit.new(rugged, c.strip) } + end + + # We are trying to deprecate this method because it does a lot of work + # but it seems to be used only to look up submodule URL's. + # https://gitlab.com/gitlab-org/gitaly/issues/329 + def submodules(ref) + commit = rev_parse_target(ref) + return {} unless commit + + begin + content = blob_content(commit, ".gitmodules") + rescue InvalidBlobName + return {} + end + + parser = GitmodulesParser.new(content) + fill_submodule_ids(commit, parser.parse) + end + + def gitaly_submodule_url_for(ref, path) + # We don't care about the contents so 1 byte is enough. Can't request 0 bytes, 0 means unlimited. + commit_object = gitaly_commit_client.tree_entry(ref, path, 1) + + return unless commit_object && commit_object.type == :COMMIT + + gitmodules = gitaly_commit_client.tree_entry(ref, '.gitmodules', Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE) + found_module = GitmodulesParser.new(gitmodules.data).parse[path] + + found_module && found_module['url'] + end + + def alternate_object_directories + Gitlab::Git::Env.all.values_at(*ALLOWED_OBJECT_DIRECTORIES_VARIABLES).compact + end + + # Get the content of a blob for a given commit. If the blob is a commit + # (for submodules) then return the blob's OID. + def blob_content(commit, blob_name) + blob_entry = tree_entry(commit, blob_name) + + unless blob_entry + raise InvalidBlobName.new("Invalid blob name: #{blob_name}") + end + + case blob_entry[:type] + when :commit + blob_entry[:oid] + when :tree + raise InvalidBlobName.new("#{blob_name} is a tree, not a blob") + when :blob + rugged.lookup(blob_entry[:oid]).content + end + end + + # Fill in the 'id' field of a submodule hash from its values + # as-of +commit+. 
Return a Hash consisting only of entries + # from the submodule hash for which the 'id' field is filled. + def fill_submodule_ids(commit, submodule_data) + submodule_data.each do |path, data| + id = begin + blob_content(commit, path) + rescue InvalidBlobName + nil + end + data['id'] = id + end + submodule_data.select { |path, data| data['id'] } + end + + # Find the entry for +path+ in the tree for +commit+ + def tree_entry(commit, path) + pathname = Pathname.new(path) + first = true + tmp_entry = nil + + pathname.each_filename do |dir| + if first + tmp_entry = commit.tree[dir] + first = false + elsif tmp_entry.nil? + return nil + else + begin + tmp_entry = rugged.lookup(tmp_entry[:oid]) + rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError + return nil + end + + return nil unless tmp_entry.type == :tree + tmp_entry = tmp_entry[dir] + end + end + + tmp_entry + end + + # Return the Rugged patches for the diff between +from+ and +to+. + def diff_patches(from, to, options = {}, *paths) + options ||= {} + break_rewrites = options[:break_rewrites] + actual_options = Gitlab::Git::Diff.filter_diff_options(options.merge(paths: paths)) + + diff = rugged.diff(from, to, actual_options) + diff.find_similar!(break_rewrites: break_rewrites) + diff.each_patch + end + + def sort_branches(branches, sort_by) + case sort_by + when 'name' + branches.sort_by(&:name) + when 'updated_desc' + branches.sort do |a, b| + b.dereferenced_target.committed_date <=> a.dereferenced_target.committed_date + end + when 'updated_asc' + branches.sort do |a, b| + a.dereferenced_target.committed_date <=> b.dereferenced_target.committed_date + end + else + branches + end + end + + def tags_from_rugged + rugged.references.each("refs/tags/*").map do |ref| + message = nil + + if ref.target.is_a?(Rugged::Tag::Annotation) + tag_message = ref.target.message + + if tag_message.respond_to?(:chomp) + message = tag_message.chomp + end + end + + target_commit = Gitlab::Git::Commit.find(self, 
ref.target) + Gitlab::Git::Tag.new(self, ref.name, ref.target, target_commit, message) + end.sort_by(&:name) + end + + def last_commit_for_path_by_rugged(sha, path) + sha = last_commit_id_for_path(sha, path) + commit(sha) + end + + def tags_from_gitaly + gitaly_ref_client.tags + end + + def size_by_shelling_out + popen(%w(du -sk), path).first.strip.to_i + end + + def size_by_gitaly + gitaly_repository_client.repository_size + end + + def count_commits_by_gitaly(options) + gitaly_commit_client.commit_count(options[:ref], options) + end + + def count_commits_by_shelling_out(options) + cmd = %W[#{Gitlab.config.git.bin_path} --git-dir=#{path} rev-list] + cmd << "--after=#{options[:after].iso8601}" if options[:after] + cmd << "--before=#{options[:before].iso8601}" if options[:before] + cmd += %W[--count #{options[:ref]}] + cmd += %W[-- #{options[:path]}] if options[:path].present? + + raw_output = IO.popen(cmd) { |io| io.read } + + raw_output.to_i + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/rev_list.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/rev_list.rb new file mode 100644 index 000000000..2b5785a1f --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/rev_list.rb @@ -0,0 +1,47 @@ +# Gitaly note: JV: will probably be migrated indirectly by migrating the call sites. + +module Gitlab + module Git + class RevList + attr_reader :oldrev, :newrev, :path_to_repo + + def initialize(path_to_repo:, newrev:, oldrev: nil) + @oldrev = oldrev + @newrev = newrev + @path_to_repo = path_to_repo + end + + # This method returns an array of new references + def new_refs + execute([*base_args, newrev, '--not', '--all']) + end + + # This methods returns an array of missed references + # + # Should become obsolete after https://gitlab.com/gitlab-org/gitaly/issues/348. 
+ def missed_ref + execute([*base_args, '--max-count=1', oldrev, "^#{newrev}"]) + end + + private + + def execute(args) + output, status = Gitlab::Popen.popen(args, nil, Gitlab::Git::Env.all.stringify_keys) + + unless status.zero? + raise "Got a non-zero exit code while calling out `#{args.join(' ')}`." + end + + output.split("\n") + end + + def base_args + [ + Gitlab.config.git.bin_path, + "--git-dir=#{path_to_repo}", + 'rev-list' + ] + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/storage.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/storage.rb new file mode 100644 index 000000000..e28be4b8a --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/storage.rb @@ -0,0 +1,22 @@ +module Gitlab + module Git + module Storage + class Inaccessible < StandardError + attr_reader :retry_after + + def initialize(message = nil, retry_after = nil) + super(message) + @retry_after = retry_after + end + end + + CircuitOpen = Class.new(Inaccessible) + + REDIS_KEY_PREFIX = 'storage_accessible:'.freeze + + def self.redis + Gitlab::Redis::SharedState + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/storage/circuit_breaker.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/storage/circuit_breaker.rb new file mode 100644 index 000000000..9ea9367d4 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/storage/circuit_breaker.rb @@ -0,0 +1,144 @@ +module Gitlab + module Git + module Storage + class CircuitBreaker + FailureInfo = Struct.new(:last_failure, :failure_count) + + attr_reader :storage, + :hostname, + :storage_path, + :failure_count_threshold, + :failure_wait_time, + :failure_reset_time, + :storage_timeout + + delegate :last_failure, :failure_count, to: :failure_info + + def self.reset_all! + pattern = "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}*" + + Gitlab::Git::Storage.redis.with do |redis| + all_storage_keys = redis.keys(pattern) + redis.del(*all_storage_keys) unless all_storage_keys.empty? 
+ end + + RequestStore.delete(:circuitbreaker_cache) + end + + def self.for_storage(storage) + cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do + Hash.new do |hash, storage_name| + hash[storage_name] = new(storage_name) + end + end + + cached_circuitbreakers[storage] + end + + def initialize(storage, hostname = Gitlab::Environment.hostname) + @storage = storage + @hostname = hostname + + config = Gitlab.config.repositories.storages[@storage] + @storage_path = config['path'] + @failure_count_threshold = config['failure_count_threshold'] + @failure_wait_time = config['failure_wait_time'] + @failure_reset_time = config['failure_reset_time'] + @storage_timeout = config['storage_timeout'] + end + + def perform + return yield unless Feature.enabled?('git_storage_circuit_breaker') + + check_storage_accessible! + + yield + end + + def circuit_broken? + return false if no_failures? + + recent_failure = last_failure > failure_wait_time.seconds.ago + too_many_failures = failure_count > failure_count_threshold + + recent_failure || too_many_failures + end + + # Memoizing the `storage_available` call means we only do it once per + # request when the storage is available. + # + # When the storage appears not available, and the memoized value is `false` + # we might want to try again. + def storage_available? + return @storage_available if @storage_available + + if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck + .storage_available?(storage_path, storage_timeout) + track_storage_accessible + else + track_storage_inaccessible + end + + @storage_available + end + + def check_storage_accessible! + if circuit_broken? + raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_wait_time) + end + + unless storage_available? + raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time) + end + end + + def no_failures? + last_failure.blank? 
&& failure_count == 0 + end + + def track_storage_inaccessible + @failure_info = FailureInfo.new(Time.now, failure_count + 1) + + Gitlab::Git::Storage.redis.with do |redis| + redis.pipelined do + redis.hset(cache_key, :last_failure, last_failure.to_i) + redis.hincrby(cache_key, :failure_count, 1) + redis.expire(cache_key, failure_reset_time) + end + end + end + + def track_storage_accessible + return if no_failures? + + @failure_info = FailureInfo.new(nil, 0) + + Gitlab::Git::Storage.redis.with do |redis| + redis.pipelined do + redis.hset(cache_key, :last_failure, nil) + redis.hset(cache_key, :failure_count, 0) + end + end + end + + def failure_info + @failure_info ||= get_failure_info + end + + def get_failure_info + last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis| + redis.hmget(cache_key, :last_failure, :failure_count) + end + + last_failure = Time.at(last_failure.to_i) if last_failure.present? + + FailureInfo.new(last_failure, failure_count.to_i) + end + + def cache_key + @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}" + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/storage/forked_storage_check.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/storage/forked_storage_check.rb new file mode 100644 index 000000000..91d8241f1 --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/storage/forked_storage_check.rb @@ -0,0 +1,55 @@ +module Gitlab + module Git + module Storage + module ForkedStorageCheck + extend self + + def storage_available?(path, timeout_seconds = 5) + status = timeout_check(path, timeout_seconds) + + status.success? + end + + def timeout_check(path, timeout_seconds) + filesystem_check_pid = check_filesystem_in_process(path) + + deadline = timeout_seconds.seconds.from_now.utc + wait_time = 0.01 + status = nil + + while status.nil? 
+ if deadline > Time.now.utc + sleep(wait_time) + _pid, status = Process.wait2(filesystem_check_pid, Process::WNOHANG) + else + Process.kill('KILL', filesystem_check_pid) + # Blocking wait, so we are sure the process is gone before continuing + _pid, status = Process.wait2(filesystem_check_pid) + end + end + + status + end + + # This will spawn a new 2 processes to do the check: + # The outer child (waiter) will spawn another child process (stater). + # + # The stater is the process is performing the actual filesystem check + # the check might hang if the filesystem is acting up. + # In this case we will send a `KILL` to the waiter, which will still + # be responsive while the stater is hanging. + def check_filesystem_in_process(path) + spawn('ruby', '-e', ruby_check, path, [:out, :err] => '/dev/null') + end + + def ruby_check + <<~RUBY_FILESYSTEM_CHECK + inner_pid = fork { File.stat(ARGV.first) } + Process.waitpid(inner_pid) + exit $?.exitstatus + RUBY_FILESYSTEM_CHECK + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/storage/health.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/storage/health.rb new file mode 100644 index 000000000..2d723147f --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/storage/health.rb @@ -0,0 +1,91 @@ +module Gitlab + module Git + module Storage + class Health + attr_reader :storage_name, :info + + def self.pattern_for_storage(storage_name) + "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage_name}:*" + end + + def self.for_all_storages + storage_names = Gitlab.config.repositories.storages.keys + results_per_storage = nil + + Gitlab::Git::Storage.redis.with do |redis| + keys_per_storage = all_keys_for_storages(storage_names, redis) + results_per_storage = load_for_keys(keys_per_storage, redis) + end + + results_per_storage.map do |name, info| + info.each { |i| i[:failure_count] = i[:failure_count].value.to_i } + new(name, info) + end + end + + def self.all_keys_for_storages(storage_names, redis) + 
keys_per_storage = {} + + redis.pipelined do + storage_names.each do |storage_name| + pattern = pattern_for_storage(storage_name) + + keys_per_storage[storage_name] = redis.keys(pattern) + end + end + + keys_per_storage + end + + def self.load_for_keys(keys_per_storage, redis) + info_for_keys = {} + + redis.pipelined do + keys_per_storage.each do |storage_name, keys_future| + info_for_storage = keys_future.value.map do |key| + { name: key, failure_count: redis.hget(key, :failure_count) } + end + + info_for_keys[storage_name] = info_for_storage + end + end + + info_for_keys + end + + def self.for_failing_storages + for_all_storages.select(&:failing?) + end + + def initialize(storage_name, info) + @storage_name = storage_name + @info = info + end + + def failing_info + @failing_info ||= info.select { |info_for_host| info_for_host[:failure_count] > 0 } + end + + def failing? + failing_info.any? + end + + def failing_on_hosts + @failing_on_hosts ||= failing_info.map do |info_for_host| + info_for_host[:name].split(':').last + end + end + + def failing_circuit_breakers + @failing_circuit_breakers ||= failing_on_hosts.map do |hostname| + CircuitBreaker.new(storage_name, hostname) + end + end + + def total_failures + @total_failures ||= failing_info.sum { |info_for_host| info_for_host[:failure_count] } + end + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/tag.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/tag.rb new file mode 100644 index 000000000..bc4e160dc --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/tag.rb @@ -0,0 +1,19 @@ +# Gitaly note: JV: no RPC's here. +# +module Gitlab + module Git + class Tag < Ref + attr_reader :object_sha + + def initialize(repository, name, target, target_commit, message = nil) + super(repository, name, target, target_commit) + + @message = message + end + + def message + encode! 
@message + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/tree.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/tree.rb new file mode 100644 index 000000000..8e959c57c --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/tree.rb @@ -0,0 +1,125 @@ +# Gitaly note: JV: needs 1 RPC, migration is in progress. + +module Gitlab + module Git + class Tree + include Gitlab::EncodingHelper + + attr_accessor :id, :root_id, :name, :path, :type, + :mode, :commit_id, :submodule_url + + class << self + # Get list of tree objects + # for repository based on commit sha and path + # Uses rugged for raw objects + # + # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/320 + def where(repository, sha, path = nil) + path = nil if path == '' || path == '/' + + Gitlab::GitalyClient.migrate(:tree_entries) do |is_enabled| + if is_enabled + client = Gitlab::GitalyClient::CommitService.new(repository) + client.tree_entries(repository, sha, path) + else + tree_entries_from_rugged(repository, sha, path) + end + end + end + + private + + # Recursive search of tree id for path + # + # Ex. 
+ # blog/ # oid: 1a + # app/ # oid: 2a + # models/ # oid: 3a + # views/ # oid: 4a + # + # + # Tree.find_id_by_path(repo, '1a', 'app/models') # => '3a' + # + def find_id_by_path(repository, root_id, path) + root_tree = repository.lookup(root_id) + path_arr = path.split('/') + + entry = root_tree.find do |entry| + entry[:name] == path_arr[0] && entry[:type] == :tree + end + + return nil unless entry + + if path_arr.size > 1 + path_arr.shift + find_id_by_path(repository, entry[:oid], path_arr.join('/')) + else + entry[:oid] + end + end + + def tree_entries_from_rugged(repository, sha, path) + commit = repository.lookup(sha) + root_tree = commit.tree + + tree = if path + id = find_id_by_path(repository, root_tree.oid, path) + if id + repository.lookup(id) + else + [] + end + else + root_tree + end + + tree.map do |entry| + new( + id: entry[:oid], + root_id: root_tree.oid, + name: entry[:name], + type: entry[:type], + mode: entry[:filemode].to_s(8), + path: path ? File.join(path, entry[:name]) : entry[:name], + commit_id: sha + ) + end + end + end + + def initialize(options) + %w(id root_id name path type mode commit_id).each do |key| + self.send("#{key}=", options[key.to_sym]) + end + end + + def name + encode! @name + end + + def path + encode! @path + end + + def dir? + type == :tree + end + + def file? + type == :blob + end + + def submodule? + type == :commit + end + + def readme? + name =~ /^readme/i + end + + def contributing? + name =~ /^contributing/i + end + end + end +end diff --git a/ruby/vendor/gitlab_git/lib/gitlab/git/util.rb b/ruby/vendor/gitlab_git/lib/gitlab/git/util.rb new file mode 100644 index 000000000..4708f22dc --- /dev/null +++ b/ruby/vendor/gitlab_git/lib/gitlab/git/util.rb @@ -0,0 +1,20 @@ +# Gitaly note: JV: no RPC's here. 
+ +module Gitlab + module Git + module Util + LINE_SEP = "\n".freeze + + def self.count_lines(string) + case string[-1] + when nil + 0 + when LINE_SEP + string.count(LINE_SEP) + else + string.count(LINE_SEP) + 1 + end + end + end + end +end |