From fb6a4e21d4a5142e72ac5864fae29192cfc59d64 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Thu, 21 Feb 2019 13:07:26 -0800 Subject: Bring back Rugged implementation of find_commit This brings back some of the changes in https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/20339. For users using Gitaly on top of NFS, accessing the Git data directly via Rugged is more performant than Gitaly. This merge request introduces the feature flag `rugged_find_commit` to activate Rugged paths. There are also Rake tasks `gitlab:features:enable_rugged` and `gitlab:features:disable_rugged` to enable/disable these feature flags altogether. Part of four Rugged changes identified in https://gitlab.com/gitlab-org/gitlab-ce/issues/57317. --- changelogs/unreleased/sh-rugged-find-commit.yml | 5 ++ doc/administration/high_availability/nfs.md | 22 ++++++++ doc/development/gitaly.md | 38 +++++++++++++ lib/gitlab/git/commit.rb | 17 +++++- lib/gitlab/git/ref.rb | 1 + lib/gitlab/git/repository.rb | 1 + lib/gitlab/git/rugged_impl/commit.rb | 65 ++++++++++++++++++++++ lib/gitlab/git/rugged_impl/ref.rb | 20 +++++++ lib/gitlab/git/rugged_impl/repository.rb | 48 ++++++++++++++++ lib/gitlab/gitaly_client/storage_settings.rb | 8 +++ lib/tasks/gitlab/features.rake | 24 ++++++++ scripts/lint-rugged | 8 ++- spec/lib/gitlab/git/commit_spec.rb | 16 +++++- .../gitlab/gitaly_client/storage_settings_spec.rb | 10 ++++ spec/spec_helper.rb | 9 ++- 15 files changed, 286 insertions(+), 6 deletions(-) create mode 100644 changelogs/unreleased/sh-rugged-find-commit.yml create mode 100644 lib/gitlab/git/rugged_impl/commit.rb create mode 100644 lib/gitlab/git/rugged_impl/ref.rb create mode 100644 lib/gitlab/git/rugged_impl/repository.rb create mode 100644 lib/tasks/gitlab/features.rake diff --git a/changelogs/unreleased/sh-rugged-find-commit.yml b/changelogs/unreleased/sh-rugged-find-commit.yml new file mode 100644 index 00000000000..85b5936c9ba --- /dev/null +++ b/changelogs/unreleased/sh-rugged-find-commit.yml @@ -0,0 +1,5 @@ +--- +title: Bring back Rugged implementation of find_commit +merge_request: 25477 +author: +type: fixed diff --git a/doc/administration/high_availability/nfs.md b/doc/administration/high_availability/nfs.md index 74b0e2c8184..78ebf8a083b 100644 --- a/doc/administration/high_availability/nfs.md +++ b/doc/administration/high_availability/nfs.md @@ -37,6 +37,28 @@ options: circumstances it could lead to data loss if a failure occurs before data has synced. +### Improving NFS performance with GitLab + +If you are using NFS to share Git data, we recommend that you enable a +number of feature flags that will allow GitLab application processes to +access Git data directly instead of going through the [Gitaly +service](../gitaly/index.md). Depending on your workload and disk +performance, these flags may help improve performance. See [the +issue](https://gitlab.com/gitlab-org/gitlab-ce/issues/57317) for more +details. + +To do this, run the Rake task: + +```sh +gitlab-rake gitlab:features:enable_rugged +``` + +If you need to undo this setting for some reason, run: + +```sh +gitlab-rake gitlab:features:disable_rugged +``` + ### Known issues On some customer systems, we have seen NFS clients slow precipitously due to diff --git a/doc/development/gitaly.md b/doc/development/gitaly.md index d5fc403bf8b..d52920b9a5d 100644 --- a/doc/development/gitaly.md +++ b/doc/development/gitaly.md @@ -56,6 +56,44 @@ If your test-suite is failing with Gitaly issues, as a first step, try running: rm -rf tmp/tests/gitaly ``` +## Legacy Rugged code + +While Gitaly can handle all Git access, many of GitLab customers still +run Gitaly atop NFS. The legacy Rugged implementation for Git calls may +be faster than the Gitaly RPC due to N+1 Gitaly calls and other +reasons. See [the +issue](https://gitlab.com/gitlab-org/gitlab-ce/issues/57317) for more +details. + +Until GitLab has eliminated most of these inefficiencies or the use of +NFS is discontinued for Git data, Rugged implementations of some of the +most commonly-used RPCs can be enabled via feature flags: + +* `rugged_find_commit` +* `rugged_get_tree_entries` +* `rugged_tree_entry` +* `rugged_commit_is_ancestor` + +A convenience Rake task can be used to enable or disable these flags +all together. To enable: + +```sh +bundle exec rake gitlab:features:enable_rugged +``` + +To disable: + +```sh +bundle exec rake gitlab:features:disable_rugged +``` + +Most of this code exists in the `lib/gitlab/git/rugged_impl` directory. + +NOTE: **Note:** You should NOT need to add or modify code related to +Rugged unless explicitly discussed with the [Gitaly +Team](https://gitlab.com/groups/gl-gitaly/group_members). This code will +NOT work on GitLab.com or other GitLab instances that do not use NFS. + ## `TooManyInvocationsError` errors During development and testing, you may experience `Gitlab::GitalyClient::TooManyInvocationsError` failures. diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index 5863815ca85..c5688cda565 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -5,6 +5,7 @@ module Gitlab module Git class Commit include Gitlab::EncodingHelper + prepend Gitlab::Git::RuggedImpl::Commit extend Gitlab::Git::WrapsGitalyErrors attr_accessor :raw_commit, :head @@ -62,15 +63,19 @@ module Gitlab # This saves us an RPC round trip. return nil if commit_id.include?(':') - commit = wrapped_gitaly_errors do - repo.gitaly_commit_client.find_commit(commit_id) - end + commit = find_commit(repo, commit_id) decorate(repo, commit) if commit rescue Gitlab::Git::CommandError, Gitlab::Git::Repository::NoRepository, ArgumentError nil end + def find_commit(repo, commit_id) + wrapped_gitaly_errors do + repo.gitaly_commit_client.find_commit(commit_id) + end + end + # Get last commit for HEAD # # Ex. @@ -185,6 +190,10 @@ module Gitlab @repository = repository @head = head + init_commit(raw_commit) + end + + def init_commit(raw_commit) case raw_commit when Hash init_from_hash(raw_commit) @@ -400,3 +409,5 @@ module Gitlab end end end + +Gitlab::Git::Commit.singleton_class.prepend Gitlab::Git::RuggedImpl::Commit::ClassMethods diff --git a/lib/gitlab/git/ref.rb b/lib/gitlab/git/ref.rb index eec91194949..47cfb483509 100644 --- a/lib/gitlab/git/ref.rb +++ b/lib/gitlab/git/ref.rb @@ -4,6 +4,7 @@ module Gitlab module Git class Ref include Gitlab::EncodingHelper + include Gitlab::Git::RuggedImpl::Ref # Branch or tag name # without "refs/tags|heads" prefix diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index aea132a3dd9..2bfff8397e8 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -11,6 +11,7 @@ module Gitlab include Gitlab::Git::WrapsGitalyErrors include Gitlab::EncodingHelper include Gitlab::Utils::StrongMemoize + include Gitlab::Git::RuggedImpl::Repository SEARCH_CONTEXT_LINES = 3 REV_LIST_COMMIT_LIMIT = 2_000 diff --git a/lib/gitlab/git/rugged_impl/commit.rb b/lib/gitlab/git/rugged_impl/commit.rb new file mode 100644 index 00000000000..251802878c3 --- /dev/null +++ b/lib/gitlab/git/rugged_impl/commit.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true + +# NOTE: This code is legacy. Do not add/modify code here unless you have +# discussed with the Gitaly team. See +# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code +# for more details. + +# rubocop:disable Gitlab/ModuleWithInstanceVariables +module Gitlab + module Git + module RuggedImpl + module Commit + module ClassMethods + extend ::Gitlab::Utils::Override + + def rugged_find(repo, commit_id) + obj = repo.rev_parse_target(commit_id) + + obj.is_a?(::Rugged::Commit) ? obj : nil + rescue ::Rugged::Error + nil + end + + override :find_commit + def find_commit(repo, commit_id) + if Feature.enabled?(:rugged_find_commit) + rugged_find(repo, commit_id) + else + super + end + end + end + + extend ::Gitlab::Utils::Override + + override :init_commit + def init_commit(raw_commit) + case raw_commit + when ::Rugged::Commit + init_from_rugged(raw_commit) + else + super + end + end + + def init_from_rugged(commit) + author = commit.author + committer = commit.committer + + @raw_commit = commit + @id = commit.oid + @message = commit.message + @authored_date = author[:time] + @committed_date = committer[:time] + @author_name = author[:name] + @author_email = author[:email] + @committer_name = committer[:name] + @committer_email = committer[:email] + @parent_ids = commit.parents.map(&:oid) + end + end + end + end +end +# rubocop:enable Gitlab/ModuleWithInstanceVariables diff --git a/lib/gitlab/git/rugged_impl/ref.rb b/lib/gitlab/git/rugged_impl/ref.rb new file mode 100644 index 00000000000..b553e82dc47 --- /dev/null +++ b/lib/gitlab/git/rugged_impl/ref.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +# NOTE: This code is legacy. Do not add/modify code here unless you have +# discussed with the Gitaly team. See +# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code +# for more details. + +module Gitlab + module Git + module RuggedImpl + module Ref + def self.dereference_object(object) + object = object.target while object.is_a?(::Rugged::Tag::Annotation) + + object + end + end + end + end +end diff --git a/lib/gitlab/git/rugged_impl/repository.rb b/lib/gitlab/git/rugged_impl/repository.rb new file mode 100644 index 00000000000..135c47017b3 --- /dev/null +++ b/lib/gitlab/git/rugged_impl/repository.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +# NOTE: This code is legacy. Do not add/modify code here unless you have +# discussed with the Gitaly team. See +# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code +# for more details. + +# rubocop:disable Gitlab/ModuleWithInstanceVariables +module Gitlab + module Git + module RuggedImpl + module Repository + FEATURE_FLAGS = %i(rugged_find_commit).freeze + + def alternate_object_directories + relative_object_directories.map { |d| File.join(path, d) } + end + + ALLOWED_OBJECT_RELATIVE_DIRECTORIES_VARIABLES = %w[ + GIT_OBJECT_DIRECTORY_RELATIVE + GIT_ALTERNATE_OBJECT_DIRECTORIES_RELATIVE + ].freeze + + def relative_object_directories + Gitlab::Git::HookEnv.all(gl_repository).values_at(*ALLOWED_OBJECT_RELATIVE_DIRECTORIES_VARIABLES).flatten.compact + end + + def rugged + @rugged ||= ::Rugged::Repository.new(path, alternates: alternate_object_directories) + rescue ::Rugged::RepositoryError, ::Rugged::OSError + raise ::Gitlab::Git::Repository::NoRepository.new('no repository for such path') + end + + def cleanup + @rugged&.close + end + + # Return the object that +revspec+ points to. If +revspec+ is an + # annotated tag, then return the tag's target instead. + def rev_parse_target(revspec) + obj = rugged.rev_parse(revspec) + Ref.dereference_object(obj) + end + end + end + end +end +# rubocop:enable Gitlab/ModuleWithInstanceVariables diff --git a/lib/gitlab/gitaly_client/storage_settings.rb b/lib/gitlab/gitaly_client/storage_settings.rb index 754cccb6b3f..78ef6bfc0ec 100644 --- a/lib/gitlab/gitaly_client/storage_settings.rb +++ b/lib/gitlab/gitaly_client/storage_settings.rb @@ -32,11 +32,19 @@ module Gitlab end def self.disk_access_denied? + return false if rugged_enabled? + !temporarily_allowed?(ALLOW_KEY) && GitalyClient.feature_enabled?(DISK_ACCESS_DENIED_FLAG) rescue false # Err on the side of caution, don't break gitlab for people end + def self.rugged_enabled? + Gitlab::Git::RuggedImpl::Repository::FEATURE_FLAGS.any? do |flag| + Feature.enabled?(flag) + end + end + def initialize(storage) raise InvalidConfigurationError, "expected a Hash, got a #{storage.class.name}" unless storage.is_a?(Hash) raise InvalidConfigurationError, INVALID_STORAGE_MESSAGE unless storage.has_key?('path') diff --git a/lib/tasks/gitlab/features.rake b/lib/tasks/gitlab/features.rake new file mode 100644 index 00000000000..d115961108e --- /dev/null +++ b/lib/tasks/gitlab/features.rake @@ -0,0 +1,24 @@ +namespace :gitlab do + namespace :features do + desc 'GitLab | Features | Enable direct Git access via Rugged for NFS' + task enable_rugged: :environment do + set_rugged_feature_flags(true) + puts 'All Rugged feature flags were enabled.' + end + + task disable_rugged: :environment do + set_rugged_feature_flags(false) + puts 'All Rugged feature flags were disabled.' + end + end + + def set_rugged_feature_flags(status) + Gitlab::Git::RuggedImpl::Repository::FEATURE_FLAGS.each do |flag| + if status + Feature.enable(flag) + else + Feature.disable(flag) + end + end + end +end diff --git a/scripts/lint-rugged b/scripts/lint-rugged index 22e3e1f1505..9466c62a415 100755 --- a/scripts/lint-rugged +++ b/scripts/lint-rugged @@ -5,12 +5,18 @@ ALLOWED = [ 'lib/gitlab/bare_repository_import/repository.rb', # Needed to avoid using the git binary to validate a branch name - 'lib/gitlab/git_ref_validator.rb' + 'lib/gitlab/git_ref_validator.rb', + + # Reverted Rugged calls due to Gitaly atop NFS performance + # See https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code. + 'lib/gitlab/git/rugged_impl/', + 'lib/gitlab/gitaly_client/storage_settings.rb' ].freeze rugged_lines = IO.popen(%w[git grep -i -n rugged -- app config lib], &:read).lines rugged_lines = rugged_lines.select { |l| /^[^:]*\.rb:/ =~ l } rugged_lines = rugged_lines.reject { |l| l.start_with?(*ALLOWED) } +rugged_lines = rugged_lines.reject { |l| /(include|prepend) Gitlab::Git::RuggedImpl/ =~ l} rugged_lines = rugged_lines.reject do |line| code, _comment = line.split('# ', 2) code !~ /rugged/i diff --git a/spec/lib/gitlab/git/commit_spec.rb b/spec/lib/gitlab/git/commit_spec.rb index 2611ebed25b..3fb41a626b2 100644 --- a/spec/lib/gitlab/git/commit_spec.rb +++ b/spec/lib/gitlab/git/commit_spec.rb @@ -112,7 +112,7 @@ describe Gitlab::Git::Commit, :seed_helper do end context 'Class methods' do - describe '.find' do + shared_examples '.find' do it "should return first head commit if without params" do expect(described_class.last(repository).id).to eq( rugged_repo.head.target.oid @@ -154,6 +154,20 @@ describe Gitlab::Git::Commit, :seed_helper do end end + describe '.find with Gitaly enabled' do + it_should_behave_like '.find' + end + + describe '.find with Rugged enabled', :enable_rugged do + it 'calls out to the Rugged implementation' do + allow_any_instance_of(Rugged).to receive(:rev_parse).with(SeedRepo::Commit::ID).and_call_original + + described_class.find(repository, SeedRepo::Commit::ID) + end + + it_should_behave_like '.find' + end + describe '.last_for_path' do context 'no path' do subject { described_class.last_for_path(repository, 'master') } diff --git a/spec/lib/gitlab/gitaly_client/storage_settings_spec.rb b/spec/lib/gitlab/gitaly_client/storage_settings_spec.rb index c89913ec8e9..bb10be2a4dc 100644 --- a/spec/lib/gitlab/gitaly_client/storage_settings_spec.rb +++ b/spec/lib/gitlab/gitaly_client/storage_settings_spec.rb @@ -26,4 +26,14 @@ describe Gitlab::GitalyClient::StorageSettings do end end end + + describe '.disk_access_denied?' do + it 'return false when Rugged is enabled', :enable_rugged do + expect(described_class.disk_access_denied?).to be_falsey + end + + it 'returns true' do + expect(described_class.disk_access_denied?).to be_truthy + end + end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 97e7a019222..e8d7b18bf04 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -115,10 +115,17 @@ RSpec.configure do |config| TestEnv.clean_test_path end - config.before do + config.before do |example| # Enable all features by default for testing allow(Feature).to receive(:enabled?) { true } + enabled = example.metadata[:enable_rugged].present? + + # Disable Rugged features by default + Gitlab::Git::RuggedImpl::Repository::FEATURE_FLAGS.each do |flag| + allow(Feature).to receive(:enabled?).with(flag).and_return(enabled) + end + # The following can be removed when we remove the staged rollout strategy # and we can just enable it using instance wide settings # (ie. ApplicationSetting#auto_devops_enabled) -- cgit v1.2.3