Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Bob Van Landuyt <bob@vanlanduyt.co> 2017-11-13 18:52:07 +0300
committer: Bob Van Landuyt <bob@vanlanduyt.co> 2017-12-08 11:11:39 +0300
commit: f1ae1e39ce6b7578c5697c977bc3b52b119301ab (patch)
tree: 1d01033287e4e15e505c7b8b3f69ced4e6cf21c8 /lib
parent: 12d33b883adda7093f0f4b838532871036af3925 (diff)
Move the circuitbreaker check out into a separate process
Moving the check out of the general requests makes sure we don't have any slowdown in the regular requests. To keep the process performing these checks small, the check is still performed inside a unicorn. But that is called from a process running on the same server. Because the checks are now done outside normal requests, we can have a simpler failure strategy: The check is now performed in the background every `circuitbreaker_check_interval`. Failures are logged in Redis. The failures are reset when the check succeeds. Per check we will try `circuitbreaker_access_retries` times within `circuitbreaker_storage_timeout` seconds. When the number of failures exceeds `circuitbreaker_failure_count_threshold`, we will block access to the storage. After `failure_reset_time` of no checks, we will clear the stored failures. This could happen when the process that performs the checks is not running.
Diffstat (limited to 'lib')
-rw-r--r-- lib/api/circuit_breakers.rb 2
-rw-r--r-- lib/gitlab/git/storage/checker.rb 98
-rw-r--r-- lib/gitlab/git/storage/circuit_breaker.rb 106
-rw-r--r-- lib/gitlab/git/storage/circuit_breaker_settings.rb 12
-rw-r--r-- lib/gitlab/git/storage/failure_info.rb 39
-rw-r--r-- lib/gitlab/git/storage/null_circuit_breaker.rb 22
-rw-r--r-- lib/gitlab/storage_check.rb 11
-rw-r--r-- lib/gitlab/storage_check/cli.rb 69
-rw-r--r-- lib/gitlab/storage_check/gitlab_caller.rb 39
-rw-r--r-- lib/gitlab/storage_check/option_parser.rb 39
-rw-r--r-- lib/gitlab/storage_check/response.rb 77
11 files changed, 396 insertions, 118 deletions
diff --git a/lib/api/circuit_breakers.rb b/lib/api/circuit_breakers.rb
index 118883f5ea5..598c76f6168 100644
--- a/lib/api/circuit_breakers.rb
+++ b/lib/api/circuit_breakers.rb
@@ -41,7 +41,7 @@ module API
detail 'This feature was introduced in GitLab 9.5'
end
delete do
- Gitlab::Git::Storage::CircuitBreaker.reset_all!
+ Gitlab::Git::Storage::FailureInfo.reset_all!
end
end
end
diff --git a/lib/gitlab/git/storage/checker.rb b/lib/gitlab/git/storage/checker.rb
new file mode 100644
index 00000000000..de63cb4b40c
--- /dev/null
+++ b/lib/gitlab/git/storage/checker.rb
@@ -0,0 +1,98 @@
+module Gitlab
+ module Git
+ module Storage
+ class Checker
+ include CircuitBreakerSettings
+
+ attr_reader :storage_path, :storage, :hostname, :logger
+
+ def self.check_all(logger = Rails.logger)
+ threads = Gitlab.config.repositories.storages.keys.map do |storage_name|
+ Thread.new do
+ Thread.current[:result] = new(storage_name, logger).check_with_lease
+ end
+ end
+
+ threads.map do |thread|
+ thread.join
+ thread[:result]
+ end
+ end
+
+ def initialize(storage, logger = Rails.logger)
+ @storage = storage
+ config = Gitlab.config.repositories.storages[@storage]
+ @storage_path = config['path']
+ @logger = logger
+
+ @hostname = Gitlab::Environment.hostname
+ end
+
+ def check_with_lease
+ lease_key = "storage_check:#{cache_key}"
+ lease = Gitlab::ExclusiveLease.new(lease_key, timeout: storage_timeout)
+ result = { storage: storage, success: nil }
+
+ if uuid = lease.try_obtain
+ result[:success] = check
+
+ Gitlab::ExclusiveLease.cancel(lease_key, uuid)
+ else
+ logger.warn("#{hostname}: #{storage}: Skipping check, previous check still running")
+ end
+
+ result
+ end
+
+ def check
+ if Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout, access_retries)
+ track_storage_accessible
+ true
+ else
+ track_storage_inaccessible
+ logger.error("#{hostname}: #{storage}: Not accessible.")
+ false
+ end
+ end
+
+ private
+
+ def track_storage_inaccessible
+ first_failure = current_failure_info.first_failure || Time.now
+ last_failure = Time.now
+
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.pipelined do
+ redis.hset(cache_key, :first_failure, first_failure.to_i)
+ redis.hset(cache_key, :last_failure, last_failure.to_i)
+ redis.hincrby(cache_key, :failure_count, 1)
+ redis.expire(cache_key, failure_reset_time)
+ maintain_known_keys(redis)
+ end
+ end
+ end
+
+ def track_storage_accessible
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.pipelined do
+ redis.hset(cache_key, :first_failure, nil)
+ redis.hset(cache_key, :last_failure, nil)
+ redis.hset(cache_key, :failure_count, 0)
+ maintain_known_keys(redis)
+ end
+ end
+ end
+
+ def maintain_known_keys(redis)
+ expire_time = Time.now.to_i + failure_reset_time
+ redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
+ redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
+ end
+
+ def current_failure_info
+ FailureInfo.load(cache_key)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb
index 4328c0ea29b..898bb1b65be 100644
--- a/lib/gitlab/git/storage/circuit_breaker.rb
+++ b/lib/gitlab/git/storage/circuit_breaker.rb
@@ -4,22 +4,11 @@ module Gitlab
class CircuitBreaker
include CircuitBreakerSettings
- FailureInfo = Struct.new(:last_failure, :failure_count)
-
attr_reader :storage,
- :hostname,
- :storage_path
-
- delegate :last_failure, :failure_count, to: :failure_info
-
- def self.reset_all!
- Gitlab::Git::Storage.redis.with do |redis|
- all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
- redis.del(*all_storage_keys) unless all_storage_keys.empty?
- end
+ :hostname
- RequestStore.delete(:circuitbreaker_cache)
- end
+ delegate :last_failure, :failure_count, :no_failures?,
+ to: :failure_info
def self.for_storage(storage)
cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do
@@ -46,9 +35,6 @@ module Gitlab
def initialize(storage, hostname)
@storage = storage
@hostname = hostname
-
- config = Gitlab.config.repositories.storages[@storage]
- @storage_path = config['path']
end
def perform
@@ -65,15 +51,6 @@ module Gitlab
failure_count > failure_count_threshold
end
- def backing_off?
- return false if no_failures?
-
- recent_failure = last_failure > failure_wait_time.seconds.ago
- too_many_failures = failure_count > backoff_threshold
-
- recent_failure && too_many_failures
- end
-
private
# The circuitbreaker can be enabled for the entire fleet using a Feature
@@ -86,88 +63,13 @@ module Gitlab
end
def failure_info
- @failure_info ||= get_failure_info
- end
-
- # Memoizing the `storage_available` call means we only do it once per
- # request when the storage is available.
- #
- # When the storage appears not available, and the memoized value is `false`
- # we might want to try again.
- def storage_available?
- return @storage_available if @storage_available
-
- if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck
- .storage_available?(storage_path, storage_timeout, access_retries)
- track_storage_accessible
- else
- track_storage_inaccessible
- end
-
- @storage_available
+ @failure_info ||= FailureInfo.load(cache_key)
end
def check_storage_accessible!
if circuit_broken?
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
end
-
- if backing_off?
- raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
- end
-
- unless storage_available?
- raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time)
- end
- end
-
- def no_failures?
- last_failure.blank? && failure_count == 0
- end
-
- def track_storage_inaccessible
- @failure_info = FailureInfo.new(Time.now, failure_count + 1)
-
- Gitlab::Git::Storage.redis.with do |redis|
- redis.pipelined do
- redis.hset(cache_key, :last_failure, last_failure.to_i)
- redis.hincrby(cache_key, :failure_count, 1)
- redis.expire(cache_key, failure_reset_time)
- maintain_known_keys(redis)
- end
- end
- end
-
- def track_storage_accessible
- @failure_info = FailureInfo.new(nil, 0)
-
- Gitlab::Git::Storage.redis.with do |redis|
- redis.pipelined do
- redis.hset(cache_key, :last_failure, nil)
- redis.hset(cache_key, :failure_count, 0)
- maintain_known_keys(redis)
- end
- end
- end
-
- def maintain_known_keys(redis)
- expire_time = Time.now.to_i + failure_reset_time
- redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
- redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
- end
-
- def get_failure_info
- last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
- redis.hmget(cache_key, :last_failure, :failure_count)
- end
-
- last_failure = Time.at(last_failure.to_i) if last_failure.present?
-
- FailureInfo.new(last_failure, failure_count.to_i)
- end
-
- def cache_key
- @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
end
end
diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb
index 257fe8cd8f0..c9e225f187d 100644
--- a/lib/gitlab/git/storage/circuit_breaker_settings.rb
+++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb
@@ -6,10 +6,6 @@ module Gitlab
application_settings.circuitbreaker_failure_count_threshold
end
- def failure_wait_time
- application_settings.circuitbreaker_failure_wait_time
- end
-
def failure_reset_time
application_settings.circuitbreaker_failure_reset_time
end
@@ -22,8 +18,12 @@ module Gitlab
application_settings.circuitbreaker_access_retries
end
- def backoff_threshold
- application_settings.circuitbreaker_backoff_threshold
+ def check_interval
+ application_settings.circuitbreaker_check_interval
+ end
+
+ def cache_key
+ @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
private
diff --git a/lib/gitlab/git/storage/failure_info.rb b/lib/gitlab/git/storage/failure_info.rb
new file mode 100644
index 00000000000..387279c110d
--- /dev/null
+++ b/lib/gitlab/git/storage/failure_info.rb
@@ -0,0 +1,39 @@
+module Gitlab
+ module Git
+ module Storage
+ class FailureInfo
+ attr_accessor :first_failure, :last_failure, :failure_count
+
+ def self.reset_all!
+ Gitlab::Git::Storage.redis.with do |redis|
+ all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
+ redis.del(*all_storage_keys) unless all_storage_keys.empty?
+ end
+
+ RequestStore.delete(:circuitbreaker_cache)
+ end
+
+ def self.load(cache_key)
+ first_failure, last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
+ redis.hmget(cache_key, :first_failure, :last_failure, :failure_count)
+ end
+
+ last_failure = Time.at(last_failure.to_i) if last_failure.present?
+ first_failure = Time.at(first_failure.to_i) if first_failure.present?
+
+ new(first_failure, last_failure, failure_count.to_i)
+ end
+
+ def initialize(first_failure, last_failure, failure_count)
+ @first_failure = first_failure
+ @last_failure = last_failure
+ @failure_count = failure_count
+ end
+
+ def no_failures?
+ first_failure.blank? && last_failure.blank? && failure_count == 0
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb
index a12d52d295f..261c936c689 100644
--- a/lib/gitlab/git/storage/null_circuit_breaker.rb
+++ b/lib/gitlab/git/storage/null_circuit_breaker.rb
@@ -11,6 +11,9 @@ module Gitlab
# These will always have nil values
attr_reader :storage_path
+ delegate :last_failure, :failure_count, :no_failures?,
+ to: :failure_info
+
def initialize(storage, hostname, error: nil)
@storage = storage
@hostname = hostname
@@ -29,16 +32,17 @@ module Gitlab
false
end
- def last_failure
- circuit_broken? ? Time.now : nil
- end
-
- def failure_count
- circuit_broken? ? failure_count_threshold : 0
- end
-
def failure_info
- Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(last_failure, failure_count)
+ @failure_info ||=
+ if circuit_broken?
+ Gitlab::Git::Storage::FailureInfo.new(Time.now,
+ Time.now,
+ failure_count_threshold)
+ else
+ Gitlab::Git::Storage::FailureInfo.new(nil,
+ nil,
+ 0)
+ end
end
end
end
diff --git a/lib/gitlab/storage_check.rb b/lib/gitlab/storage_check.rb
new file mode 100644
index 00000000000..fe81513c9ec
--- /dev/null
+++ b/lib/gitlab/storage_check.rb
@@ -0,0 +1,11 @@
+require_relative 'storage_check/cli'
+require_relative 'storage_check/gitlab_caller'
+require_relative 'storage_check/option_parser'
+require_relative 'storage_check/response'
+
+module Gitlab
+ module StorageCheck
+ ENDPOINT = '/-/storage_check'.freeze
+ Options = Struct.new(:target, :token, :interval, :dryrun)
+ end
+end
diff --git a/lib/gitlab/storage_check/cli.rb b/lib/gitlab/storage_check/cli.rb
new file mode 100644
index 00000000000..04bf1bf1d26
--- /dev/null
+++ b/lib/gitlab/storage_check/cli.rb
@@ -0,0 +1,69 @@
+module Gitlab
+ module StorageCheck
+ class CLI
+ def self.start!(args)
+ runner = new(Gitlab::StorageCheck::OptionParser.parse!(args))
+ runner.start_loop
+ end
+
+ attr_reader :logger, :options
+
+ def initialize(options)
+ @options = options
+ @logger = Logger.new(STDOUT)
+ end
+
+ def start_loop
+ logger.info "Checking #{options.target} every #{options.interval} seconds"
+
+ if options.dryrun
+ logger.info "Dryrun, exiting..."
+ return
+ end
+
+ begin
+ loop do
+ response = GitlabCaller.new(options).call!
+ log_response(response)
+ update_settings(response)
+
+ sleep options.interval
+ end
+ rescue Interrupt
+ logger.info "Ending storage-check"
+ end
+ end
+
+ def update_settings(response)
+ previous_interval = options.interval
+
+ if response.valid?
+ options.interval = response.check_interval || previous_interval
+ end
+
+ if previous_interval != options.interval
+ logger.info "Interval changed: #{options.interval} seconds"
+ end
+ end
+
+ def log_response(response)
+ unless response.valid?
+ return logger.error("Invalid response checking nfs storage: #{response.http_response.inspect}")
+ end
+
+ if response.responsive_shards.any?
+ logger.debug("Responsive shards: #{response.responsive_shards.join(', ')}")
+ end
+
+ warnings = []
+ if response.skipped_shards.any?
+ warnings << "Skipped shards: #{response.skipped_shards.join(', ')}"
+ end
+ if response.failing_shards.any?
+ warnings << "Failing shards: #{response.failing_shards.join(', ')}"
+ end
+ logger.warn(warnings.join(' - ')) if warnings.any?
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/gitlab_caller.rb b/lib/gitlab/storage_check/gitlab_caller.rb
new file mode 100644
index 00000000000..44952b68844
--- /dev/null
+++ b/lib/gitlab/storage_check/gitlab_caller.rb
@@ -0,0 +1,39 @@
+require 'excon'
+
+module Gitlab
+ module StorageCheck
+ class GitlabCaller
+ def initialize(options)
+ @options = options
+ end
+
+ def call!
+ Gitlab::StorageCheck::Response.new(get_response)
+ rescue Errno::ECONNREFUSED, Excon::Error
+ # Server not ready, treated as invalid response.
+ Gitlab::StorageCheck::Response.new(nil)
+ end
+
+ def get_response
+ scheme, *other_parts = URI.split(@options.target)
+ socket_path = if scheme == 'unix'
+ other_parts.compact.join
+ end
+
+ connection = Excon.new(@options.target, socket: socket_path)
+ connection.post(path: Gitlab::StorageCheck::ENDPOINT,
+ headers: headers)
+ end
+
+ def headers
+ @headers ||= begin
+ headers = {}
+ headers['Content-Type'] = headers['Accept'] = 'application/json'
+ headers['TOKEN'] = @options.token if @options.token
+
+ headers
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/option_parser.rb b/lib/gitlab/storage_check/option_parser.rb
new file mode 100644
index 00000000000..66ed7906f97
--- /dev/null
+++ b/lib/gitlab/storage_check/option_parser.rb
@@ -0,0 +1,39 @@
+module Gitlab
+ module StorageCheck
+ class OptionParser
+ def self.parse!(args)
+ # Start out with some defaults
+ options = Gitlab::StorageCheck::Options.new(nil, nil, 1, false)
+
+ parser = ::OptionParser.new do |opts|
+ opts.banner = "Usage: bin/storage_check [options]"
+
+ opts.on('-t=string', '--target string', 'URL or socket to trigger storage check') do |value|
+ options.target = value
+ end
+
+ opts.on('-T=string', '--token string', 'Health token to use') { |value| options.token = value }
+
+ opts.on('-i=n', '--interval n', ::OptionParser::DecimalInteger, 'Seconds between checks') do |value|
+ options.interval = value
+ end
+
+ opts.on('-d', '--dryrun', "Output what will be performed, but don't start the process") do |value|
+ options.dryrun = value
+ end
+ end
+ parser.parse!(args)
+
+ unless options.target
+ raise ::OptionParser::InvalidArgument.new('Provide a URI to provide checks')
+ end
+
+ if URI.parse(options.target).scheme.nil?
+ raise ::OptionParser::InvalidArgument.new('Add the scheme to the target, `unix://`, `https://` or `http://` are supported')
+ end
+
+ options
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/response.rb b/lib/gitlab/storage_check/response.rb
new file mode 100644
index 00000000000..326ab236e3e
--- /dev/null
+++ b/lib/gitlab/storage_check/response.rb
@@ -0,0 +1,77 @@
+require 'json'
+
+module Gitlab
+ module StorageCheck
+ class Response
+ attr_reader :http_response
+
+ def initialize(http_response)
+ @http_response = http_response
+ end
+
+ def valid?
+ @http_response && (200...299).cover?(@http_response.status) &&
+ @http_response.headers['Content-Type'].include?('application/json') &&
+ parsed_response
+ end
+
+ def check_interval
+ return nil unless parsed_response
+
+ parsed_response['check_interval']
+ end
+
+ def responsive_shards
+ divided_results[:responsive_shards]
+ end
+
+ def skipped_shards
+ divided_results[:skipped_shards]
+ end
+
+ def failing_shards
+ divided_results[:failing_shards]
+ end
+
+ private
+
+ def results
+ return [] unless parsed_response
+
+ parsed_response['results']
+ end
+
+ def divided_results
+ return @divided_results if @divided_results
+
+ @divided_results = {}
+ @divided_results[:responsive_shards] = []
+ @divided_results[:skipped_shards] = []
+ @divided_results[:failing_shards] = []
+
+ results.each do |info|
+ name = info['storage']
+
+ case info['success']
+ when true
+ @divided_results[:responsive_shards] << name
+ when false
+ @divided_results[:failing_shards] << name
+ else
+ @divided_results[:skipped_shards] << name
+ end
+ end
+
+ @divided_results
+ end
+
+ def parsed_response
+ return @parsed_response if defined?(@parsed_response)
+
+ @parsed_response = JSON.parse(@http_response.body)
+ rescue JSON::JSONError
+ @parsed_response = nil
+ end
+ end
+ end
+end