diff options
author | Bob Van Landuyt <bob@vanlanduyt.co> | 2017-11-13 18:52:07 +0300 |
---|---|---|
committer | Bob Van Landuyt <bob@vanlanduyt.co> | 2017-12-08 11:11:39 +0300 |
commit | f1ae1e39ce6b7578c5697c977bc3b52b119301ab (patch) | |
tree | 1d01033287e4e15e505c7b8b3f69ced4e6cf21c8 /lib/gitlab/git | |
parent | 12d33b883adda7093f0f4b838532871036af3925 (diff) |
Move the circuitbreaker check out in a separate process
Moving the check out of the general requests, makes sure we don't have
any slowdown in the regular requests.
To keep the process performing this checks small, the check is still
performed inside a unicorn. But that is called from a process running
on the same server.
Because the checks are now done outside normal request, we can have a
simpler failure strategy:
The check is now performed in the background every
`circuitbreaker_check_interval`. Failures are logged in redis. The
failures are reset when the check succeeds. Per check we will try
`circuitbreaker_access_retries` times within
`circuitbreaker_storage_timeout` seconds.
When the number of failures exceeds
`circuitbreaker_failure_count_threshold`, we will block access to the
storage.
After `failure_reset_time` of no checks, we will clear the stored
failures. This could happen when the process that performs the checks
is not running.
Diffstat (limited to 'lib/gitlab/git')
-rw-r--r-- | lib/gitlab/git/storage/checker.rb | 98 | ||||
-rw-r--r-- | lib/gitlab/git/storage/circuit_breaker.rb | 106 | ||||
-rw-r--r-- | lib/gitlab/git/storage/circuit_breaker_settings.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/git/storage/failure_info.rb | 39 | ||||
-rw-r--r-- | lib/gitlab/git/storage/null_circuit_breaker.rb | 22 |
5 files changed, 160 insertions, 117 deletions
diff --git a/lib/gitlab/git/storage/checker.rb b/lib/gitlab/git/storage/checker.rb new file mode 100644 index 00000000000..de63cb4b40c --- /dev/null +++ b/lib/gitlab/git/storage/checker.rb @@ -0,0 +1,98 @@ +module Gitlab + module Git + module Storage + class Checker + include CircuitBreakerSettings + + attr_reader :storage_path, :storage, :hostname, :logger + + def self.check_all(logger = Rails.logger) + threads = Gitlab.config.repositories.storages.keys.map do |storage_name| + Thread.new do + Thread.current[:result] = new(storage_name, logger).check_with_lease + end + end + + threads.map do |thread| + thread.join + thread[:result] + end + end + + def initialize(storage, logger = Rails.logger) + @storage = storage + config = Gitlab.config.repositories.storages[@storage] + @storage_path = config['path'] + @logger = logger + + @hostname = Gitlab::Environment.hostname + end + + def check_with_lease + lease_key = "storage_check:#{cache_key}" + lease = Gitlab::ExclusiveLease.new(lease_key, timeout: storage_timeout) + result = { storage: storage, success: nil } + + if uuid = lease.try_obtain + result[:success] = check + + Gitlab::ExclusiveLease.cancel(lease_key, uuid) + else + logger.warn("#{hostname}: #{storage}: Skipping check, previous check still running") + end + + result + end + + def check + if Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout, access_retries) + track_storage_accessible + true + else + track_storage_inaccessible + logger.error("#{hostname}: #{storage}: Not accessible.") + false + end + end + + private + + def track_storage_inaccessible + first_failure = current_failure_info.first_failure || Time.now + last_failure = Time.now + + Gitlab::Git::Storage.redis.with do |redis| + redis.pipelined do + redis.hset(cache_key, :first_failure, first_failure.to_i) + redis.hset(cache_key, :last_failure, last_failure.to_i) + redis.hincrby(cache_key, :failure_count, 1) + redis.expire(cache_key, failure_reset_time) + maintain_known_keys(redis) + end + end + end + + def track_storage_accessible + Gitlab::Git::Storage.redis.with do |redis| + redis.pipelined do + redis.hset(cache_key, :first_failure, nil) + redis.hset(cache_key, :last_failure, nil) + redis.hset(cache_key, :failure_count, 0) + maintain_known_keys(redis) + end + end + end + + def maintain_known_keys(redis) + expire_time = Time.now.to_i + failure_reset_time + redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key) + redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i) + end + + def current_failure_info + FailureInfo.load(cache_key) + end + end + end + end +end diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb index 4328c0ea29b..898bb1b65be 100644 --- a/lib/gitlab/git/storage/circuit_breaker.rb +++ b/lib/gitlab/git/storage/circuit_breaker.rb @@ -4,22 +4,11 @@ module Gitlab class CircuitBreaker include CircuitBreakerSettings - FailureInfo = Struct.new(:last_failure, :failure_count) - attr_reader :storage, - :hostname, - :storage_path - - delegate :last_failure, :failure_count, to: :failure_info - - def self.reset_all! - Gitlab::Git::Storage.redis.with do |redis| - all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1) - redis.del(*all_storage_keys) unless all_storage_keys.empty? - end + :hostname - RequestStore.delete(:circuitbreaker_cache) - end + delegate :last_failure, :failure_count, :no_failures?, + to: :failure_info def self.for_storage(storage) cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do @@ -46,9 +35,6 @@ module Gitlab def initialize(storage, hostname) @storage = storage @hostname = hostname - - config = Gitlab.config.repositories.storages[@storage] - @storage_path = config['path'] end def perform @@ -65,15 +51,6 @@ module Gitlab failure_count > failure_count_threshold end - def backing_off? - return false if no_failures? - - recent_failure = last_failure > failure_wait_time.seconds.ago - too_many_failures = failure_count > backoff_threshold - - recent_failure && too_many_failures - end - private # The circuitbreaker can be enabled for the entire fleet using a Feature @@ -86,88 +63,13 @@ module Gitlab end def failure_info - @failure_info ||= get_failure_info - end - - # Memoizing the `storage_available` call means we only do it once per - # request when the storage is available. - # - # When the storage appears not available, and the memoized value is `false` - # we might want to try again. - def storage_available? - return @storage_available if @storage_available - - if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck - .storage_available?(storage_path, storage_timeout, access_retries) - track_storage_accessible - else - track_storage_inaccessible - end - - @storage_available + @failure_info ||= FailureInfo.load(cache_key) end def check_storage_accessible! if circuit_broken? raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time) end - - if backing_off? - raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time) - end - - unless storage_available? - raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time) - end - end - - def no_failures? - last_failure.blank? && failure_count == 0 - end - - def track_storage_inaccessible - @failure_info = FailureInfo.new(Time.now, failure_count + 1) - - Gitlab::Git::Storage.redis.with do |redis| - redis.pipelined do - redis.hset(cache_key, :last_failure, last_failure.to_i) - redis.hincrby(cache_key, :failure_count, 1) - redis.expire(cache_key, failure_reset_time) - maintain_known_keys(redis) - end - end - end - - def track_storage_accessible - @failure_info = FailureInfo.new(nil, 0) - - Gitlab::Git::Storage.redis.with do |redis| - redis.pipelined do - redis.hset(cache_key, :last_failure, nil) - redis.hset(cache_key, :failure_count, 0) - maintain_known_keys(redis) - end - end - end - - def maintain_known_keys(redis) - expire_time = Time.now.to_i + failure_reset_time - redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key) - redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i) - end - - def get_failure_info - last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis| - redis.hmget(cache_key, :last_failure, :failure_count) - end - - last_failure = Time.at(last_failure.to_i) if last_failure.present? - - FailureInfo.new(last_failure, failure_count.to_i) - end - - def cache_key - @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}" end end end diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb index 257fe8cd8f0..c9e225f187d 100644 --- a/lib/gitlab/git/storage/circuit_breaker_settings.rb +++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb @@ -6,10 +6,6 @@ module Gitlab application_settings.circuitbreaker_failure_count_threshold end - def failure_wait_time - application_settings.circuitbreaker_failure_wait_time - end - def failure_reset_time application_settings.circuitbreaker_failure_reset_time end @@ -22,8 +18,12 @@ module Gitlab application_settings.circuitbreaker_access_retries end - def backoff_threshold - application_settings.circuitbreaker_backoff_threshold + def check_interval + application_settings.circuitbreaker_check_interval + end + + def cache_key + @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}" end private diff --git a/lib/gitlab/git/storage/failure_info.rb b/lib/gitlab/git/storage/failure_info.rb new file mode 100644 index 00000000000..387279c110d --- /dev/null +++ b/lib/gitlab/git/storage/failure_info.rb @@ -0,0 +1,39 @@ +module Gitlab + module Git + module Storage + class FailureInfo + attr_accessor :first_failure, :last_failure, :failure_count + + def self.reset_all! + Gitlab::Git::Storage.redis.with do |redis| + all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1) + redis.del(*all_storage_keys) unless all_storage_keys.empty? + end + + RequestStore.delete(:circuitbreaker_cache) + end + + def self.load(cache_key) + first_failure, last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis| + redis.hmget(cache_key, :first_failure, :last_failure, :failure_count) + end + + last_failure = Time.at(last_failure.to_i) if last_failure.present? + first_failure = Time.at(first_failure.to_i) if first_failure.present? + + new(first_failure, last_failure, failure_count.to_i) + end + + def initialize(first_failure, last_failure, failure_count) + @first_failure = first_failure + @last_failure = last_failure + @failure_count = failure_count + end + + def no_failures? + first_failure.blank? && last_failure.blank? && failure_count == 0 + end + end + end + end +end diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb index a12d52d295f..261c936c689 100644 --- a/lib/gitlab/git/storage/null_circuit_breaker.rb +++ b/lib/gitlab/git/storage/null_circuit_breaker.rb @@ -11,6 +11,9 @@ module Gitlab # These will always have nil values attr_reader :storage_path + delegate :last_failure, :failure_count, :no_failures?, + to: :failure_info + def initialize(storage, hostname, error: nil) @storage = storage @hostname = hostname @@ -29,16 +32,17 @@ module Gitlab false end - def last_failure - circuit_broken? ? Time.now : nil - end - - def failure_count - circuit_broken? ? failure_count_threshold : 0 - end - def failure_info - Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(last_failure, failure_count) + @failure_info ||= + if circuit_broken? + Gitlab::Git::Storage::FailureInfo.new(Time.now, + Time.now, + failure_count_threshold) + else + Gitlab::Git::Storage::FailureInfo.new(nil, + nil, + 0) + end end end end |