Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@mcgivern.me.uk>2017-10-24 12:10:42 +0300
committerBob Van Landuyt <bob@vanlanduyt.co>2017-10-30 14:07:19 +0300
commite32712e3a613106121a2afe8ea1fb1f9dad93783 (patch)
tree36aaa28460b24124727bbd79a270f00300ac443c
parenteb28364b68ce73a6dca254de8fb0ce0eeee37283 (diff)
Merge branch 'bvl-circuitbreaker-backoff' into 'master'
Circuitbreaker backoff and retries Closes #37383 and #38231 See merge request gitlab-org/gitlab-ce!14933
-rw-r--r--app/helpers/application_settings_helper.rb11
-rw-r--r--app/helpers/storage_health_helper.rb5
-rw-r--r--app/models/application_setting.rb14
-rw-r--r--app/views/admin/application_settings/_form.html.haml30
-rw-r--r--changelogs/unreleased/bvl-circuitbreaker-backoff.yml6
-rw-r--r--db/migrate/20171017145932_add_new_circuitbreaker_settings_to_application_settings.rb16
-rw-r--r--db/schema.rb4
-rw-r--r--doc/administration/img/circuitbreaker_config.pngbin213210 -> 335073 bytes
-rw-r--r--doc/administration/repository_storage_paths.md14
-rw-r--r--doc/api/settings.md2
-rw-r--r--lib/gitlab/git/storage.rb1
-rw-r--r--lib/gitlab/git/storage/circuit_breaker.rb41
-rw-r--r--lib/gitlab/git/storage/circuit_breaker_settings.rb8
-rw-r--r--lib/gitlab/git/storage/forked_storage_check.rb13
-rw-r--r--lib/gitlab/git/storage/null_circuit_breaker.rb4
-rw-r--r--spec/lib/gitlab/git/storage/circuit_breaker_spec.rb261
-rw-r--r--spec/lib/gitlab/git/storage/forked_storage_check_spec.rb15
-rw-r--r--spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb13
-rw-r--r--spec/models/application_setting_spec.rb13
19 files changed, 275 insertions, 196 deletions
diff --git a/app/helpers/application_settings_helper.rb b/app/helpers/application_settings_helper.rb
index 1ee8911bb1a..cd1ecaadb85 100644
--- a/app/helpers/application_settings_helper.rb
+++ b/app/helpers/application_settings_helper.rb
@@ -120,6 +120,15 @@ module ApplicationSettingsHelper
message.html_safe
end
+ def circuitbreaker_access_retries_help_text
+ _('The number of attempts GitLab will make to access a storage.')
+ end
+
+ def circuitbreaker_backoff_threshold_help_text
+ _("The number of failures after which GitLab will start temporarily "\
+ "disabling access to a storage shard on a host")
+ end
+
def circuitbreaker_failure_wait_time_help_text
_("When access to a storage fails. GitLab will prevent access to the "\
"storage for the time specified here. This allows the filesystem to "\
@@ -144,6 +153,8 @@ module ApplicationSettingsHelper
:akismet_api_key,
:akismet_enabled,
:auto_devops_enabled,
+ :circuitbreaker_access_retries,
+ :circuitbreaker_backoff_threshold,
:circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_reset_time,
:circuitbreaker_failure_wait_time,
diff --git a/app/helpers/storage_health_helper.rb b/app/helpers/storage_health_helper.rb
index 544c9efb845..4d2180f7eee 100644
--- a/app/helpers/storage_health_helper.rb
+++ b/app/helpers/storage_health_helper.rb
@@ -16,17 +16,16 @@ module StorageHealthHelper
def message_for_circuit_breaker(circuit_breaker)
maximum_failures = circuit_breaker.failure_count_threshold
current_failures = circuit_breaker.failure_count
- permanently_broken = circuit_breaker.circuit_broken? && current_failures >= maximum_failures
translation_params = { number_of_failures: current_failures,
maximum_failures: maximum_failures,
number_of_seconds: circuit_breaker.failure_wait_time }
- if permanently_broken
+ if circuit_breaker.circuit_broken?
s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
"retry automatically. Reset storage information when the problem is "\
"resolved.") % translation_params
- elsif circuit_breaker.circuit_broken?
+ elsif circuit_breaker.backing_off?
_("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
"block access for %{number_of_seconds} seconds.") % translation_params
else
diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb
index 4dda276bb41..f266e7db6da 100644
--- a/app/models/application_setting.rb
+++ b/app/models/application_setting.rb
@@ -153,13 +153,25 @@ class ApplicationSetting < ActiveRecord::Base
presence: true,
numericality: { greater_than_or_equal_to: 0 }
- validates :circuitbreaker_failure_count_threshold,
+ validates :circuitbreaker_backoff_threshold,
+ :circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_wait_time,
:circuitbreaker_failure_reset_time,
:circuitbreaker_storage_timeout,
presence: true,
numericality: { only_integer: true, greater_than_or_equal_to: 0 }
+ validates :circuitbreaker_access_retries,
+ presence: true,
+ numericality: { only_integer: true, greater_than_or_equal_to: 1 }
+
+ validates_each :circuitbreaker_backoff_threshold do |record, attr, value|
+ if value.to_i >= record.circuitbreaker_failure_count_threshold
+ record.errors.add(attr, _("The circuitbreaker backoff threshold should be "\
+ "lower than the failure count threshold"))
+ end
+ end
+
SUPPORTED_KEY_TYPES.each do |type|
validates :"#{type}_key_restriction", presence: true, key_restriction: { type: type }
end
diff --git a/app/views/admin/application_settings/_form.html.haml b/app/views/admin/application_settings/_form.html.haml
index 2b23af9212e..3a4d5ce0b5c 100644
--- a/app/views/admin/application_settings/_form.html.haml
+++ b/app/views/admin/application_settings/_form.html.haml
@@ -533,11 +533,23 @@
%fieldset
%legend Git Storage Circuitbreaker settings
.form-group
- = f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2'
+ = f.label :circuitbreaker_access_retries, _('Number of access attempts'), class: 'control-label col-sm-2'
.col-sm-10
- = f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control'
+ = f.number_field :circuitbreaker_access_retries, class: 'form-control'
.help-block
- = circuitbreaker_failure_count_help_text
+ = circuitbreaker_access_retries_help_text
+ .form-group
+ = f.label :circuitbreaker_storage_timeout, _('Seconds to wait for a storage access attempt'), class: 'control-label col-sm-2'
+ .col-sm-10
+ = f.number_field :circuitbreaker_storage_timeout, class: 'form-control'
+ .help-block
+ = circuitbreaker_storage_timeout_help_text
+ .form-group
+ = f.label :circuitbreaker_backoff_threshold, _('Number of failures before backing off'), class: 'control-label col-sm-2'
+ .col-sm-10
+ = f.number_field :circuitbreaker_backoff_threshold, class: 'form-control'
+ .help-block
+ = circuitbreaker_backoff_threshold_help_text
.form-group
= f.label :circuitbreaker_failure_wait_time, _('Seconds to wait after a storage failure'), class: 'control-label col-sm-2'
.col-sm-10
@@ -545,17 +557,17 @@
.help-block
= circuitbreaker_failure_wait_time_help_text
.form-group
- = f.label :circuitbreaker_failure_reset_time, _('Seconds before reseting failure information'), class: 'control-label col-sm-2'
+ = f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2'
.col-sm-10
- = f.number_field :circuitbreaker_failure_reset_time, class: 'form-control'
+ = f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control'
.help-block
- = circuitbreaker_failure_reset_time_help_text
+ = circuitbreaker_failure_count_help_text
.form-group
- = f.label :circuitbreaker_storage_timeout, _('Seconds to wait for a storage access attempt'), class: 'control-label col-sm-2'
+ = f.label :circuitbreaker_failure_reset_time, _('Seconds before reseting failure information'), class: 'control-label col-sm-2'
.col-sm-10
- = f.number_field :circuitbreaker_storage_timeout, class: 'form-control'
+ = f.number_field :circuitbreaker_failure_reset_time, class: 'form-control'
.help-block
- = circuitbreaker_storage_timeout_help_text
+ = circuitbreaker_failure_reset_time_help_text
%fieldset
%legend Repository Checks
diff --git a/changelogs/unreleased/bvl-circuitbreaker-backoff.yml b/changelogs/unreleased/bvl-circuitbreaker-backoff.yml
new file mode 100644
index 00000000000..5cb90e7c085
--- /dev/null
+++ b/changelogs/unreleased/bvl-circuitbreaker-backoff.yml
@@ -0,0 +1,6 @@
+---
+title: Make the circuitbreaker more robust by adding higher thresholds, and multiple
+ access attempts.
+merge_request: 14933
+author:
+type: fixed
diff --git a/db/migrate/20171017145932_add_new_circuitbreaker_settings_to_application_settings.rb b/db/migrate/20171017145932_add_new_circuitbreaker_settings_to_application_settings.rb
new file mode 100644
index 00000000000..07eb25c0b0f
--- /dev/null
+++ b/db/migrate/20171017145932_add_new_circuitbreaker_settings_to_application_settings.rb
@@ -0,0 +1,16 @@
+class AddNewCircuitbreakerSettingsToApplicationSettings < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ def change
+ add_column :application_settings,
+ :circuitbreaker_access_retries,
+ :integer,
+ default: 3
+ add_column :application_settings,
+ :circuitbreaker_backoff_threshold,
+ :integer,
+ default: 80
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index d64e0fbf60a..d26481625c7 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20171012101043) do
+ActiveRecord::Schema.define(version: 20171017145932) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@@ -138,6 +138,8 @@ ActiveRecord::Schema.define(version: 20171012101043) do
t.integer "circuitbreaker_failure_wait_time", default: 30
t.integer "circuitbreaker_failure_reset_time", default: 1800
t.integer "circuitbreaker_storage_timeout", default: 30
+ t.integer "circuitbreaker_access_retries", default: 3
+ t.integer "circuitbreaker_backoff_threshold", default: 80
end
create_table "audit_events", force: :cascade do |t|
diff --git a/doc/administration/img/circuitbreaker_config.png b/doc/administration/img/circuitbreaker_config.png
index 9250d38297c..e811d173634 100644
--- a/doc/administration/img/circuitbreaker_config.png
+++ b/doc/administration/img/circuitbreaker_config.png
Binary files differ
diff --git a/doc/administration/repository_storage_paths.md b/doc/administration/repository_storage_paths.md
index efcabd69822..96f436fa7c3 100644
--- a/doc/administration/repository_storage_paths.md
+++ b/doc/administration/repository_storage_paths.md
@@ -109,6 +109,11 @@ This can be configured from the admin interface:
![circuitbreaker configuration](img/circuitbreaker_config.png)
+**Number of access attempts**: The number of attempts GitLab will make to access a
+storage when probing a shard.
+
+**Number of failures before backing off**: The number of failures after which
+GitLab will start temporarily disabling access to a storage shard on a host.
**Maximum git storage failures:** The number of failures of after which GitLab will
completely prevent access to the storage. The number of failures can be reset in
@@ -126,6 +131,15 @@ mount is reset.
**Seconds to wait for a storage access attempt:** The time in seconds GitLab will
try to access storage. After this time a timeout error will be raised.
+To enable the circuitbreaker for repository storage you can flip the feature flag from a rails console:
+
+```
+Feature.enable('git_storage_circuit_breaker')
+```
+
+Alternatively it can be enabled by setting `true` in the `GIT_STORAGE_CIRCUIT_BREAKER` environment variable.
+This approach would be used when enabling the circuit breaker on a single host.
+
When storage failures occur, this will be visible in the admin interface like this:
![failing storage](img/failing_storage.png)
diff --git a/doc/api/settings.md b/doc/api/settings.md
index 5ec96b8a6a2..5fce6ac7810 100644
--- a/doc/api/settings.md
+++ b/doc/api/settings.md
@@ -96,6 +96,8 @@ PUT /application/settings
| `dsa_key_restriction` | integer | no | The minimum allowed bit length of an uploaded DSA key. Default is `0` (no restriction). `-1` disables DSA keys.
| `ecdsa_key_restriction` | integer | no | The minimum allowed curve size (in bits) of an uploaded ECDSA key. Default is `0` (no restriction). `-1` disables ECDSA keys.
| `ed25519_key_restriction` | integer | no | The minimum allowed curve size (in bits) of an uploaded ED25519 key. Default is `0` (no restriction). `-1` disables ED25519 keys.
+| `circuitbreaker_access_retries | integer | no | The number of attempts GitLab will make to access a storage. |
+| `circuitbreaker_backoff_threshold | integer | no | The number of failures after which GitLab will start temporarily disabling access to a storage shard on a host. |
| `circuitbreaker_failure_count_threshold` | integer | no | The number of failures of after which GitLab will completely prevent access to the storage. |
| `circuitbreaker_failure_reset_time` | integer | no | Time in seconds GitLab will keep storage failure information. When no failures occur during this time, the failure information is reset. |
| `circuitbreaker_failure_wait_time` | integer | no | Time in seconds GitLab will block access to a failing storage to allow it to recover. |
diff --git a/lib/gitlab/git/storage.rb b/lib/gitlab/git/storage.rb
index 08e6c29abad..99518c9b1e4 100644
--- a/lib/gitlab/git/storage.rb
+++ b/lib/gitlab/git/storage.rb
@@ -12,6 +12,7 @@ module Gitlab
CircuitOpen = Class.new(Inaccessible)
Misconfiguration = Class.new(Inaccessible)
+ Failing = Class.new(Inaccessible)
REDIS_KEY_PREFIX = 'storage_accessible:'.freeze
diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb
index 0456ad9a1f3..be7598ef011 100644
--- a/lib/gitlab/git/storage/circuit_breaker.rb
+++ b/lib/gitlab/git/storage/circuit_breaker.rb
@@ -54,7 +54,7 @@ module Gitlab
end
def perform
- return yield unless Feature.enabled?('git_storage_circuit_breaker')
+ return yield unless enabled?
check_storage_accessible!
@@ -64,10 +64,27 @@ module Gitlab
def circuit_broken?
return false if no_failures?
+ failure_count > failure_count_threshold
+ end
+
+ def backing_off?
+ return false if no_failures?
+
recent_failure = last_failure > failure_wait_time.seconds.ago
- too_many_failures = failure_count > failure_count_threshold
+ too_many_failures = failure_count > backoff_threshold
- recent_failure || too_many_failures
+ recent_failure && too_many_failures
+ end
+
+ private
+
+ # The circuitbreaker can be enabled for the entire fleet using a Feature
+ # flag.
+ #
+ # Enabling it for a single host can be done setting the
+ # `GIT_STORAGE_CIRCUIT_BREAKER` environment variable.
+ def enabled?
+ ENV['GIT_STORAGE_CIRCUIT_BREAKER'].present? || Feature.enabled?('git_storage_circuit_breaker')
end
def failure_info
@@ -83,7 +100,7 @@ module Gitlab
return @storage_available if @storage_available
if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck
- .storage_available?(storage_path, storage_timeout)
+ .storage_available?(storage_path, storage_timeout, access_retries)
track_storage_accessible
else
track_storage_inaccessible
@@ -94,7 +111,11 @@ module Gitlab
def check_storage_accessible!
if circuit_broken?
- raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_wait_time)
+ raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
+ end
+
+ if backing_off?
+ raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
end
unless storage_available?
@@ -131,12 +152,6 @@ module Gitlab
end
end
- def cache_key
- @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
- end
-
- private
-
def get_failure_info
last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, :last_failure, :failure_count)
@@ -146,6 +161,10 @@ module Gitlab
FailureInfo.new(last_failure, failure_count.to_i)
end
+
+ def cache_key
+ @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
+ end
end
end
end
diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb
index d2313fe7c1b..257fe8cd8f0 100644
--- a/lib/gitlab/git/storage/circuit_breaker_settings.rb
+++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb
@@ -18,6 +18,14 @@ module Gitlab
application_settings.circuitbreaker_storage_timeout
end
+ def access_retries
+ application_settings.circuitbreaker_access_retries
+ end
+
+ def backoff_threshold
+ application_settings.circuitbreaker_backoff_threshold
+ end
+
private
def application_settings
diff --git a/lib/gitlab/git/storage/forked_storage_check.rb b/lib/gitlab/git/storage/forked_storage_check.rb
index 91d8241f17b..1307f400700 100644
--- a/lib/gitlab/git/storage/forked_storage_check.rb
+++ b/lib/gitlab/git/storage/forked_storage_check.rb
@@ -4,8 +4,17 @@ module Gitlab
module ForkedStorageCheck
extend self
- def storage_available?(path, timeout_seconds = 5)
- status = timeout_check(path, timeout_seconds)
+ def storage_available?(path, timeout_seconds = 5, retries = 1)
+ partial_timeout = timeout_seconds / retries
+ status = timeout_check(path, partial_timeout)
+
+ # If the status check did not succeed the first time, we retry a few
+ # more times to avoid one-off failures
+ current_attempts = 1
+ while current_attempts < retries && !status.success?
+ status = timeout_check(path, partial_timeout)
+ current_attempts += 1
+ end
status.success?
end
diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb
index 60c6791a7e4..a12d52d295f 100644
--- a/lib/gitlab/git/storage/null_circuit_breaker.rb
+++ b/lib/gitlab/git/storage/null_circuit_breaker.rb
@@ -25,6 +25,10 @@ module Gitlab
!!@error
end
+ def backing_off?
+ false
+ end
+
def last_failure
circuit_broken? ? Time.now : nil
end
diff --git a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb
index c8d532df059..72dabca793a 100644
--- a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb
+++ b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb
@@ -79,7 +79,9 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
stub_application_setting(circuitbreaker_failure_count_threshold: 0,
circuitbreaker_failure_wait_time: 1,
circuitbreaker_failure_reset_time: 2,
- circuitbreaker_storage_timeout: 3)
+ circuitbreaker_storage_timeout: 3,
+ circuitbreaker_access_retries: 4,
+ circuitbreaker_backoff_threshold: 5)
end
describe '#failure_count_threshold' do
@@ -105,14 +107,43 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
expect(circuit_breaker.storage_timeout).to eq(3)
end
end
+
+ describe '#access_retries' do
+ it 'reads the value from settings' do
+ expect(circuit_breaker.access_retries).to eq(4)
+ end
+ end
+
+ describe '#backoff_threshold' do
+ it 'reads the value from settings' do
+ expect(circuit_breaker.backoff_threshold).to eq(5)
+ end
+ end
end
describe '#perform' do
- it 'raises an exception with retry time when the circuit is open' do
- allow(circuit_breaker).to receive(:circuit_broken?).and_return(true)
+ it 'raises the correct exception when the circuit is open' do
+ set_in_redis(:last_failure, 1.day.ago.to_f)
+ set_in_redis(:failure_count, 999)
expect { |b| circuit_breaker.perform(&b) }
- .to raise_error(Gitlab::Git::Storage::CircuitOpen)
+ .to raise_error do |exception|
+ expect(exception).to be_kind_of(Gitlab::Git::Storage::CircuitOpen)
+ expect(exception.retry_after).to eq(1800)
+ end
+ end
+
+ it 'raises the correct exception when backing off' do
+ Timecop.freeze do
+ set_in_redis(:last_failure, 1.second.ago.to_f)
+ set_in_redis(:failure_count, 90)
+
+ expect { |b| circuit_breaker.perform(&b) }
+ .to raise_error do |exception|
+ expect(exception).to be_kind_of(Gitlab::Git::Storage::Failing)
+ expect(exception.retry_after).to eq(30)
+ end
+ end
end
it 'yields the block' do
@@ -122,6 +153,7 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
it 'checks if the storage is available' do
expect(circuit_breaker).to receive(:check_storage_accessible!)
+ .and_call_original
circuit_breaker.perform { 'hello world' }
end
@@ -137,201 +169,124 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
.to raise_error(Rugged::OSError)
end
- context 'with the feature disabled' do
- it 'returns the block without checking accessibility' do
- stub_feature_flags(git_storage_circuit_breaker: false)
-
- expect(circuit_breaker).not_to receive(:circuit_broken?)
+ it 'tracks that the storage was accessible' do
+ set_in_redis(:failure_count, 10)
+ set_in_redis(:last_failure, Time.now.to_f)
- result = circuit_breaker.perform { 'hello' }
+ circuit_breaker.perform { '' }
- expect(result).to eq('hello')
- end
+ expect(value_from_redis(:failure_count).to_i).to eq(0)
+ expect(value_from_redis(:last_failure)).to be_empty
+ expect(circuit_breaker.failure_count).to eq(0)
+ expect(circuit_breaker.last_failure).to be_nil
end
- end
- describe '#circuit_broken?' do
- it 'is working when there is no last failure' do
- set_in_redis(:last_failure, nil)
- set_in_redis(:failure_count, 0)
+ it 'only performs the accessibility check once' do
+ expect(Gitlab::Git::Storage::ForkedStorageCheck)
+ .to receive(:storage_available?).once.and_call_original
- expect(circuit_breaker.circuit_broken?).to be_falsey
+ 2.times { circuit_breaker.perform { '' } }
end
- it 'is broken when there was a recent failure' do
- Timecop.freeze do
- set_in_redis(:last_failure, 1.second.ago.to_f)
- set_in_redis(:failure_count, 1)
+ it 'calls the check with the correct arguments' do
+ stub_application_setting(circuitbreaker_storage_timeout: 30,
+ circuitbreaker_access_retries: 3)
- expect(circuit_breaker.circuit_broken?).to be_truthy
- end
- end
+ expect(Gitlab::Git::Storage::ForkedStorageCheck)
+ .to receive(:storage_available?).with(TestEnv.repos_path, 30, 3)
+ .and_call_original
- it 'is broken when there are too many failures' do
- set_in_redis(:last_failure, 1.day.ago.to_f)
- set_in_redis(:failure_count, 200)
-
- expect(circuit_breaker.circuit_broken?).to be_truthy
+ circuit_breaker.perform { '' }
end
- context 'the `failure_wait_time` is set to 0' do
+ context 'with the feature disabled' do
before do
- stub_application_setting(circuitbreaker_failure_wait_time: 0)
- end
-
- it 'is working even when there is a recent failure' do
- Timecop.freeze do
- set_in_redis(:last_failure, 0.seconds.ago.to_f)
- set_in_redis(:failure_count, 1)
-
- expect(circuit_breaker.circuit_broken?).to be_falsey
- end
+ stub_feature_flags(git_storage_circuit_breaker: false)
end
- end
- end
- describe "storage_available?" do
- context 'the storage is available' do
- it 'tracks that the storage was accessible an raises the error' do
- expect(circuit_breaker).to receive(:track_storage_accessible)
-
- circuit_breaker.storage_available?
- end
+ it 'returns the block without checking accessibility' do
+ expect(circuit_breaker).not_to receive(:check_storage_accessible!)
- it 'only performs the check once' do
- expect(Gitlab::Git::Storage::ForkedStorageCheck)
- .to receive(:storage_available?).once.and_call_original
+ result = circuit_breaker.perform { 'hello' }
- 2.times { circuit_breaker.storage_available? }
+ expect(result).to eq('hello')
end
- end
-
- context 'storage is not available' do
- let(:storage_name) { 'broken' }
-
- it 'tracks that the storage was inaccessible' do
- expect(circuit_breaker).to receive(:track_storage_inaccessible)
- circuit_breaker.storage_available?
- end
- end
- end
+ it 'allows enabling the feature using an ENV var' do
+ stub_env('GIT_STORAGE_CIRCUIT_BREAKER', 'true')
+ expect(circuit_breaker).to receive(:check_storage_accessible!)
- describe '#check_storage_accessible!' do
- it 'raises an exception with retry time when the circuit is open' do
- allow(circuit_breaker).to receive(:circuit_broken?).and_return(true)
+ result = circuit_breaker.perform { 'hello' }
- expect { circuit_breaker.check_storage_accessible! }
- .to raise_error do |exception|
- expect(exception).to be_kind_of(Gitlab::Git::Storage::CircuitOpen)
- expect(exception.retry_after).to eq(30)
+ expect(result).to eq('hello')
end
end
context 'the storage is not available' do
let(:storage_name) { 'broken' }
- it 'raises an error' do
+ it 'raises the correct exception' do
expect(circuit_breaker).to receive(:track_storage_inaccessible)
- expect { circuit_breaker.check_storage_accessible! }
+ expect { circuit_breaker.perform { '' } }
.to raise_error do |exception|
expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
expect(exception.retry_after).to eq(30)
end
end
- end
- end
-
- describe '#track_storage_inaccessible' do
- around do |example|
- Timecop.freeze { example.run }
- end
-
- it 'records the failure time in redis' do
- circuit_breaker.track_storage_inaccessible
-
- failure_time = value_from_redis(:last_failure)
- expect(Time.at(failure_time.to_i)).to be_within(1.second).of(Time.now)
- end
-
- it 'sets the failure time on the breaker without reloading' do
- circuit_breaker.track_storage_inaccessible
-
- expect(circuit_breaker).not_to receive(:get_failure_info)
- expect(circuit_breaker.last_failure).to eq(Time.now)
- end
-
- it 'increments the failure count in redis' do
- set_in_redis(:failure_count, 10)
-
- circuit_breaker.track_storage_inaccessible
-
- expect(value_from_redis(:failure_count).to_i).to be(11)
- end
-
- it 'increments the failure count on the breaker without reloading' do
- set_in_redis(:failure_count, 10)
-
- circuit_breaker.track_storage_inaccessible
+ it 'tracks that the storage was inaccessible' do
+ Timecop.freeze do
+ expect { circuit_breaker.perform { '' } }.to raise_error(Gitlab::Git::Storage::Inaccessible)
- expect(circuit_breaker).not_to receive(:get_failure_info)
- expect(circuit_breaker.failure_count).to eq(11)
+ expect(value_from_redis(:failure_count).to_i).to eq(1)
+ expect(value_from_redis(:last_failure)).not_to be_empty
+ expect(circuit_breaker.failure_count).to eq(1)
+ expect(circuit_breaker.last_failure).to be_within(1.second).of(Time.now)
+ end
+ end
end
end
- describe '#track_storage_accessible' do
- it 'sets the failure count to zero in redis' do
- set_in_redis(:failure_count, 10)
-
- circuit_breaker.track_storage_accessible
-
- expect(value_from_redis(:failure_count).to_i).to be(0)
- end
-
- it 'sets the failure count to zero on the breaker without reloading' do
- set_in_redis(:failure_count, 10)
-
- circuit_breaker.track_storage_accessible
+ describe '#circuit_broken?' do
+ it 'is working when there is no last failure' do
+ set_in_redis(:last_failure, nil)
+ set_in_redis(:failure_count, 0)
- expect(circuit_breaker).not_to receive(:get_failure_info)
- expect(circuit_breaker.failure_count).to eq(0)
+ expect(circuit_breaker.circuit_broken?).to be_falsey
end
- it 'removes the last failure time from redis' do
- set_in_redis(:last_failure, Time.now.to_i)
-
- circuit_breaker.track_storage_accessible
+ it 'is broken when there are too many failures' do
+ set_in_redis(:last_failure, 1.day.ago.to_f)
+ set_in_redis(:failure_count, 200)
- expect(circuit_breaker).not_to receive(:get_failure_info)
- expect(circuit_breaker.last_failure).to be_nil
+ expect(circuit_breaker.circuit_broken?).to be_truthy
end
+ end
- it 'removes the last failure time from the breaker without reloading' do
- set_in_redis(:last_failure, Time.now.to_i)
-
- circuit_breaker.track_storage_accessible
+ describe '#backing_off?' do
+ it 'is true when there was a recent failure' do
+ Timecop.freeze do
+ set_in_redis(:last_failure, 1.second.ago.to_f)
+ set_in_redis(:failure_count, 90)
- expect(value_from_redis(:last_failure)).to be_empty
+ expect(circuit_breaker.backing_off?).to be_truthy
+ end
end
- it 'wont connect to redis when there are no failures' do
- expect(Gitlab::Git::Storage.redis).to receive(:with).once
- .and_call_original
- expect(circuit_breaker).to receive(:track_storage_accessible)
- .and_call_original
-
- circuit_breaker.track_storage_accessible
- end
- end
+ context 'the `failure_wait_time` is set to 0' do
+ before do
+ stub_application_setting(circuitbreaker_failure_wait_time: 0)
+ end
- describe '#no_failures?' do
- it 'is false when a failure was tracked' do
- set_in_redis(:last_failure, Time.now.to_i)
- set_in_redis(:failure_count, 1)
+ it 'is working even when there are failures' do
+ Timecop.freeze do
+ set_in_redis(:last_failure, 0.seconds.ago.to_f)
+ set_in_redis(:failure_count, 90)
- expect(circuit_breaker.no_failures?).to be_falsey
+ expect(circuit_breaker.backing_off?).to be_falsey
+ end
+ end
end
end
@@ -351,10 +306,4 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
expect(circuit_breaker.failure_count).to eq(7)
end
end
-
- describe '#cache_key' do
- it 'includes storage and host' do
- expect(circuit_breaker.cache_key).to eq(cache_key)
- end
- end
end
diff --git a/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb b/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb
index c708b15853a..39a5d020bb4 100644
--- a/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb
+++ b/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb
@@ -33,6 +33,21 @@ describe Gitlab::Git::Storage::ForkedStorageCheck, broken_storage: true, skip_da
expect(runtime).to be < 1.0
end
+ it 'will try the specified amount of times before failing' do
+ allow(described_class).to receive(:check_filesystem_in_process) do
+ Process.spawn("sleep 10")
+ end
+
+ expect(Process).to receive(:spawn).with('sleep 10').twice
+ .and_call_original
+
+ runtime = Benchmark.realtime do
+ described_class.storage_available?(existing_path, 0.5, 2)
+ end
+
+ expect(runtime).to be < 1.0
+ end
+
describe 'when using paths with spaces' do
let(:test_dir) { Rails.root.join('tmp', 'tests', 'storage_check') }
let(:path_with_spaces) { File.join(test_dir, 'path with spaces') }
diff --git a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb
index 7ee6d2f3709..5db37f55e03 100644
--- a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb
+++ b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb
@@ -65,17 +65,6 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
ours = described_class.public_instance_methods
theirs = Gitlab::Git::Storage::CircuitBreaker.public_instance_methods
- # These methods are not part of the public API, but are public to allow the
- # CircuitBreaker specs to operate. They should be made private over time.
- exceptions = %i[
- cache_key
- check_storage_accessible!
- no_failures?
- storage_available?
- track_storage_accessible
- track_storage_inaccessible
- ]
-
- expect(theirs - ours).to contain_exactly(*exceptions)
+ expect(theirs - ours).to be_empty
end
end
diff --git a/spec/models/application_setting_spec.rb b/spec/models/application_setting_spec.rb
index 30495fd4f5e..47b7150d36f 100644
--- a/spec/models/application_setting_spec.rb
+++ b/spec/models/application_setting_spec.rb
@@ -115,7 +115,8 @@ describe ApplicationSetting do
end
context 'circuitbreaker settings' do
- [:circuitbreaker_failure_count_threshold,
+ [:circuitbreaker_backoff_threshold,
+ :circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_wait_time,
:circuitbreaker_failure_reset_time,
:circuitbreaker_storage_timeout].each do |field|
@@ -125,6 +126,16 @@ describe ApplicationSetting do
.is_greater_than_or_equal_to(0)
end
end
+
+ it 'requires the `backoff_threshold` to be lower than the `failure_count_threshold`' do
+ setting.circuitbreaker_failure_count_threshold = 10
+ setting.circuitbreaker_backoff_threshold = 15
+ failure_message = "The circuitbreaker backoff threshold should be lower "\
+ "than the failure count threshold"
+
+ expect(setting).not_to be_valid
+ expect(setting.errors[:circuitbreaker_backoff_threshold]).to include(failure_message)
+ end
end
context 'repository storages' do