Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'spec/lib/gitlab/memory/watchdog_spec.rb')
-rw-r--r-- spec/lib/gitlab/memory/watchdog_spec.rb 396
1 files changed, 135 insertions, 261 deletions
diff --git a/spec/lib/gitlab/memory/watchdog_spec.rb b/spec/lib/gitlab/memory/watchdog_spec.rb
index beb49660022..84e9a577afb 100644
--- a/spec/lib/gitlab/memory/watchdog_spec.rb
+++ b/spec/lib/gitlab/memory/watchdog_spec.rb
@@ -1,35 +1,35 @@
# frozen_string_literal: true
require 'spec_helper'
-require_relative '../../../../lib/gitlab/cluster/lifecycle_events'
-RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures, :prometheus do
+RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do
context 'watchdog' do
- let(:logger) { instance_double(::Logger) }
+ let(:configuration) { instance_double(described_class::Configuration) }
let(:handler) { instance_double(described_class::NullHandler) }
-
- let(:heap_frag_limit_gauge) { instance_double(::Prometheus::Client::Gauge) }
+ let(:logger) { instance_double(::Logger) }
+ let(:sleep_time_seconds) { 60 }
+ let(:threshold_violated) { false }
let(:violations_counter) { instance_double(::Prometheus::Client::Counter) }
let(:violations_handled_counter) { instance_double(::Prometheus::Client::Counter) }
-
- let(:sleep_time) { 0.1 }
- let(:max_heap_fragmentation) { 0.2 }
- let(:max_mem_growth) { 2 }
-
- # Defaults that will not trigger any events.
- let(:fragmentation) { 0 }
- let(:worker_memory) { 0 }
- let(:primary_memory) { 0 }
- let(:max_strikes) { 0 }
-
- # Tests should set this to control the number of loop iterations in `call`.
let(:watchdog_iterations) { 1 }
+ let(:name) { :monitor_name }
+ let(:payload) { { message: 'dummy_text' } }
+ let(:max_strikes) { 2 }
+ let(:monitor_class) do
+ Struct.new(:threshold_violated, :payload) do
+ def call
+ { threshold_violated: threshold_violated, payload: payload }
+ end
+
+ def self.name
+ 'MonitorName'
+ end
+ end
+ end
subject(:watchdog) do
- described_class.new(handler: handler, logger: logger, sleep_time_seconds: sleep_time,
- max_strikes: max_strikes, max_mem_growth: max_mem_growth,
- max_heap_fragmentation: max_heap_fragmentation).tap do |instance|
- # We need to defuse `sleep` and stop the internal loop after N iterations.
+ described_class.new.tap do |instance|
+ # We need to defuse `sleep` and stop the internal loop after 1 iteration
iterations = 0
allow(instance).to receive(:sleep) do
instance.stop if (iterations += 1) > watchdog_iterations
@@ -38,9 +38,6 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures, :prometheus do
end
def stub_prometheus_metrics
- allow(Gitlab::Metrics).to receive(:gauge)
- .with(:gitlab_memwd_heap_frag_limit, anything)
- .and_return(heap_frag_limit_gauge)
allow(Gitlab::Metrics).to receive(:counter)
.with(:gitlab_memwd_violations_total, anything, anything)
.and_return(violations_counter)
@@ -48,318 +45,195 @@ RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures, :prometheus do
.with(:gitlab_memwd_violations_handled_total, anything, anything)
.and_return(violations_handled_counter)
- allow(heap_frag_limit_gauge).to receive(:set)
allow(violations_counter).to receive(:increment)
allow(violations_handled_counter).to receive(:increment)
end
- before do
- stub_prometheus_metrics
-
- allow(handler).to receive(:call).and_return(true)
-
- allow(logger).to receive(:warn)
- allow(logger).to receive(:info)
-
- allow(Gitlab::Metrics::Memory).to receive(:gc_heap_fragmentation).and_return(fragmentation)
- allow(Gitlab::Metrics::System).to receive(:memory_usage_uss_pss).and_return({ uss: worker_memory })
- allow(Gitlab::Metrics::System).to receive(:memory_usage_uss_pss).with(
- pid: Gitlab::Cluster::PRIMARY_PID
- ).and_return({ uss: primary_memory })
-
- allow(::Prometheus::PidProvider).to receive(:worker_id).and_return('worker_1')
- end
-
- context 'when created' do
- it 'sets the heap fragmentation limit gauge' do
- expect(heap_frag_limit_gauge).to receive(:set).with({}, max_heap_fragmentation)
+ describe '#initialize' do
+ it 'initialize new configuration' do
+ expect(described_class::Configuration).to receive(:new)
watchdog
end
-
- context 'when no settings are set in the environment' do
- it 'initializes with defaults' do
- watchdog = described_class.new(handler: handler, logger: logger)
-
- expect(watchdog.max_heap_fragmentation).to eq(described_class::DEFAULT_MAX_HEAP_FRAG)
- expect(watchdog.max_mem_growth).to eq(described_class::DEFAULT_MAX_MEM_GROWTH)
- expect(watchdog.max_strikes).to eq(described_class::DEFAULT_MAX_STRIKES)
- expect(watchdog.sleep_time_seconds).to eq(described_class::DEFAULT_SLEEP_TIME_SECONDS)
- end
- end
-
- context 'when settings are passed through the environment' do
- before do
- stub_env('GITLAB_MEMWD_MAX_HEAP_FRAG', 1)
- stub_env('GITLAB_MEMWD_MAX_STRIKES', 2)
- stub_env('GITLAB_MEMWD_SLEEP_TIME_SEC', 3)
- stub_env('GITLAB_MEMWD_MAX_MEM_GROWTH', 4)
- end
-
- it 'initializes with these settings' do
- watchdog = described_class.new(handler: handler, logger: logger)
-
- expect(watchdog.max_heap_fragmentation).to eq(1)
- expect(watchdog.max_strikes).to eq(2)
- expect(watchdog.sleep_time_seconds).to eq(3)
- expect(watchdog.max_mem_growth).to eq(4)
- end
- end
end
- shared_examples 'has strikes left' do |stat|
- context 'when process has not exceeded allowed number of strikes' do
- let(:watchdog_iterations) { max_strikes }
-
- it 'does not signal the handler' do
- expect(handler).not_to receive(:call)
-
- watchdog.call
- end
-
- it 'does not log any events' do
- expect(logger).not_to receive(:warn)
-
- watchdog.call
- end
-
- it 'increments the violations counter' do
- expect(violations_counter).to receive(:increment).with(reason: stat).exactly(watchdog_iterations)
-
- watchdog.call
+ describe '#call' do
+ before do
+ stub_prometheus_metrics
+ allow(Gitlab::Metrics::System).to receive(:memory_usage_rss).at_least(:once).and_return(1024)
+ allow(::Prometheus::PidProvider).to receive(:worker_id).and_return('worker_1')
+
+ watchdog.configure do |config|
+ config.handler = handler
+ config.logger = logger
+ config.sleep_time_seconds = sleep_time_seconds
+ config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
end
- it 'does not increment violations handled counter' do
- expect(violations_handled_counter).not_to receive(:increment)
-
- watchdog.call
- end
+ allow(handler).to receive(:call).and_return(true)
+ allow(logger).to receive(:info)
+ allow(logger).to receive(:warn)
end
- end
- shared_examples 'no strikes left' do |stat|
- it 'signals the handler and resets strike counter' do
- expect(handler).to receive(:call).and_return(true)
+ it 'logs start message once' do
+ expect(logger).to receive(:info).once
+ .with(
+ pid: Process.pid,
+ worker_id: 'worker_1',
+ memwd_handler_class: handler.class.name,
+ memwd_sleep_time_s: sleep_time_seconds,
+ memwd_rss_bytes: 1024,
+ message: 'started')
watchdog.call
-
- expect(watchdog.strikes(stat.to_sym)).to eq(0)
end
- it 'increments both the violations and violations handled counters' do
- expect(violations_counter).to receive(:increment).with(reason: stat).exactly(watchdog_iterations)
- expect(violations_handled_counter).to receive(:increment).with(reason: stat)
+ it 'waits for check interval seconds' do
+ expect(watchdog).to receive(:sleep).with(sleep_time_seconds)
watchdog.call
end
- context 'when enforce_memory_watchdog ops toggle is off' do
+ context 'when gitlab_memory_watchdog ops toggle is off' do
before do
- stub_feature_flags(enforce_memory_watchdog: false)
+ stub_feature_flags(gitlab_memory_watchdog: false)
end
- it 'always uses the NullHandler' do
- expect(handler).not_to receive(:call)
- expect(described_class::NullHandler.instance).to receive(:call).and_return(true)
-
- watchdog.call
+ it 'does not trigger any monitor' do
+ expect(configuration).not_to receive(:monitors)
end
end
- context 'when handler result is true' do
- it 'considers the event handled and stops itself' do
- expect(handler).to receive(:call).once.and_return(true)
- expect(logger).to receive(:info).with(hash_including(message: 'stopped'))
+ context 'when process does not exceed threshold' do
+ it 'does not increment violations counters' do
+ expect(violations_counter).not_to receive(:increment)
+ expect(violations_handled_counter).not_to receive(:increment)
watchdog.call
end
- end
-
- context 'when handler result is false' do
- let(:max_strikes) { 0 } # to make sure the handler fires each iteration
- let(:watchdog_iterations) { 3 }
- it 'keeps running' do
- expect(violations_counter).to receive(:increment).exactly(watchdog_iterations)
- expect(violations_handled_counter).to receive(:increment).exactly(watchdog_iterations)
- # Return true the third time to terminate the daemon.
- expect(handler).to receive(:call).and_return(false, false, true)
+ it 'does not log violation' do
+ expect(logger).not_to receive(:warn)
watchdog.call
end
- end
- end
-
- context 'when monitoring memory growth' do
- let(:primary_memory) { 2048 }
-
- context 'when process does not exceed threshold' do
- let(:worker_memory) { max_mem_growth * primary_memory - 1 }
- it 'does not signal the handler' do
+ it 'does not execute handler' do
expect(handler).not_to receive(:call)
watchdog.call
end
end
- context 'when process exceeds threshold permanently' do
- let(:worker_memory) { max_mem_growth * primary_memory + 1 }
- let(:max_strikes) { 3 }
-
- it_behaves_like 'has strikes left', 'mem_growth'
+ context 'when process exceeds threshold' do
+ let(:threshold_violated) { true }
- context 'when process exceeds the allowed number of strikes' do
- let(:watchdog_iterations) { max_strikes + 1 }
+ it 'increments violations counter' do
+ expect(violations_counter).to receive(:increment).with(reason: name)
- it_behaves_like 'no strikes left', 'mem_growth'
+ watchdog.call
+ end
- it 'only reads reference memory once' do
- expect(Gitlab::Metrics::System).to receive(:memory_usage_uss_pss)
- .with(pid: Gitlab::Cluster::PRIMARY_PID)
- .once
+ context 'when process does not exceed the allowed number of strikes' do
+ it 'does not increment handled violations counter' do
+ expect(violations_handled_counter).not_to receive(:increment)
watchdog.call
end
- it 'logs the event' do
- expect(Gitlab::Metrics::System).to receive(:memory_usage_rss).at_least(:once).and_return(1024)
- expect(logger).to receive(:warn).with({
- message: 'memory limit exceeded',
- pid: Process.pid,
- worker_id: 'worker_1',
- memwd_handler_class: 'RSpec::Mocks::InstanceVerifyingDouble',
- memwd_sleep_time_s: sleep_time,
- memwd_max_uss_bytes: max_mem_growth * primary_memory,
- memwd_ref_uss_bytes: primary_memory,
- memwd_uss_bytes: worker_memory,
- memwd_rss_bytes: 1024,
- memwd_max_strikes: max_strikes,
- memwd_cur_strikes: max_strikes + 1
- })
+ it 'does not log violation' do
+ expect(logger).not_to receive(:warn)
watchdog.call
end
- end
- end
- context 'when process exceeds threshold temporarily' do
- let(:worker_memory) { max_mem_growth * primary_memory }
- let(:max_strikes) { 1 }
- let(:watchdog_iterations) { 4 }
+ it 'does not execute handler' do
+ expect(handler).not_to receive(:call)
- before do
- allow(Gitlab::Metrics::System).to receive(:memory_usage_uss_pss).and_return(
- { uss: worker_memory - 0.1 },
- { uss: worker_memory + 0.2 },
- { uss: worker_memory - 0.1 },
- { uss: worker_memory + 0.1 }
- )
- allow(Gitlab::Metrics::System).to receive(:memory_usage_uss_pss).with(
- pid: Gitlab::Cluster::PRIMARY_PID
- ).and_return({ uss: primary_memory })
+ watchdog.call
+ end
end
- it 'does not signal the handler' do
- expect(handler).not_to receive(:call)
+ context 'when monitor exceeds the allowed number of strikes' do
+ let(:max_strikes) { 0 }
- watchdog.call
- end
- end
- end
+ it 'increments handled violations counter' do
+ expect(violations_handled_counter).to receive(:increment).with(reason: name)
- context 'when monitoring heap fragmentation' do
- context 'when process does not exceed threshold' do
- let(:fragmentation) { max_heap_fragmentation - 0.1 }
-
- it 'does not signal the handler' do
- expect(handler).not_to receive(:call)
-
- watchdog.call
- end
- end
-
- context 'when process exceeds threshold permanently' do
- let(:fragmentation) { max_heap_fragmentation + 0.1 }
- let(:max_strikes) { 3 }
-
- it_behaves_like 'has strikes left', 'heap_frag'
+ watchdog.call
+ end
- context 'when process exceeds the allowed number of strikes' do
- let(:watchdog_iterations) { max_strikes + 1 }
+ it 'logs violation' do
+ expect(logger).to receive(:warn)
+ .with(
+ pid: Process.pid,
+ worker_id: 'worker_1',
+ memwd_handler_class: handler.class.name,
+ memwd_sleep_time_s: sleep_time_seconds,
+ memwd_rss_bytes: 1024,
+ memwd_cur_strikes: 1,
+ memwd_max_strikes: max_strikes,
+ message: 'dummy_text')
- it_behaves_like 'no strikes left', 'heap_frag'
+ watchdog.call
+ end
- it 'logs the event' do
- expect(Gitlab::Metrics::System).to receive(:memory_usage_rss).at_least(:once).and_return(1024)
- expect(logger).to receive(:warn).with({
- message: 'heap fragmentation limit exceeded',
- pid: Process.pid,
- worker_id: 'worker_1',
- memwd_handler_class: 'RSpec::Mocks::InstanceVerifyingDouble',
- memwd_sleep_time_s: sleep_time,
- memwd_max_heap_frag: max_heap_fragmentation,
- memwd_cur_heap_frag: fragmentation,
- memwd_max_strikes: max_strikes,
- memwd_cur_strikes: max_strikes + 1,
- memwd_rss_bytes: 1024
- })
+ it 'executes handler' do
+ expect(handler).to receive(:call)
watchdog.call
end
- end
- end
- context 'when process exceeds threshold temporarily' do
- let(:fragmentation) { max_heap_fragmentation }
- let(:max_strikes) { 1 }
- let(:watchdog_iterations) { 4 }
+ context 'when enforce_memory_watchdog ops toggle is off' do
+ before do
+ stub_feature_flags(enforce_memory_watchdog: false)
+ end
- before do
- allow(Gitlab::Metrics::Memory).to receive(:gc_heap_fragmentation).and_return(
- fragmentation - 0.1,
- fragmentation + 0.2,
- fragmentation - 0.1,
- fragmentation + 0.1
- )
- end
+ it 'always uses the NullHandler' do
+ expect(handler).not_to receive(:call)
+ expect(described_class::NullHandler.instance).to receive(:call).and_return(true)
- it 'does not signal the handler' do
- expect(handler).not_to receive(:call)
+ watchdog.call
+ end
+ end
- watchdog.call
+ context 'when multiple monitors exceeds allowed number of strikes' do
+ before do
+ watchdog.configure do |config|
+ config.handler = handler
+ config.logger = logger
+ config.sleep_time_seconds = sleep_time_seconds
+ config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
+ config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
+ end
+ end
+
+ it 'only calls the handler once' do
+ expect(handler).to receive(:call).once.and_return(true)
+
+ watchdog.call
+ end
+ end
end
end
- end
-
- context 'when both memory fragmentation and growth exceed thresholds' do
- let(:fragmentation) { max_heap_fragmentation + 0.1 }
- let(:primary_memory) { 2048 }
- let(:worker_memory) { max_mem_growth * primary_memory + 1 }
- let(:watchdog_iterations) { max_strikes + 1 }
- it 'only calls the handler once' do
- expect(handler).to receive(:call).once.and_return(true)
+ it 'logs stop message once' do
+ expect(logger).to receive(:info).once
+ .with(
+ pid: Process.pid,
+ worker_id: 'worker_1',
+ memwd_handler_class: handler.class.name,
+ memwd_sleep_time_s: sleep_time_seconds,
+ memwd_rss_bytes: 1024,
+ message: 'stopped')
watchdog.call
end
end
- context 'when gitlab_memory_watchdog ops toggle is off' do
- before do
- stub_feature_flags(gitlab_memory_watchdog: false)
- end
-
- it 'does not monitor heap fragmentation' do
- expect(Gitlab::Metrics::Memory).not_to receive(:gc_heap_fragmentation)
-
- watchdog.call
- end
-
- it 'does not monitor memory growth' do
- expect(Gitlab::Metrics::System).not_to receive(:memory_usage_uss_pss)
-
- watchdog.call
+ describe '#configure' do
+ it 'yields block' do
+ expect { |b| watchdog.configure(&b) }.to yield_control
end
end
end