# frozen_string_literal: true

require 'spec_helper'

# Specs for the memory watchdog loop (`#call`), its configuration hook
# (`#configure`) and the handler classes namespaced under the watchdog.
RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do
  context 'watchdog' do
    let(:handler) { instance_double(described_class::NullHandler) }
    let(:logger) { instance_double(::Logger) }
    let(:sleep_time_seconds) { 60 }
    let(:threshold_violated) { false }
    let(:violations_counter) { instance_double(::Prometheus::Client::Counter) }
    let(:violations_handled_counter) { instance_double(::Prometheus::Client::Counter) }
    let(:watchdog_iterations) { 1 }
    let(:name) { :monitor_name }
    let(:payload) { { message: 'dummy_text' } }
    let(:max_strikes) { 2 }

    # Minimal monitor stand-in: reports whatever `threshold_violated` and
    # `payload` it was constructed with.
    let(:monitor_class) do
      Struct.new(:threshold_violated, :payload) do
        def call
          { threshold_violated: threshold_violated, payload: payload }
        end

        def self.name
          'MonitorName'
        end
      end
    end

    subject(:watchdog) do
      described_class.new.tap do |instance|
        # We need to defuse `sleep` and stop the internal loop once `sleep`
        # has been called more than `watchdog_iterations` times.
        iterations = 0
        allow(instance).to receive(:sleep) do
          instance.stop if (iterations += 1) > watchdog_iterations
        end
      end
    end

    # Routes both watchdog counters to the doubles above so examples can
    # assert on (or forbid) increments.
    def stub_prometheus_metrics
      allow(Gitlab::Metrics).to receive(:counter)
        .with(:gitlab_memwd_violations_total, anything, anything)
        .and_return(violations_counter)
      allow(Gitlab::Metrics).to receive(:counter)
        .with(:gitlab_memwd_violations_handled_total, anything, anything)
        .and_return(violations_handled_counter)

      allow(violations_counter).to receive(:increment)
      allow(violations_handled_counter).to receive(:increment)
    end

    describe '#initialize' do
      it 'initialize new configuration' do
        expect(described_class::Configuration).to receive(:new)

        watchdog
      end
    end

    describe '#call' do
      before do
        stub_prometheus_metrics

        allow(Gitlab::Metrics::System).to receive(:memory_usage_rss).at_least(:once).and_return(1024)
        allow(::Prometheus::PidProvider).to receive(:worker_id).and_return('worker_1')

        watchdog.configure do |config|
          config.handler = handler
          config.logger = logger
          config.sleep_time_seconds = sleep_time_seconds

          config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
        end

        allow(handler).to receive(:call).and_return(true)
        allow(logger).to receive(:info)
        allow(logger).to receive(:warn)
      end

      it 'logs start message once' do
        expect(logger).to receive(:info).once
          .with(
            pid: Process.pid,
            worker_id: 'worker_1',
            memwd_handler_class: handler.class.name,
            memwd_sleep_time_s: sleep_time_seconds,
            memwd_rss_bytes: 1024,
            message: 'started')

        watchdog.call
      end

      it 'waits for check interval seconds' do
        expect(watchdog).to receive(:sleep).with(sleep_time_seconds)

        watchdog.call
      end

      context 'when gitlab_memory_watchdog ops toggle is off' do
        # Use a monitor that would strike and be handled on its first run, so
        # the absence of any side effect shows the monitor was never triggered.
        let(:threshold_violated) { true }
        let(:max_strikes) { 0 }

        before do
          stub_feature_flags(gitlab_memory_watchdog: false)
        end

        it 'does not trigger any monitor' do
          expect(violations_counter).not_to receive(:increment)
          expect(violations_handled_counter).not_to receive(:increment)
          expect(handler).not_to receive(:call)

          watchdog.call
        end
      end

      context 'when process does not exceed threshold' do
        it 'does not increment violations counters' do
          expect(violations_counter).not_to receive(:increment)
          expect(violations_handled_counter).not_to receive(:increment)

          watchdog.call
        end

        it 'does not log violation' do
          expect(logger).not_to receive(:warn)

          watchdog.call
        end

        it 'does not execute handler' do
          expect(handler).not_to receive(:call)

          watchdog.call
        end
      end

      context 'when process exceeds threshold' do
        let(:threshold_violated) { true }

        it 'increments violations counter' do
          expect(violations_counter).to receive(:increment).with(reason: name)

          watchdog.call
        end

        context 'when process does not exceed the allowed number of strikes' do
          it 'does not increment handled violations counter' do
            expect(violations_handled_counter).not_to receive(:increment)

            watchdog.call
          end

          it 'does not log violation' do
            expect(logger).not_to receive(:warn)

            watchdog.call
          end

          it 'does not execute handler' do
            expect(handler).not_to receive(:call)

            watchdog.call
          end
        end

        context 'when monitor exceeds the allowed number of strikes' do
          let(:max_strikes) { 0 }

          it 'increments handled violations counter' do
            expect(violations_handled_counter).to receive(:increment).with(reason: name)

            watchdog.call
          end

          it 'logs violation' do
            expect(logger).to receive(:warn)
              .with(
                pid: Process.pid,
                worker_id: 'worker_1',
                memwd_handler_class: handler.class.name,
                memwd_sleep_time_s: sleep_time_seconds,
                memwd_rss_bytes: 1024,
                memwd_cur_strikes: 1,
                memwd_max_strikes: max_strikes,
                message: 'dummy_text')

            watchdog.call
          end

          it 'executes handler' do
            expect(handler).to receive(:call)

            watchdog.call
          end

          context 'when enforce_memory_watchdog ops toggle is off' do
            before do
              stub_feature_flags(enforce_memory_watchdog: false)
            end

            it 'always uses the NullHandler' do
              expect(handler).not_to receive(:call)
              expect(described_class::NullHandler.instance).to receive(:call).and_return(true)

              watchdog.call
            end
          end

          context 'when multiple monitors exceeds allowed number of strikes' do
            before do
              watchdog.configure do |config|
                config.handler = handler
                config.logger = logger
                config.sleep_time_seconds = sleep_time_seconds

                config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
                config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
              end
            end

            it 'only calls the handler once' do
              expect(handler).to receive(:call).once.and_return(true)

              watchdog.call
            end
          end
        end
      end

      it 'logs stop message once' do
        expect(logger).to receive(:info).once
          .with(
            pid: Process.pid,
            worker_id: 'worker_1',
            memwd_handler_class: handler.class.name,
            memwd_sleep_time_s: sleep_time_seconds,
            memwd_rss_bytes: 1024,
            message: 'stopped')

        watchdog.call
      end
    end

    describe '#configure' do
      it 'yields block' do
        expect { |b| watchdog.configure(&b) }.to yield_control
      end
    end
  end

  context 'handlers' do
    context 'NullHandler' do
      subject(:handler) { described_class::NullHandler.instance }

      describe '#call' do
        it 'does nothing' do
          expect(handler.call).to be(false)
        end
      end
    end

    context 'TermProcessHandler' do
      subject(:handler) { described_class::TermProcessHandler.new(42) }

      describe '#call' do
        it 'sends SIGTERM to the current process' do
          expect(Process).to receive(:kill).with(:TERM, 42)
          expect(handler.call).to be(true)
        end
      end
    end

    context 'PumaHandler' do
      # rubocop: disable RSpec/VerifiedDoubles
      # In tests, the Puma constant is not loaded so we cannot make this an instance_double.
      let(:puma_worker_handle_class) { double('Puma::Cluster::WorkerHandle') }
      let(:puma_worker_handle) { double('worker') }
      # rubocop: enable RSpec/VerifiedDoubles

      subject(:handler) { described_class::PumaHandler.new({}) }

      before do
        stub_const('::Puma::Cluster::WorkerHandle', puma_worker_handle_class)
      end

      describe '#call' do
        it 'invokes orderly termination via Puma API' do
          expect(puma_worker_handle_class).to receive(:new).and_return(puma_worker_handle)
          expect(puma_worker_handle).to receive(:term)
          expect(handler.call).to be(true)
        end
      end
    end
  end
end