gitlab.com/gitlab-org/gitlab-foss.git
Diffstat (limited to 'lib/gitlab/memory/watchdog.rb')
-rw-r--r--  lib/gitlab/memory/watchdog.rb  192
1 file changed, 192 insertions, 0 deletions
diff --git a/lib/gitlab/memory/watchdog.rb b/lib/gitlab/memory/watchdog.rb
new file mode 100644
index 00000000000..db75ba8a47d
--- /dev/null
+++ b/lib/gitlab/memory/watchdog.rb
@@ -0,0 +1,192 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Memory
+    # A background thread that observes Ruby heap fragmentation and calls
+    # into a handler when the Ruby heap has been fragmented for an extended
+    # period of time.
+    #
+    # See Gitlab::Metrics::Memory for how heap fragmentation is defined.
+    #
+    # To decide whether a given fragmentation level is being exceeded,
+    # the watchdog regularly polls the GC. Whenever a violation occurs,
+    # a strike is issued. If the maximum number of strikes is reached,
+    # a handler is invoked to deal with the situation.
+    #
+    # The duration for which a process may be above a given fragmentation
+    # threshold is computed as `max_strikes * sleep_time_seconds`.
+    class Watchdog < Daemon
+      DEFAULT_SLEEP_TIME_SECONDS = 60
+      DEFAULT_HEAP_FRAG_THRESHOLD = 0.5
+      DEFAULT_MAX_STRIKES = 5
+
+      # This handler does nothing. It returns `false` to indicate to the
+      # caller that the situation has not been dealt with, so it will
+      # receive calls repeatedly if fragmentation remains high.
+      #
+      # This is useful for "dress rehearsals" in production since it allows
+      # us to observe how frequently the handler is invoked before taking action.
+      class NullHandler
+        include Singleton
+
+        def on_high_heap_fragmentation(value)
+          # NOP
+          false
+        end
+      end
+
+      # This handler sends SIGTERM and considers the situation handled.
+      class TermProcessHandler
+        def initialize(pid = $$)
+          @pid = pid
+        end
+
+        def on_high_heap_fragmentation(value)
+          Process.kill(:TERM, @pid)
+          true
+        end
+      end
+
+      # This handler invokes Puma's graceful termination handler, which takes
+      # into account a configurable grace period during which a process may
+      # remain unresponsive to a SIGTERM.
+      class PumaHandler
+        def initialize(puma_options = ::Puma.cli_config.options)
+          @worker = ::Puma::Cluster::WorkerHandle.new(0, $$, 0, puma_options)
+        end
+
+        def on_high_heap_fragmentation(value)
+          @worker.term
+          true
+        end
+      end
+
+      # max_heap_fragmentation:
+      #   The degree to which the Ruby heap is allowed to be fragmented. Range [0,1].
+      # max_strikes:
+      #   How many times the process is allowed to be above max_heap_fragmentation before
+      #   a handler is invoked.
+      # sleep_time_seconds:
+      #   Used to control the frequency with which the watchdog will wake up and poll the GC.
+      def initialize(
+        handler: NullHandler.instance,
+        logger: Logger.new($stdout),
+        max_heap_fragmentation: ENV['GITLAB_MEMWD_MAX_HEAP_FRAG']&.to_f || DEFAULT_HEAP_FRAG_THRESHOLD,
+        max_strikes: ENV['GITLAB_MEMWD_MAX_STRIKES']&.to_i || DEFAULT_MAX_STRIKES,
+        sleep_time_seconds: ENV['GITLAB_MEMWD_SLEEP_TIME_SEC']&.to_i || DEFAULT_SLEEP_TIME_SECONDS,
+        **options)
+        super(**options)
+
+        @handler = handler
+        @logger = logger
+        @max_heap_fragmentation = max_heap_fragmentation
+        @sleep_time_seconds = sleep_time_seconds
+        @max_strikes = max_strikes
+
+        @alive = true
+        @strikes = 0
+
+        init_prometheus_metrics(max_heap_fragmentation)
+      end
+
+      attr_reader :strikes, :max_heap_fragmentation, :max_strikes, :sleep_time_seconds
+
+      def run_thread
+        @logger.info(log_labels.merge(message: 'started'))
+
+        while @alive
+          sleep(@sleep_time_seconds)
+
+          monitor_heap_fragmentation if Feature.enabled?(:gitlab_memory_watchdog, type: :ops)
+        end
+
+        @logger.info(log_labels.merge(message: 'stopped'))
+      end
+
+      private
+
+      def monitor_heap_fragmentation
+        heap_fragmentation = Gitlab::Metrics::Memory.gc_heap_fragmentation
+
+        if heap_fragmentation > @max_heap_fragmentation
+          @strikes += 1
+          @heap_frag_violations.increment
+        else
+          @strikes = 0
+        end
+
+        if @strikes > @max_strikes
+          # If the handler returns true, it means the event is handled and we can shut down.
+          @alive = !handle_heap_fragmentation_limit_exceeded(heap_fragmentation)
+          @strikes = 0
+        end
+      end
+
+      def handle_heap_fragmentation_limit_exceeded(value)
+        @logger.warn(
+          log_labels.merge(
+            message: 'heap fragmentation limit exceeded',
+            memwd_cur_heap_frag: value
+          ))
+        @heap_frag_violations_handled.increment
+
+        handler.on_high_heap_fragmentation(value)
+      end
+
+      def handler
+        # This allows us to keep the watchdog running but turn it into "friendly mode" where
+        # all that happens is we collect logs and Prometheus events for fragmentation violations.
+        return NullHandler.instance unless Feature.enabled?(:enforce_memory_watchdog, type: :ops)
+
+        @handler
+      end
+
+      def stop_working
+        @alive = false
+      end
+
+      def log_labels
+        {
+          pid: $$,
+          worker_id: worker_id,
+          memwd_handler_class: handler.class.name,
+          memwd_sleep_time_s: @sleep_time_seconds,
+          memwd_max_heap_frag: @max_heap_fragmentation,
+          memwd_max_strikes: @max_strikes,
+          memwd_cur_strikes: @strikes,
+          memwd_rss_bytes: process_rss_bytes
+        }
+      end
+
+      def worker_id
+        ::Prometheus::PidProvider.worker_id
+      end
+
+      def process_rss_bytes
+        Gitlab::Metrics::System.memory_usage_rss
+      end
+
+      def init_prometheus_metrics(max_heap_fragmentation)
+        default_labels = { pid: worker_id }
+
+        @heap_frag_limit = Gitlab::Metrics.gauge(
+          :gitlab_memwd_heap_frag_limit,
+          'The configured limit for how fragmented the Ruby heap is allowed to be',
+          default_labels
+        )
+        @heap_frag_limit.set({}, max_heap_fragmentation)
+
+        @heap_frag_violations = Gitlab::Metrics.counter(
+          :gitlab_memwd_heap_frag_violations_total,
+          'Total number of times heap fragmentation in a Ruby process exceeded its allowed maximum',
+          default_labels
+        )
+        @heap_frag_violations_handled = Gitlab::Metrics.counter(
+          :gitlab_memwd_heap_frag_violations_handled_total,
+          'Total number of times heap fragmentation violations in a Ruby process were handled',
+          default_labels
+        )
+      end
+    end
+  end
+end
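
The class comment above gives the tolerated fragmentation window as `max_strikes * sleep_time_seconds`. A rough sketch of that arithmetic with the defaults from this diff (an illustrative snippet, not part of the commit):

    sleep_time_seconds = 60 # DEFAULT_SLEEP_TIME_SECONDS
    max_strikes        = 5  # DEFAULT_MAX_STRIKES

    # Time a process can stay above the fragmentation threshold before the
    # handler fires. Note that monitor_heap_fragmentation compares with a
    # strict `@strikes > @max_strikes`, so the handler is actually invoked on
    # the (max_strikes + 1)-th consecutive violation.
    window_seconds = max_strikes * sleep_time_seconds
    puts window_seconds # => 300, i.e. five minutes with the defaults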
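
All three handlers in the diff expose a single `on_high_heap_fragmentation(value)` method and return true when the situation was dealt with (which stops the watchdog loop) or false to keep receiving callbacks. A hypothetical handler following the same convention, shown only for illustration:

    class LogOnlyHandler
      def initialize(logger)
        @logger = logger
      end

      def on_high_heap_fragmentation(value)
        # Record the violation but report it as unhandled, like NullHandler,
        # so the watchdog keeps polling and calling back on later violations.
        @logger.warn("heap fragmentation #{value.round(2)} above limit; taking no action")
        false
      end
    end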
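
The constructor reads its thresholds from the GITLAB_MEMWD_* environment variables at instantiation time, falling back to the DEFAULT_* constants, and enforcement additionally depends on the :gitlab_memory_watchdog and :enforce_memory_watchdog ops feature flags. A sketch of how the watchdog might be wired up, assuming the Daemon superclass exposes a `start` method that spawns the thread running `run_thread`:

    require 'logger'

    # Hypothetical wiring, not part of the commit. The env vars must be set
    # before `new`, since they are read in the constructor defaults.
    ENV['GITLAB_MEMWD_MAX_HEAP_FRAG']  = '0.4' # tolerate up to 40% heap fragmentation
    ENV['GITLAB_MEMWD_MAX_STRIKES']    = '3'   # three consecutive violations before acting
    ENV['GITLAB_MEMWD_SLEEP_TIME_SEC'] = '30'  # poll the GC every 30 seconds

    watchdog = Gitlab::Memory::Watchdog.new(
      handler: Gitlab::Memory::Watchdog::TermProcessHandler.new,
      logger: Logger.new($stdout)
    )
    watchdog.start # assumed Daemon API; runs run_thread in a background thread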