Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/memory/watchdog.rb')
-rw-r--r-- lib/gitlab/memory/watchdog.rb 139
1 files changed, 103 insertions, 36 deletions
diff --git a/lib/gitlab/memory/watchdog.rb b/lib/gitlab/memory/watchdog.rb
index 91edb68ad66..38231fa933b 100644
--- a/lib/gitlab/memory/watchdog.rb
+++ b/lib/gitlab/memory/watchdog.rb
@@ -16,8 +16,9 @@ module Gitlab
# The duration for which a process may be above a given fragmentation
# threshold is computed as `max_strikes * sleep_time_seconds`.
class Watchdog
- DEFAULT_SLEEP_TIME_SECONDS = 60
- DEFAULT_HEAP_FRAG_THRESHOLD = 0.5
+ DEFAULT_SLEEP_TIME_SECONDS = 60 * 5
+ DEFAULT_MAX_HEAP_FRAG = 0.5
+ DEFAULT_MAX_MEM_GROWTH = 3.0
DEFAULT_MAX_STRIKES = 5
# This handler does nothing. It returns `false` to indicate to the
@@ -29,7 +30,7 @@ module Gitlab
class NullHandler
include Singleton
- def on_high_heap_fragmentation(value)
+ def call
# NOP
false
end
@@ -41,7 +42,7 @@ module Gitlab
@pid = pid
end
- def on_high_heap_fragmentation(value)
+ def call
Process.kill(:TERM, @pid)
true
end
@@ -55,7 +56,7 @@ module Gitlab
@worker = ::Puma::Cluster::WorkerHandle.new(0, $$, 0, puma_options)
end
- def on_high_heap_fragmentation(value)
+ def call
@worker.term
true
end
@@ -63,6 +64,9 @@ module Gitlab
# max_heap_fragmentation:
# The degree to which the Ruby heap is allowed to be fragmented. Range [0,1].
+ # max_mem_growth:
+ # A multiplier for how much excess private memory a worker can map compared to a reference process
+ # (itself or the primary in a pre-fork server.)
# max_strikes:
# How many times the process is allowed to be above max_heap_fragmentation before
# a handler is invoked.
@@ -71,7 +75,8 @@ module Gitlab
def initialize(
handler: NullHandler.instance,
logger: Logger.new($stdout),
- max_heap_fragmentation: ENV['GITLAB_MEMWD_MAX_HEAP_FRAG']&.to_f || DEFAULT_HEAP_FRAG_THRESHOLD,
+ max_heap_fragmentation: ENV['GITLAB_MEMWD_MAX_HEAP_FRAG']&.to_f || DEFAULT_MAX_HEAP_FRAG,
+ max_mem_growth: ENV['GITLAB_MEMWD_MAX_MEM_GROWTH']&.to_f || DEFAULT_MAX_MEM_GROWTH,
max_strikes: ENV['GITLAB_MEMWD_MAX_STRIKES']&.to_i || DEFAULT_MAX_STRIKES,
sleep_time_seconds: ENV['GITLAB_MEMWD_SLEEP_TIME_SEC']&.to_i || DEFAULT_SLEEP_TIME_SECONDS,
**options)
@@ -79,17 +84,37 @@ module Gitlab
@handler = handler
@logger = logger
- @max_heap_fragmentation = max_heap_fragmentation
@sleep_time_seconds = sleep_time_seconds
@max_strikes = max_strikes
+ @stats = {
+ heap_frag: {
+ max: max_heap_fragmentation,
+ strikes: 0
+ },
+ mem_growth: {
+ max: max_mem_growth,
+ strikes: 0
+ }
+ }
@alive = true
- @strikes = 0
init_prometheus_metrics(max_heap_fragmentation)
end
- attr_reader :strikes, :max_heap_fragmentation, :max_strikes, :sleep_time_seconds
+ attr_reader :max_strikes, :sleep_time_seconds
+
+ def max_heap_fragmentation
+ @stats[:heap_frag][:max]
+ end
+
+ def max_mem_growth
+ @stats[:mem_growth][:max]
+ end
+
+ def strikes(stat)
+ @stats[stat][:strikes]
+ end
def call
@logger.info(log_labels.merge(message: 'started'))
@@ -97,7 +122,10 @@ module Gitlab
while @alive
sleep(@sleep_time_seconds)
- monitor_heap_fragmentation if Feature.enabled?(:gitlab_memory_watchdog, type: :ops)
+ next unless Feature.enabled?(:gitlab_memory_watchdog, type: :ops)
+
+ monitor_heap_fragmentation
+ monitor_memory_growth
end
@logger.info(log_labels.merge(message: 'stopped'))
@@ -109,32 +137,73 @@ module Gitlab
private
- def monitor_heap_fragmentation
- heap_fragmentation = Gitlab::Metrics::Memory.gc_heap_fragmentation
+ def monitor_memory_condition(stat_key)
+ return unless @alive
+
+ stat = @stats[stat_key]
+
+ ok, labels = yield(stat)
- if heap_fragmentation > @max_heap_fragmentation
- @strikes += 1
- @heap_frag_violations.increment
+ if ok
+ stat[:strikes] = 0
else
- @strikes = 0
+ stat[:strikes] += 1
+ @counter_violations.increment(reason: stat_key.to_s)
end
- if @strikes > @max_strikes
- # If the handler returns true, it means the event is handled and we can shut down.
- @alive = !handle_heap_fragmentation_limit_exceeded(heap_fragmentation)
- @strikes = 0
+ if stat[:strikes] > @max_strikes
+ @alive = !memory_limit_exceeded_callback(stat_key, labels)
+ stat[:strikes] = 0
end
end
- def handle_heap_fragmentation_limit_exceeded(value)
- @logger.warn(
- log_labels.merge(
- message: 'heap fragmentation limit exceeded',
- memwd_cur_heap_frag: value
- ))
- @heap_frag_violations_handled.increment
+ def monitor_heap_fragmentation
+ monitor_memory_condition(:heap_frag) do |stat|
+ heap_fragmentation = Gitlab::Metrics::Memory.gc_heap_fragmentation
+ [
+ heap_fragmentation <= stat[:max],
+ {
+ message: 'heap fragmentation limit exceeded',
+ memwd_cur_heap_frag: heap_fragmentation,
+ memwd_max_heap_frag: stat[:max]
+ }
+ ]
+ end
+ end
+
+ def monitor_memory_growth
+ monitor_memory_condition(:mem_growth) do |stat|
+ worker_uss = Gitlab::Metrics::System.memory_usage_uss_pss[:uss]
+ reference_uss = reference_mem[:uss]
+ memory_limit = stat[:max] * reference_uss
+ [
+ worker_uss <= memory_limit,
+ {
+ message: 'memory limit exceeded',
+ memwd_uss_bytes: worker_uss,
+ memwd_ref_uss_bytes: reference_uss,
+ memwd_max_uss_bytes: memory_limit
+ }
+ ]
+ end
+ end
+
+ # On pre-fork systems this would be the primary process memory from which workers fork.
+ # Otherwise it is the current process' memory.
+ #
+ # We initialize this lazily because in the initializer the application may not have
+ # finished booting yet, which would yield an incorrect baseline.
+ def reference_mem
+ @reference_mem ||= Gitlab::Metrics::System.memory_usage_uss_pss(pid: Gitlab::Cluster::PRIMARY_PID)
+ end
+
+ def memory_limit_exceeded_callback(stat_key, handler_labels)
+ all_labels = log_labels.merge(handler_labels)
+ .merge(memwd_cur_strikes: strikes(stat_key))
+ @logger.warn(all_labels)
+ @counter_violations_handled.increment(reason: stat_key.to_s)
- handler.on_high_heap_fragmentation(value)
+ handler.call
end
def handler
@@ -151,9 +220,7 @@ module Gitlab
worker_id: worker_id,
memwd_handler_class: handler.class.name,
memwd_sleep_time_s: @sleep_time_seconds,
- memwd_max_heap_frag: @max_heap_fragmentation,
memwd_max_strikes: @max_strikes,
- memwd_cur_strikes: @strikes,
memwd_rss_bytes: process_rss_bytes
}
end
@@ -174,14 +241,14 @@ module Gitlab
@heap_frag_limit.set({}, max_heap_fragmentation)
default_labels = { pid: worker_id }
- @heap_frag_violations = Gitlab::Metrics.counter(
- :gitlab_memwd_heap_frag_violations_total,
- 'Total number of times heap fragmentation in a Ruby process exceeded its allowed maximum',
+ @counter_violations = Gitlab::Metrics.counter(
+ :gitlab_memwd_violations_total,
+ 'Total number of times a Ruby process violated a memory threshold',
default_labels
)
- @heap_frag_violations_handled = Gitlab::Metrics.counter(
- :gitlab_memwd_heap_frag_violations_handled_total,
- 'Total number of times heap fragmentation violations in a Ruby process were handled',
+ @counter_violations_handled = Gitlab::Metrics.counter(
+ :gitlab_memwd_violations_handled_total,
+ 'Total number of times Ruby process memory violations were handled',
default_labels
)
end