Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: GitLab Bot <gitlab-bot@gitlab.com> 2022-08-18 11:17:02 +0300
committer: GitLab Bot <gitlab-bot@gitlab.com> 2022-08-18 11:17:02 +0300
commit: b39512ed755239198a9c294b6a45e65c05900235 (patch)
tree: d234a3efade1de67c46b9e5a38ce813627726aa7 /lib/gitlab/memory
parent: d31474cf3b17ece37939d20082b07f6657cc79a9 (diff)
Add latest changes from gitlab-org/gitlab@15-3-stable-ee (tag: v15.3.0-rc42)
Diffstat (limited to 'lib/gitlab/memory')
-rw-r--r-- lib/gitlab/memory/jemalloc.rb 22
-rw-r--r-- lib/gitlab/memory/reports/jemalloc_stats.rb 67
-rw-r--r-- lib/gitlab/memory/reports_daemon.rb 106
-rw-r--r-- lib/gitlab/memory/watchdog.rb 18
4 files changed, 197 insertions, 16 deletions
diff --git a/lib/gitlab/memory/jemalloc.rb b/lib/gitlab/memory/jemalloc.rb
index 454c54569de..7163a70a5cb 100644
--- a/lib/gitlab/memory/jemalloc.rb
+++ b/lib/gitlab/memory/jemalloc.rb
@@ -14,6 +14,8 @@ module Gitlab
STATS_DEFAULT_FORMAT = :json
+ FILENAME_PREFIX = 'jemalloc_stats'
+
# Return jemalloc stats as a string.
def stats(format: STATS_DEFAULT_FORMAT)
verify_format!(format)
@@ -23,16 +25,24 @@ module Gitlab
end
end
- # Write jemalloc stats to the given directory.
- def dump_stats(path:, format: STATS_DEFAULT_FORMAT)
+ # Write jemalloc stats to the given directory
+ # @param [String] path Directory path the dump will be put into
+ # @param [String] format `json` or `txt`
+ # @param [String] filename_label Optional custom string that will be injected into the file name, e.g. `worker_0`
+ # @return [String] Full path to the resulting dump file
+ def dump_stats(path:, format: STATS_DEFAULT_FORMAT, filename_label: nil)
verify_format!(format)
+ format_settings = STATS_FORMATS[format]
+ file_path = File.join(path, file_name(format_settings[:extension], filename_label))
+
with_malloc_stats_print do |stats_print|
- format_settings = STATS_FORMATS[format]
- File.open(File.join(path, file_name(format_settings[:extension])), 'wb') do |io|
+ File.open(file_path, 'wb') do |io|
write_stats(stats_print, io, format_settings)
end
end
+
+ file_path
end
private
@@ -80,8 +90,8 @@ module Gitlab
stats_print.call(callback, nil, format[:options])
end
- def file_name(extension)
- "jemalloc_stats.#{$$}.#{Time.current.to_i}.#{extension}"
+ def file_name(extension, filename_label)
+ [FILENAME_PREFIX, $$, filename_label, Time.current.to_i, extension].reject(&:blank?).join('.')
end
end
end
diff --git a/lib/gitlab/memory/reports/jemalloc_stats.rb b/lib/gitlab/memory/reports/jemalloc_stats.rb
new file mode 100644
index 00000000000..b99bec4ac3e
--- /dev/null
+++ b/lib/gitlab/memory/reports/jemalloc_stats.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Memory
+ module Reports
+ class JemallocStats
+ # On prod, Jemalloc reports sizes were ~2.5 MB:
+ # https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/15993#note_1014767214
+ # We configured 1GB emptyDir per pod:
+ # https://gitlab.com/gitlab-com/gl-infra/k8s-workloads/gitlab-com/-/merge_requests/1949
+ # The pod will be evicted when the size limit is exceeded. We never want this to happen, for availability.
+ #
+ # With the default, we have a headroom (250*2.5MB=625<1000 MB) to fit into configured emptyDir.
+ # It would allow us to keep 3+ days worth of reports for 6 workers running every 2 hours: 3*6*12=216<250
+ #
+ # The cleanup logic will be redundant after we'll implement the uploads, which would perform the cleanup.
+ DEFAULT_MAX_REPORTS_STORED = 250
+
+ def initialize(reports_path:)
+ @reports_path = reports_path
+ end
+
+ def run
+ return unless active?
+
+ Gitlab::Memory::Jemalloc.dump_stats(path: reports_path, filename_label: worker_id).tap { cleanup }
+ end
+
+ def active?
+ Feature.enabled?(:report_jemalloc_stats, type: :ops)
+ end
+
+ private
+
+ attr_reader :reports_path
+
+ def cleanup
+ reports_files_modified_order[0...-max_reports_stored].each do |f|
+ File.unlink(f) if File.exist?(f)
+ rescue Errno::ENOENT
+ # Path does not exist: Ignore. We already check `File.exist?`
+ # Rescue to be extra safe, because each worker could perform a cleanup
+ end
+ end
+
+ def reports_files_modified_order
+ pattern = File.join(reports_path, "#{Gitlab::Memory::Jemalloc::FILENAME_PREFIX}*")
+
+ Dir.glob(pattern).sort_by do |f|
+ test('M', f)
+ rescue Errno::ENOENT
+ # Path does not exist: Return any timestamp to proceed with the sort
+ Time.current
+ end
+ end
+
+ def worker_id
+ ::Prometheus::PidProvider.worker_id
+ end
+
+ def max_reports_stored
+ ENV["GITLAB_DIAGNOSTIC_REPORTS_JEMALLOC_MAX_REPORTS_STORED"] || DEFAULT_MAX_REPORTS_STORED
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/memory/reports_daemon.rb b/lib/gitlab/memory/reports_daemon.rb
new file mode 100644
index 00000000000..ed1da8baab5
--- /dev/null
+++ b/lib/gitlab/memory/reports_daemon.rb
@@ -0,0 +1,106 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Memory
+ # Daemon that periodically runs the configured diagnostic reports (currently
+ # only Reports::JemallocStats), logs the outcome of each run and adds the run
+ # duration to a metrics counter.
+ #
+ # Timing and output location can be overridden through the
+ # GITLAB_DIAGNOSTIC_REPORTS_* environment variables read in #initialize.
+ class ReportsDaemon < Daemon
+ DEFAULT_SLEEP_S = 7200 # 2 hours
+ DEFAULT_SLEEP_MAX_DELTA_S = 600 # 0..10 minutes
+ DEFAULT_SLEEP_BETWEEN_REPORTS_S = 120 # 2 minutes
+
+ DEFAULT_REPORTS_PATH = '/tmp'
+
+ # @param options forwarded unchanged to Daemon#initialize by the bare `super`
+ #
+ # NOTE(review): the numeric overrides use String#to_i, so a variable that is
+ # set but not numeric yields 0 instead of falling back to the default.
+ def initialize(**options)
+ super
+
+ @alive = true
+
+ @sleep_s =
+ ENV['GITLAB_DIAGNOSTIC_REPORTS_SLEEP_S']&.to_i || DEFAULT_SLEEP_S
+ @sleep_max_delta_s =
+ ENV['GITLAB_DIAGNOSTIC_REPORTS_SLEEP_MAX_DELTA_S']&.to_i || DEFAULT_SLEEP_MAX_DELTA_S
+ @sleep_between_reports_s =
+ ENV['GITLAB_DIAGNOSTIC_REPORTS_SLEEP_BETWEEN_REPORTS_S']&.to_i || DEFAULT_SLEEP_BETWEEN_REPORTS_S
+
+ @reports_path =
+ ENV["GITLAB_DIAGNOSTIC_REPORTS_PATH"] || DEFAULT_REPORTS_PATH
+
+ @reports = [Gitlab::Memory::Reports::JemallocStats.new(reports_path: reports_path)]
+
+ init_prometheus_metrics
+ end
+
+ attr_reader :sleep_s, :sleep_max_delta_s, :sleep_between_reports_s, :reports_path
+
+ # Main loop; presumably invoked by the Daemon base class on its own thread
+ # (base class not visible here - confirm).
+ #
+ # Each pass sleeps for the jittered interval first, then runs every active
+ # report, pausing `sleep_between_reports_s` after each one. The `alive` flag
+ # is only re-checked at the top of the loop, so a pass that has started runs
+ # to completion.
+ def run_thread
+ while alive
+ sleep interval_with_jitter
+
+ reports.select(&:active?).each do |report|
+ start_monotonic_time = Gitlab::Metrics::System.monotonic_time
+ start_thread_cpu_time = Gitlab::Metrics::System.thread_cpu_time
+
+ file_path = report.run
+
+ cpu_s = Gitlab::Metrics::System.thread_cpu_duration(start_thread_cpu_time)
+ duration_s = Gitlab::Metrics::System.monotonic_time - start_monotonic_time
+
+ log_report(label: report_label(report), cpu_s: cpu_s, duration_s: duration_s, size: file_size(file_path))
+ @report_duration_counter.increment({ report: report_label(report) }, duration_s)
+
+ sleep sleep_between_reports_s
+ end
+ end
+ end
+
+ private
+
+ attr_reader :alive, :reports
+
+ # Returns the sleep interval with a random adjustment.
+ # The random adjustment is put in place to ensure continued availability.
+ # The adjustment is `rand(sleep_max_delta_s)`, added on top of `sleep_s`.
+ def interval_with_jitter
+ sleep_s + rand(sleep_max_delta_s)
+ end
+
+ # Emits one structured info-level log entry describing a finished report run.
+ # Durations are rounded to two decimals before logging.
+ def log_report(label:, duration_s:, cpu_s:, size:)
+ Gitlab::AppLogger.info(
+ message: 'finished',
+ pid: $$,
+ worker_id: worker_id,
+ perf_report: label,
+ duration_s: duration_s.round(2),
+ cpu_s: cpu_s.round(2),
+ perf_report_size_bytes: size
+ )
+ end
+
+ def worker_id
+ ::Prometheus::PidProvider.worker_id
+ end
+
+ # Derives the label used in logs and metrics from the report's class name:
+ # demodulized and underscored.
+ def report_label(report)
+ report.class.to_s.demodulize.underscore
+ end
+
+ # Clears the flag that keeps the `run_thread` loop going.
+ # Presumably a hook called by the Daemon base class on stop - confirm.
+ def stop_working
+ @alive = false
+ end
+
+ # Creates the counter that accumulates report run durations.
+ # Note that the default label is named `pid` but carries the worker id.
+ def init_prometheus_metrics
+ default_labels = { pid: worker_id }
+
+ @report_duration_counter = Gitlab::Metrics.counter(
+ :gitlab_diag_report_duration_seconds_total,
+ 'Total time elapsed for running diagnostic report',
+ default_labels
+ )
+ end
+
+ # Size in bytes of the file at `file_path`; 0 when File.size raises Errno::ENOENT.
+ # `to_s` lets a nil path (report.run returns nil when the report is inactive)
+ # be passed to File.size as an empty string.
+ def file_size(file_path)
+ File.size(file_path.to_s)
+ rescue Errno::ENOENT
+ 0
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/memory/watchdog.rb b/lib/gitlab/memory/watchdog.rb
index db75ba8a47d..91edb68ad66 100644
--- a/lib/gitlab/memory/watchdog.rb
+++ b/lib/gitlab/memory/watchdog.rb
@@ -15,7 +15,7 @@ module Gitlab
#
# The duration for which a process may be above a given fragmentation
# threshold is computed as `max_strikes * sleep_time_seconds`.
- class Watchdog < Daemon
+ class Watchdog
DEFAULT_SLEEP_TIME_SECONDS = 60
DEFAULT_HEAP_FRAG_THRESHOLD = 0.5
DEFAULT_MAX_STRIKES = 5
@@ -91,7 +91,7 @@ module Gitlab
attr_reader :strikes, :max_heap_fragmentation, :max_strikes, :sleep_time_seconds
- def run_thread
+ def call
@logger.info(log_labels.merge(message: 'started'))
while @alive
@@ -103,6 +103,10 @@ module Gitlab
@logger.info(log_labels.merge(message: 'stopped'))
end
+ def stop
+ @alive = false
+ end
+
private
def monitor_heap_fragmentation
@@ -141,10 +145,6 @@ module Gitlab
@handler
end
- def stop_working
- @alive = false
- end
-
def log_labels
{
pid: $$,
@@ -167,15 +167,13 @@ module Gitlab
end
def init_prometheus_metrics(max_heap_fragmentation)
- default_labels = { pid: worker_id }
-
@heap_frag_limit = Gitlab::Metrics.gauge(
:gitlab_memwd_heap_frag_limit,
- 'The configured limit for how fragmented the Ruby heap is allowed to be',
- default_labels
+ 'The configured limit for how fragmented the Ruby heap is allowed to be'
)
@heap_frag_limit.set({}, max_heap_fragmentation)
+ default_labels = { pid: worker_id }
@heap_frag_violations = Gitlab::Metrics.counter(
:gitlab_memwd_heap_frag_violations_total,
'Total number of times heap fragmentation in a Ruby process exceeded its allowed maximum',