Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2022-07-20 18:08:38 +0300
committerGitLab Bot <gitlab-bot@gitlab.com>2022-07-20 18:08:38 +0300
commit93b6ee78bf98cbc42712b7c5486ab0e78adb339f (patch)
tree29efc441f06368d2ec05196fcba070ee37c765b7 /lib/gitlab/memory
parent96add3eb957ee4256910087070e27850dd61cfe9 (diff)
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib/gitlab/memory')
-rw-r--r--lib/gitlab/memory/jemalloc.rb16
-rw-r--r--lib/gitlab/memory/reports/jemalloc_stats.rb68
-rw-r--r--lib/gitlab/memory/reports_daemon.rb96
3 files changed, 175 insertions, 5 deletions
diff --git a/lib/gitlab/memory/jemalloc.rb b/lib/gitlab/memory/jemalloc.rb
index 454c54569de..fbe5ae656b9 100644
--- a/lib/gitlab/memory/jemalloc.rb
+++ b/lib/gitlab/memory/jemalloc.rb
@@ -14,6 +14,8 @@ module Gitlab
STATS_DEFAULT_FORMAT = :json
+ FILENAME_PREFIX = 'jemalloc_stats'
+
# Return jemalloc stats as a string.
def stats(format: STATS_DEFAULT_FORMAT)
verify_format!(format)
@@ -23,13 +25,17 @@ module Gitlab
end
end
- # Write jemalloc stats to the given directory.
- def dump_stats(path:, format: STATS_DEFAULT_FORMAT)
+ # Write jemalloc stats to the given directory
+ # @param [String] path Directory path the dump will be put into
+ # @param [String] format `json` or `txt`
+ # @param [String] filename_label Optional custom string that will be injected into the file name, e.g. `worker_0`
+ # @return [void]
+ def dump_stats(path:, format: STATS_DEFAULT_FORMAT, filename_label: nil)
verify_format!(format)
with_malloc_stats_print do |stats_print|
format_settings = STATS_FORMATS[format]
- File.open(File.join(path, file_name(format_settings[:extension])), 'wb') do |io|
+ File.open(File.join(path, file_name(format_settings[:extension], filename_label)), 'wb') do |io|
write_stats(stats_print, io, format_settings)
end
end
@@ -80,8 +86,8 @@ module Gitlab
stats_print.call(callback, nil, format[:options])
end
- def file_name(extension)
- "jemalloc_stats.#{$$}.#{Time.current.to_i}.#{extension}"
+ def file_name(extension, filename_label)
+ [FILENAME_PREFIX, $$, filename_label, Time.current.to_i, extension].reject(&:blank?).join('.')
end
end
end
diff --git a/lib/gitlab/memory/reports/jemalloc_stats.rb b/lib/gitlab/memory/reports/jemalloc_stats.rb
new file mode 100644
index 00000000000..b3848d40770
--- /dev/null
+++ b/lib/gitlab/memory/reports/jemalloc_stats.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Memory
+ module Reports
+ class JemallocStats
+ # On prod, Jemalloc reports sizes were ~2.5 MB:
+ # https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/15993#note_1014767214
+ # We configured 1GB emptyDir per pod:
+ # https://gitlab.com/gitlab-com/gl-infra/k8s-workloads/gitlab-com/-/merge_requests/1949
+ # The pod will be evicted when the size limit is exceeded. We never want this to happen, for availability.
+ #
+ # With the default, we have a headroom (250*2.5MB=625<1000 MB) to fit into configured emptyDir.
+ # It would allow us to keep 3+ days worth of reports for 6 workers running every 2 hours: 3*6*12=216<250
+ #
+ # The cleanup logic will be redundant after we'll implement the uploads, which would perform the cleanup.
+ DEFAULT_MAX_REPORTS_STORED = 250
+
+ def initialize(reports_path:)
+ @reports_path = reports_path
+ end
+
+ def run
+ return unless active?
+
+ Gitlab::Memory::Jemalloc.dump_stats(path: reports_path, filename_label: worker_id)
+ cleanup
+ end
+
+ def active?
+ Feature.enabled?(:report_jemalloc_stats, type: :ops)
+ end
+
+ private
+
+ attr_reader :reports_path
+
+ def cleanup
+ reports_files_modified_order[0...-max_reports_stored].each do |f|
+ File.unlink(f) if File.exist?(f)
+ rescue Errno::ENOENT
+ # Path does not exist: Ignore. We already check `File.exist?`
+ # Rescue to be extra safe, because each worker could perform a cleanup
+ end
+ end
+
+ def reports_files_modified_order
+ pattern = File.join(reports_path, "#{Gitlab::Memory::Jemalloc::FILENAME_PREFIX}*")
+
+ Dir.glob(pattern).sort_by do |f|
+ test('M', f)
+ rescue Errno::ENOENT
+ # Path does not exist: Return any timestamp to proceed with the sort
+ Time.current
+ end
+ end
+
+ def worker_id
+ ::Prometheus::PidProvider.worker_id
+ end
+
+ def max_reports_stored
+ ENV["GITLAB_DIAGNOSTIC_REPORTS_JEMALLOC_MAX_REPORTS_STORED"] || DEFAULT_MAX_REPORTS_STORED
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/memory/reports_daemon.rb b/lib/gitlab/memory/reports_daemon.rb
new file mode 100644
index 00000000000..2b2e0915e72
--- /dev/null
+++ b/lib/gitlab/memory/reports_daemon.rb
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Memory
+ class ReportsDaemon < Daemon
+ DEFAULT_SLEEP_S = 7200 # 2 hours
+ DEFAULT_SLEEP_MAX_DELTA_S = 600 # 0..10 minutes
+ DEFAULT_SLEEP_BETWEEN_REPORTS_S = 120 # 2 minutes
+
+ DEFAULT_REPORTS_PATH = '/tmp'
+
+ def initialize(**options)
+ super
+
+ @alive = true
+
+ @sleep_s =
+ ENV['GITLAB_DIAGNOSTIC_REPORTS_SLEEP_S']&.to_i || DEFAULT_SLEEP_S
+ @sleep_max_delta_s =
+ ENV['GITLAB_DIAGNOSTIC_REPORTS_SLEEP_MAX_DELTA_S']&.to_i || DEFAULT_SLEEP_MAX_DELTA_S
+ @sleep_between_reports_s =
+ ENV['GITLAB_DIAGNOSTIC_REPORTS_SLEEP_BETWEEN_REPORTS_S']&.to_i || DEFAULT_SLEEP_BETWEEN_REPORTS_S
+
+ @reports_path =
+ ENV["GITLAB_DIAGNOSTIC_REPORTS_PATH"] || DEFAULT_REPORTS_PATH
+
+ @reports = [Gitlab::Memory::Reports::JemallocStats.new(reports_path: reports_path)]
+
+ init_prometheus_metrics
+ end
+
+ attr_reader :sleep_s, :sleep_max_delta_s, :sleep_between_reports_s, :reports_path
+
+ def run_thread
+ while alive
+ sleep interval_with_jitter
+
+ reports.select(&:active?).each do |report|
+ tms = Benchmark.measure do
+ report.run
+ end
+
+ log_report(report_label(report), tms)
+ @report_duration_counter.increment({ report: report_label(report) }, tms.real.to_i)
+
+ sleep sleep_between_reports_s
+ end
+ end
+ end
+
+ private
+
+ attr_reader :alive, :reports
+
+ # Returns the sleep interval with a random adjustment.
+ # The random adjustment is put in place to ensure continued availability.
+ def interval_with_jitter
+ sleep_s + rand(sleep_max_delta_s)
+ end
+
+ def log_report(report_label, tms)
+ Gitlab::AppLogger.info(
+ message: 'finished',
+ pid: $$,
+ worker_id: worker_id,
+ report: report_label,
+ duration_s: tms.real.round(2),
+ cpu_s: tms.utime.round(2),
+ sys_cpu_s: tms.stime.round(2)
+ )
+ end
+
+ def worker_id
+ ::Prometheus::PidProvider.worker_id
+ end
+
+ def report_label(report)
+ report.class.to_s.demodulize.underscore
+ end
+
+ def stop_working
+ @alive = false
+ end
+
+ def init_prometheus_metrics
+ default_labels = { pid: worker_id }
+
+ @report_duration_counter = Gitlab::Metrics.counter(
+ :gitlab_diag_report_duration_seconds_total,
+ 'Total time elapsed for running diagnostic report',
+ default_labels
+ )
+ end
+ end
+ end
+end