diff options
Diffstat (limited to 'lib/gitlab/memory/reports/jemalloc_stats.rb')
-rw-r--r-- | lib/gitlab/memory/reports/jemalloc_stats.rb | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/lib/gitlab/memory/reports/jemalloc_stats.rb b/lib/gitlab/memory/reports/jemalloc_stats.rb new file mode 100644 index 00000000000..b99bec4ac3e --- /dev/null +++ b/lib/gitlab/memory/reports/jemalloc_stats.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +module Gitlab + module Memory + module Reports + class JemallocStats + # On prod, Jemalloc reports sizes were ~2.5 MB: + # https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/15993#note_1014767214 + # We configured 1GB emptyDir per pod: + # https://gitlab.com/gitlab-com/gl-infra/k8s-workloads/gitlab-com/-/merge_requests/1949 + # The pod will be evicted when the size limit is exceeded. We never want this to happen, for availability. + # + # With the default, we have a headroom (250*2.5MB=625<1000 MB) to fit into configured emptyDir. + # It would allow us to keep 3+ days worth of reports for 6 workers running every 2 hours: 3*6*12=216<250 + # + # The cleanup logic will be redundant after we'll implement the uploads, which would perform the cleanup. + DEFAULT_MAX_REPORTS_STORED = 250 + + def initialize(reports_path:) + @reports_path = reports_path + end + + def run + return unless active? + + Gitlab::Memory::Jemalloc.dump_stats(path: reports_path, filename_label: worker_id).tap { cleanup } + end + + def active? + Feature.enabled?(:report_jemalloc_stats, type: :ops) + end + + private + + attr_reader :reports_path + + def cleanup + reports_files_modified_order[0...-max_reports_stored].each do |f| + File.unlink(f) if File.exist?(f) + rescue Errno::ENOENT + # Path does not exist: Ignore. We already check `File.exist?` + # Rescue to be extra safe, because each worker could perform a cleanup + end + end + + def reports_files_modified_order + pattern = File.join(reports_path, "#{Gitlab::Memory::Jemalloc::FILENAME_PREFIX}*") + + Dir.glob(pattern).sort_by do |f| + test('M', f) + rescue Errno::ENOENT + # Path does not exist: Return any timestamp to proceed with the sort + Time.current + end + end + + def worker_id + ::Prometheus::PidProvider.worker_id + end + + def max_reports_stored + ENV["GITLAB_DIAGNOSTIC_REPORTS_JEMALLOC_MAX_REPORTS_STORED"] || DEFAULT_MAX_REPORTS_STORED + end + end + end + end +end |