1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
# frozen_string_literal: true
module Gitlab
  module Memory
    # Runs diagnostic reports: streams a report's output through an external
    # compressor (COMPRESS_CMD) into a file under +reports_path+, logs the
    # start, finish or failure of every run, and adds the run's duration to a
    # Prometheus counter.
    class Reporter
      # Command the report output is piped through before it reaches disk.
      COMPRESS_CMD = %w[gzip --fast].freeze

      attr_reader :reports_path

      # @param reports_path [String, nil] directory that receives finished reports.
      #   Falls back to ENV["GITLAB_DIAGNOSTIC_REPORTS_PATH"], then to a freshly
      #   created temporary directory.
      # @param logger [#info, #error] receives the started/finished/failed entries.
      def initialize(reports_path: nil, logger: Gitlab::AppLogger)
        @reports_path = reports_path || ENV["GITLAB_DIAGNOSTIC_REPORTS_PATH"] || Dir.mktmpdir
        @logger = logger

        @worker_id = ::Prometheus::PidProvider.worker_id
        # Random per-instance identifier; it ends up in log entries and file names.
        @worker_uuid = SecureRandom.uuid

        init_prometheus_metrics
      end

      # Runs +report+ and stores its compressed output in +reports_path+.
      #
      # @param report [#active?, #name, #run] the report to execute; +run+ is
      #   handed the IO the report must write to.
      # @return [Boolean] +true+ when the report was stored; +false+ when the
      #   report is inactive or any StandardError was raised (the error is
      #   logged, not re-raised).
      def run_report(report)
        return false unless report.active?

        @logger.info(
          log_labels(
            message: 'started',
            perf_report: report.name
          ))

        start_monotonic_time = Gitlab::Metrics::System.monotonic_time
        start_thread_cpu_time = Gitlab::Metrics::System.thread_cpu_time

        report_file = store_report(report)

        cpu_s = Gitlab::Metrics::System.thread_cpu_duration(start_thread_cpu_time)
        duration_s = Gitlab::Metrics::System.monotonic_time - start_monotonic_time

        @logger.info(
          log_labels(
            message: 'finished',
            perf_report: report.name,
            cpu_s: cpu_s.round(2),
            duration_s: duration_s.round(2),
            perf_report_file: report_file,
            perf_report_size_bytes: file_size(report_file)
          ))

        @report_duration_counter.increment({ report: report.name }, duration_s)

        true
      rescue StandardError => e
        @logger.error(
          log_labels(
            message: 'failed',
            perf_report: report.name,
            error: e.inspect
          ))

        false
      end

      private

      # Writes the report into <reports_path>/tmp first and moves it into
      # +reports_path+ once it is complete.
      #
      # @return [String] path of the finished report file.
      def store_report(report)
        # Store report in tmp subdir while it is still streaming.
        # This will clearly separate finished reports from the files we are still writing to.
        tmp_dir = File.join(@reports_path, 'tmp')
        FileUtils.mkdir_p(tmp_dir)

        report_file = file_name(report)
        tmp_file_path = File.join(tmp_dir, report_file)

        write_heap_dump_file(report, tmp_file_path)

        File.join(@reports_path, report_file).tap do |report_file_path|
          FileUtils.mv(tmp_file_path, report_file_path)
        end
      end

      # Pipes the output of +report.run+ through COMPRESS_CMD into the file at +path+.
      #
      # @param report [#run] receives the write end of the compressor's stdin pipe.
      # @param path [String] file the compressor's stdout is written to.
      # @raise [StandardError] when the compressor does not exit successfully;
      #   the message carries its exit status and whatever it wrote to stderr.
      def write_heap_dump_file(report, path)
        io_r, io_w = IO.pipe
        err_r, err_w = IO.pipe
        pid = nil
        status = nil

        File.open(path, 'wb') do |file|
          extras = {
            in: io_r,
            out: file,
            err: err_w
          }
          pid = Process.spawn(*COMPRESS_CMD, **extras)
          # The child owns these ends now. Keeping our copies open would stop
          # the stderr read below from ever seeing EOF.
          io_r.close
          err_w.close

          report.run(io_w)
          # Closing the write end is what tells the compressor its input is complete.
          io_w.close

          _, status = Process.wait2(pid)
        end

        errors = err_r.read.strip
        err_r.close

        # A failed compressor leaves a truncated file behind, so report every
        # unsuccessful exit, even one that printed nothing to stderr.
        raise StandardError, "exit #{status.exitstatus}: #{errors}" unless status.success?
      ensure
        # A pipe end is nil when IO.pipe itself raised; closing an already
        # closed IO is harmless.
        [io_r, io_w, err_r, err_w].compact.each(&:close)
        # Make sure we don't leave any running processes behind. Once wait2 has
        # returned, the child is gone and its PID may belong to another process,
        # so only signal a child that has not been reaped yet.
        reap_compressor(pid) if pid && status.nil?
      end

      # Kills the compressor child and collects its exit status so that it does
      # not linger as a zombie.
      def reap_compressor(pid)
        Gitlab::ProcessManagement.signal(pid, :KILL)
        Process.wait(pid)
      rescue Errno::ECHILD
        # Nothing left to wait for: the child has already been reaped.
      end

      # Labels attached to every log entry of this reporter, merged with +extra_labels+.
      def log_labels(**extra_labels)
        {
          pid: Process.pid,
          worker_id: @worker_id,
          perf_report_worker_uuid: @worker_uuid
        }.merge(extra_labels)
      end

      # Builds "<report name>.<timestamp>.<worker id>.<worker uuid>.gz"; blank
      # parts are dropped before joining.
      def file_name(report)
        timestamp = Time.current.strftime('%Y-%m-%d.%H:%M:%S:%L')

        report_id = [@worker_id, @worker_uuid].join(".")

        [report.name, timestamp, report_id, 'gz'].compact_blank.join('.')
      end

      # @return [Integer] size of the file in bytes, or 0 when it does not exist.
      def file_size(file_path)
        File.size(file_path.to_s)
      rescue Errno::ENOENT
        0
      end

      # Registers the counter that accumulates report run durations.
      # Note that the +pid+ label is populated with the worker id.
      def init_prometheus_metrics
        default_labels = { pid: @worker_id }

        @report_duration_counter = Gitlab::Metrics.counter(
          :gitlab_diag_report_duration_seconds_total,
          'Total time elapsed for running diagnostic report',
          default_labels
        )
      end
    end
  end
end
|