# frozen_string_literal: true

PUMA_EXTERNAL_METRICS_SERVER = Gitlab::Utils.to_boolean(ENV['PUMA_EXTERNAL_METRICS_SERVER'])
require Rails.root.join('metrics_server', 'metrics_server') if PUMA_EXTERNAL_METRICS_SERVER
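
# When PUMA_EXTERNAL_METRICS_SERVER is enabled, Puma metrics are served from a separate
# metrics server process (see MetricsServer.start_for_puma below) rather than from the
# in-process web exporter.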

# Keep separate directories for separate processes
def prometheus_default_multiproc_dir
  return unless Rails.env.development? || Rails.env.test?

  if Gitlab::Runtime.sidekiq?
    Rails.root.join('tmp/prometheus_multiproc_dir/sidekiq')
  elsif Gitlab::Runtime.puma?
    Rails.root.join('tmp/prometheus_multiproc_dir/puma')
  else
    Rails.root.join('tmp/prometheus_multiproc_dir')
  end
end
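
# For example, a development Puma process ends up with tmp/prometheus_multiproc_dir/puma;
# outside development and test this helper returns nil, so the directory must come from
# the prometheus_multiproc_dir environment variable read below.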

def puma_metrics_server_process?
  Prometheus::PidProvider.worker_id == 'puma_master'
end

def sidekiq_metrics_server_process?
  Gitlab::Runtime.sidekiq? && (!ENV['SIDEKIQ_WORKER_ID'] || ENV['SIDEKIQ_WORKER_ID'] == '0')
end
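
# These predicates assume Prometheus::PidProvider.worker_id reports 'puma_master' in the
# Puma primary process, and that SIDEKIQ_WORKER_ID is assigned by sidekiq-cluster
# ('0' for the first worker) and left unset for a single standalone Sidekiq process.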

if puma_metrics_server_process? || sidekiq_metrics_server_process?
  # The following is necessary to ensure stale Prometheus metrics don't accumulate over time.
  # It needs to run this early so that freshly created metrics files aren't deleted:
  # once the app is hit in `warmup`, the first metrics and their corresponding files are
  # already being created, for example in `lib/gitlab/metrics/requests_rack_middleware.rb`.
  Prometheus::CleanupMultiprocDirService.new.execute

  ::Prometheus::Client.reinitialize_on_pid_change(force: true)
end

::Prometheus::Client.configure do |config|
  config.logger = Gitlab::AppLogger
  config.multiprocess_files_dir = ENV['prometheus_multiproc_dir'] || prometheus_default_multiproc_dir
  config.pid_provider = ::Prometheus::PidProvider.method(:worker_id)
end
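
# The pid provider's return value becomes part of each process's metrics file names in
# multiprocess_files_dir, which is how concurrent Puma/Sidekiq processes avoid writing
# to each other's files.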

Gitlab::Application.configure do |config|
  # Middleware slot 0 should be Sentry, so that errors raised in this middleware are caught
  config.middleware.insert_after(Labkit::Middleware::Rack, Gitlab::Metrics::RequestsRackMiddleware)
end
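
# RequestsRackMiddleware is what records the per-request metrics (request counts and
# durations) whose files the cleanup above is careful not to delete.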

# Any actions beyond this check should only execute outside of tests, when running in an
# application context (i.e. not in the Rails console or rspec) and when users have enabled
# metrics.
return if Rails.env.test? || !Gitlab::Runtime.application? || !Gitlab::Metrics.prometheus_metrics_enabled?

if sidekiq_metrics_server_process?
  # The single worker outside of a sidekiq-cluster, or the first worker (sidekiq_0)
  # in a cluster of processes, is responsible for serving health checks.
  #
  # Do not clean the metrics directory here - the supervisor script should
  # have already taken care of that.
  Sidekiq.configure_server do |config|
    config.on(:startup) do
      # In https://gitlab.com/gitlab-org/gitlab/-/issues/345804 we are looking to
      # only serve health checks from a worker process; for backwards compatibility
      # we still go through the metrics exporter server, but have started configuring
      # it with the new settings keys.
      exporter_settings = Settings.monitoring.sidekiq_health_checks
      Gitlab::Metrics::Exporter::SidekiqExporter.instance(exporter_settings).start
    end
  end
end
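
# Assumption: Settings.monitoring.sidekiq_health_checks mirrors the old exporter settings
# shape (an enabled flag plus a listen address and port sourced from gitlab.yml).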

Gitlab::Cluster::LifecycleEvents.on_master_start do
  Gitlab::Metrics.gauge(:deployments, 'GitLab Version', {}, :max).set({ version: Gitlab::VERSION, revision: Gitlab.revision }, 1)

  if Gitlab::Runtime.puma?
    Gitlab::Metrics::Samplers::PumaSampler.instance.start

    if PUMA_EXTERNAL_METRICS_SERVER && Settings.monitoring.web_exporter.enabled
      MetricsServer.start_for_puma
    else
      Gitlab::Metrics::Exporter::WebExporter.instance.start
    end
  end

  Gitlab::Ci::Parsers.instrument!
rescue IOError => e
  Gitlab::ErrorTracking.track_exception(e)
  Gitlab::Metrics.error_detected!
end
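
# The :deployments gauge above is effectively a version marker: its value is always 1,
# with the running version and revision attached as labels; the :max multiprocess
# aggregation keeps the reported value at 1 no matter how many processes write it.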

Gitlab::Cluster::LifecycleEvents.on_worker_start do
  # Re-initialize the client so the new worker PID gets its own metrics files
  defined?(::Prometheus::Client.reinitialize_on_pid_change) && ::Prometheus::Client.reinitialize_on_pid_change

  logger = Gitlab::AppLogger
  Gitlab::Metrics::Samplers::RubySampler.initialize_instance(logger: logger).start
  Gitlab::Metrics::Samplers::DatabaseSampler.initialize_instance(logger: logger).start
  Gitlab::Metrics::Samplers::ThreadsSampler.initialize_instance(logger: logger).start

  if Gitlab::Runtime.puma?
    # Since we are observing the metrics server from the Puma primary, we would inherit
    # its supervision thread after forking into workers, so we need to explicitly stop it here.
    if PUMA_EXTERNAL_METRICS_SERVER
      ::MetricsServer::PumaProcessSupervisor.instance.stop
    else
      Gitlab::Metrics::Exporter::WebExporter.instance.stop
    end

    Gitlab::Metrics::Samplers::ActionCableSampler.instance(logger: logger).start
  end

  if Gitlab.ee? && Gitlab::Runtime.sidekiq?
    Gitlab::Metrics::Samplers::GlobalSearchSampler.instance(logger: logger).start
  end

  Gitlab::Ci::Parsers.instrument!
rescue IOError => e
  Gitlab::ErrorTracking.track_exception(e)
  Gitlab::Metrics.error_detected!
end
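
# Each sampler above runs on its own background thread in the worker process, periodically
# flushing process-level stats (GC and memory figures, database pool usage, thread counts,
# and so on) into the shared multiprocess metrics files.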

if Gitlab::Runtime.puma?
  Gitlab::Cluster::LifecycleEvents.on_before_graceful_shutdown do
    # Before we re-exec or shut down the server, we must also stop the metrics server
    if PUMA_EXTERNAL_METRICS_SERVER
      ::MetricsServer::PumaProcessSupervisor.instance.shutdown
    else
      Gitlab::Metrics::Exporter::WebExporter.instance.stop
    end
  end

  Gitlab::Cluster::LifecycleEvents.on_before_master_restart do
    # Before we re-exec the server, we must also stop the metrics server.
    #
    # We do this again to be extra safe, but it should not be needed.
    if PUMA_EXTERNAL_METRICS_SERVER
      ::MetricsServer::PumaProcessSupervisor.instance.shutdown
    else
      Gitlab::Metrics::Exporter::WebExporter.instance.stop
    end
  end
end
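
# A hedged reading of the supervisor calls above (inferred from usage here, not from its
# API docs): `stop` appears to halt only the supervision thread that forked workers
# inherit, while `shutdown` also terminates the supervised metrics server process itself.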