# frozen_string_literal: true

module Gitlab
  module UsageDataConcerns
    module Topology
      include Gitlab::Utils::UsageData
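
      # Maps Prometheus `job` labels to the GitLab service names reported
      # in the usage data payload. Jobs not listed here are dropped.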
      JOB_TO_SERVICE_NAME = {
        'gitlab-rails' => 'web',
        'gitlab-sidekiq' => 'sidekiq',
        'gitlab-workhorse' => 'workhorse',
        'redis' => 'redis',
        'postgres' => 'postgres',
        'gitaly' => 'gitaly',
        'prometheus' => 'prometheus',
        'node' => 'node-exporter'
      }.freeze
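
      # Entry point for topology collection. Falls back to an empty data set
      # when collection fails and records the time taken as `duration_s`.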
      def topology_usage_data
        topology_data, duration = measure_duration do
          alt_usage_data(fallback: {}) do
            {
              nodes: topology_node_data
            }.compact
          end
        end

        { topology: topology_data.merge(duration_s: duration) }
      end

      private
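
      # Fetches node- and service-level metrics from Prometheus and regroups
      # them into one hash per monitored node.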
      def topology_node_data
        with_prometheus_client do |client|
          # node-level data
          by_instance_mem = topology_node_memory(client)
          by_instance_cpus = topology_node_cpus(client)
          # service-level data
          by_instance_by_job_by_metric_memory = topology_all_service_memory(client)
          by_instance_by_job_process_count = topology_all_service_process_count(client)

          instances = Set.new(by_instance_mem.keys + by_instance_cpus.keys)
          instances.map do |instance|
            {
              node_memory_total_bytes: by_instance_mem[instance],
              node_cpus: by_instance_cpus[instance],
              node_services:
                topology_node_services(instance, by_instance_by_job_process_count, by_instance_by_job_by_metric_memory)
            }.compact
          end
        end
      end
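
      # Total RAM per node in bytes, as reported by node_exporter.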
      def topology_node_memory(client)
        aggregate_single(client, 'avg (node_memory_MemTotal_bytes) by (instance)')
      end
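
      # Number of CPUs per node; counting only the `idle` mode series yields
      # exactly one series per CPU.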
      def topology_node_cpus(client)
        aggregate_single(client, 'count (node_cpu_seconds_total{mode="idle"}) by (instance)')
      end
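
      # Per-service memory usage (resident, unique and proportional set sizes),
      # matching both the plain and the `ruby_`-prefixed metric names. The
      # GitLab exporter's own process is excluded.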
      def topology_all_service_memory(client)
        aggregate_many(
          client,
          'avg ({__name__ =~ "(ruby_){0,1}process_(resident|unique|proportional)_memory_bytes", job != "gitlab_exporter_process"}) by (instance, job, __name__)'
        )
      end
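
      # Per-service process counts, derived from the number of
      # `process_start_time_seconds` series per (instance, job) pair.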
      def topology_all_service_process_count(client)
        aggregate_many(client, 'count ({__name__ =~ "(ruby_){0,1}process_start_time_seconds", job != "gitlab_exporter_process"}) by (instance, job)')
      end

      def topology_node_services(instance, all_process_counts, all_process_memory)
        # returns all node service data grouped by service name as the key
        instance_service_data =
          topology_instance_service_process_count(instance, all_process_counts)
            .deep_merge(topology_instance_service_memory(instance, all_process_memory))

        # map to a list of hashes where the service name becomes a value instead,
        # and drop unknown services, since they might not be ours
        instance_service_data.each_with_object([]) do |entry, list|
          service, service_metrics = entry
          gitlab_service = JOB_TO_SERVICE_NAME[service.to_s]
          next unless gitlab_service

          list << { name: gitlab_service }.merge(service_metrics)
        end
      end
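
      # Maps each service's process count for the given instance to
      # `{ job => { process_count: n } }`.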
      def topology_instance_service_process_count(instance, all_instance_data)
        topology_data_for_instance(instance, all_instance_data).to_h do |metric, count|
          [metric['job'], { process_count: count }]
        end
      end
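
      # Maps each service's memory metrics for the given instance to RSS, USS
      # and PSS keys, e.g. `{ job => { process_memory_rss: bytes, ... } }`.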
      def topology_instance_service_memory(instance, all_instance_data)
        topology_data_for_instance(instance, all_instance_data).each_with_object({}) do |entry, hash|
          metric, memory = entry
          job = metric['job']
          key =
            case metric['__name__']
            when match_process_memory_metric_for_type('resident') then :process_memory_rss
            when match_process_memory_metric_for_type('unique') then :process_memory_uss
            when match_process_memory_metric_for_type('proportional') then :process_memory_pss
            end

          hash[job] ||= {}
          hash[job][key] ||= memory
        end
      end
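
      # Matches both `process_<type>_memory_bytes` and the `ruby_`-prefixed
      # variant exported by Ruby processes.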
      def match_process_memory_metric_for_type(type)
        /(ruby_){0,1}process_#{type}_memory_bytes/
      end
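
      # Selects only those series that belong to the given instance.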
      def topology_data_for_instance(instance, all_instance_data)
        all_instance_data.filter { |metric, _value| metric['instance'] == instance }
      end
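
      # Strips the port from an `instance` label so that all exporters running
      # on the same host are attributed to a single node.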
      def drop_port(instance)
        instance.gsub(/:.+$/, '')
      end

      # Will retain a single `instance` key that values are mapped to
      def aggregate_single(client, query)
        client.aggregate(query) { |metric| drop_port(metric['instance']) }
      end

      # Will retain a composite key that values are mapped to
      def aggregate_many(client, query)
        client.aggregate(query) do |metric|
          metric['instance'] = drop_port(metric['instance'])
          metric
        end
      end
    end
  end
end