1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
package supervisor
import (
"os/exec"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/internal/helper/text"
)
var (
rssGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "gitaly_supervisor_rss_bytes",
Help: "Resident set size of supervised processes, in bytes.",
},
[]string{"name"},
)
healthCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "gitaly_supervisor_health_checks",
Help: "Count of Gitaly supervisor health checks",
},
[]string{"name", "status"},
)
)
func init() {
prometheus.MustRegister(rssGauge)
prometheus.MustRegister(healthCounter)
}
type monitorProcess struct {
pid int
wait <-chan struct{}
}
func monitorRss(procs <-chan monitorProcess, done chan<- struct{}, events chan<- Event, name string, threshold int) {
log.WithField("supervisor.name", name).WithField("supervisor.rss_threshold", threshold).Info("starting RSS monitor")
t := time.NewTicker(15 * time.Second)
defer t.Stop()
defer close(done)
for mp := range procs {
monitorLoop:
for {
rss := 1024 * getRss(mp.pid)
rssGauge.WithLabelValues(name).Set(float64(rss))
if rss > 0 {
event := Event{Type: MemoryLow, Pid: mp.pid}
if rss > threshold {
event.Type = MemoryHigh
}
select {
case events <- event:
case <-time.After(1 * time.Second):
// Prevent sending stale events
}
}
select {
case <-mp.wait:
break monitorLoop
case <-t.C:
}
}
}
}
// getRss returns RSS in kilobytes.
func getRss(pid int) int {
// I tried adding a library to do this but it seemed like overkill
// and YAGNI compared to doing this one 'ps' call.
psRss, err := exec.Command("ps", "-o", "rss=", "-p", strconv.Itoa(pid)).Output()
if err != nil {
return 0
}
rss, err := strconv.Atoi(text.ChompBytes(psRss))
if err != nil {
return 0
}
return rss
}
func monitorHealth(f func() error, events chan<- Event, name string, shutdown <-chan struct{}) {
for {
e := Event{Error: f()}
if e.Error != nil {
e.Type = HealthBad
healthCounter.WithLabelValues(name, "bad").Inc()
} else {
e.Type = HealthOK
healthCounter.WithLabelValues(name, "ok").Inc()
}
select {
case events <- e:
case <-time.After(1 * time.Second):
// Prevent sending stale events
case <-shutdown:
return
}
time.Sleep(15 * time.Second)
}
}
|