diff options
author | John Cai <jcai@gitlab.com> | 2022-06-13 20:22:25 +0300 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2022-06-14 21:42:01 +0300 |
commit | e81362b53558de8aa578aaf2b7b6662b42fc00ea (patch) | |
tree | df2087f6eb912f376b20262364565860e1723f5d | |
parent | 4e84eaf02f3a5403ef44c3f51b8e46c57e24ed02 (diff) |
cgroups: Adjust metric names
Change the metric names to be more accurate. The memory.failcnt counter
tracks how often memory usage has hit the cgroup's limit, which is when
the kernel tries to reclaim memory. While this maps closely to the
number of killed processes, the two are not exactly the same thing.
The cgroup CPU usage metrics are running totals. Append "_total" to the
metric name to indicate this.
Changelog: changed
-rw-r--r-- | internal/cgroups/v1_linux.go | 16 | ||||
-rw-r--r-- | internal/cgroups/v1_linux_test.go | 14 |
2 files changed, 15 insertions, 15 deletions
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go index e4b72f28b..201d098fe 100644 --- a/internal/cgroups/v1_linux.go +++ b/internal/cgroups/v1_linux.go @@ -17,10 +17,10 @@ import ( // CGroupV1Manager is the manager for cgroups v1 type CGroupV1Manager struct { - cfg cgroupscfg.Config - hierarchy func() ([]cgroups.Subsystem, error) - memoryFailedTotal, cpuUsage *prometheus.GaugeVec - procs *prometheus.GaugeVec + cfg cgroupscfg.Config + hierarchy func() ([]cgroups.Subsystem, error) + memoryReclaimAttemptsTotal, cpuUsage *prometheus.GaugeVec + procs *prometheus.GaugeVec } func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager { @@ -29,16 +29,16 @@ func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager { hierarchy: func() ([]cgroups.Subsystem, error) { return defaultSubsystems(cfg.Mountpoint) }, - memoryFailedTotal: prometheus.NewGaugeVec( + memoryReclaimAttemptsTotal: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "gitaly_cgroup_memory_failed_total", + Name: "gitaly_cgroup_memory_reclaim_attempts_total", Help: "Number of memory usage hits limits", }, []string{"path"}, ), cpuUsage: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "gitaly_cgroup_cpu_usage", + Name: "gitaly_cgroup_cpu_usage_total", Help: "CPU Usage of Cgroup", }, []string{"path", "type"}, @@ -157,7 +157,7 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) { if metrics, err := control.Stat(); err != nil { logger.WithError(err).Warn("unable to get cgroup stats") } else { - memoryMetric := cg.memoryFailedTotal.WithLabelValues(repoPath) + memoryMetric := cg.memoryReclaimAttemptsTotal.WithLabelValues(repoPath) memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt)) ch <- memoryMetric diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go index 844b06225..ede9155ba 100644 --- a/internal/cgroups/v1_linux_test.go +++ b/internal/cgroups/v1_linux_test.go @@ -188,13 +188,13 @@ func TestMetrics(t *testing.T) { repoCgroupPath := 
filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0") - expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage CPU Usage of Cgroup -# TYPE gitaly_cgroup_cpu_usage gauge -gitaly_cgroup_cpu_usage{path="%s",type="kernel"} 0 -gitaly_cgroup_cpu_usage{path="%s",type="user"} 0 -# HELP gitaly_cgroup_memory_failed_total Number of memory usage hits limits -# TYPE gitaly_cgroup_memory_failed_total gauge -gitaly_cgroup_memory_failed_total{path="%s"} 2 + expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup +# TYPE gitaly_cgroup_cpu_usage_total gauge +gitaly_cgroup_cpu_usage_total{path="%s",type="kernel"} 0 +gitaly_cgroup_cpu_usage_total{path="%s",type="user"} 0 +# HELP gitaly_cgroup_memory_reclaim_attempts_total Number of memory usage hits limits +# TYPE gitaly_cgroup_memory_reclaim_attempts_total gauge +gitaly_cgroup_memory_reclaim_attempts_total{path="%s"} 2 # HELP gitaly_cgroup_procs_total Total number of procs # TYPE gitaly_cgroup_procs_total gauge gitaly_cgroup_procs_total{path="%s",subsystem="cpu"} 1 |