cgroups: Adjust metric names

Change the metric names to be more accurate. The memory.failcnt counter tracks how often the kernel has tried to reclaim memory. While this maps closely to the number of killed processes, the two are not exactly the same thing. The cgroup CPU usage metrics are running totals, so append "_total" to the end of the metric name to indicate this. Changelog: changed
author: John Cai <jcai@gitlab.com> 2022-06-13 20:22:25 +0300
committer: John Cai <jcai@gitlab.com> 2022-06-14 21:42:01 +0300
commit: e81362b53558de8aa578aaf2b7b6662b42fc00ea (patch)
tree: df2087f6eb912f376b20262364565860e1723f5d
parent: 4e84eaf02f3a5403ef44c3f51b8e46c57e24ed02 (diff)
2 files changed, 15 insertions, 15 deletions
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go
index e4b72f28b..201d098fe 100644
--- a/internal/cgroups/v1_linux.go
+++ b/internal/cgroups/v1_linux.go
@@ -17,10 +17,10 @@ import (
 
 // CGroupV1Manager is the manager for cgroups v1
 type CGroupV1Manager struct {
-	cfg                         cgroupscfg.Config
-	hierarchy                   func() ([]cgroups.Subsystem, error)
-	memoryFailedTotal, cpuUsage *prometheus.GaugeVec
-	procs                       *prometheus.GaugeVec
+	cfg                                  cgroupscfg.Config
+	hierarchy                            func() ([]cgroups.Subsystem, error)
+	memoryReclaimAttemptsTotal, cpuUsage *prometheus.GaugeVec
+	procs                                *prometheus.GaugeVec
 }
 
 func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager {
@@ -29,16 +29,16 @@ func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager {
 		hierarchy: func() ([]cgroups.Subsystem, error) {
 			return defaultSubsystems(cfg.Mountpoint)
 		},
-		memoryFailedTotal: prometheus.NewGaugeVec(
+		memoryReclaimAttemptsTotal: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "gitaly_cgroup_memory_failed_total",
+				Name: "gitaly_cgroup_memory_reclaim_attempts_total",
 				Help: "Number of memory usage hits limits",
 			},
 			[]string{"path"},
 		),
 		cpuUsage: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "gitaly_cgroup_cpu_usage",
+				Name: "gitaly_cgroup_cpu_usage_total",
 				Help: "CPU Usage of Cgroup",
 			},
 			[]string{"path", "type"},
@@ -157,7 +157,7 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
 		if metrics, err := control.Stat(); err != nil {
 			logger.WithError(err).Warn("unable to get cgroup stats")
 		} else {
-			memoryMetric := cg.memoryFailedTotal.WithLabelValues(repoPath)
+			memoryMetric := cg.memoryReclaimAttemptsTotal.WithLabelValues(repoPath)
 			memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt))
 			ch <- memoryMetric
 
diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go
index 844b06225..ede9155ba 100644
--- a/internal/cgroups/v1_linux_test.go
+++ b/internal/cgroups/v1_linux_test.go
@@ -188,13 +188,13 @@ func TestMetrics(t *testing.T) {
 
 	repoCgroupPath := filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0")
 
-	expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage CPU Usage of Cgroup
-# TYPE gitaly_cgroup_cpu_usage gauge
-gitaly_cgroup_cpu_usage{path="%s",type="kernel"} 0
-gitaly_cgroup_cpu_usage{path="%s",type="user"} 0
-# HELP gitaly_cgroup_memory_failed_total Number of memory usage hits limits
-# TYPE gitaly_cgroup_memory_failed_total gauge
-gitaly_cgroup_memory_failed_total{path="%s"} 2
+	expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup
+# TYPE gitaly_cgroup_cpu_usage_total gauge
+gitaly_cgroup_cpu_usage_total{path="%s",type="kernel"} 0
+gitaly_cgroup_cpu_usage_total{path="%s",type="user"} 0
+# HELP gitaly_cgroup_memory_reclaim_attempts_total Number of memory usage hits limits
+# TYPE gitaly_cgroup_memory_reclaim_attempts_total gauge
+gitaly_cgroup_memory_reclaim_attempts_total{path="%s"} 2
 # HELP gitaly_cgroup_procs_total Total number of procs
 # TYPE gitaly_cgroup_procs_total gauge
 gitaly_cgroup_procs_total{path="%s",subsystem="cpu"} 1
author	John Cai <jcai@gitlab.com>	2022-06-13 20:22:25 +0300
committer	John Cai <jcai@gitlab.com>	2022-06-14 21:42:01 +0300
commit	e81362b53558de8aa578aaf2b7b6662b42fc00ea (patch)
tree	df2087f6eb912f376b20262364565860e1723f5d
parent	4e84eaf02f3a5403ef44c3f51b8e46c57e24ed02 (diff)