diff options
-rw-r--r-- | internal/cgroups/v1_linux.go | 20 | ||||
-rw-r--r-- | internal/cgroups/v1_linux_test.go | 49 | ||||
-rw-r--r-- | internal/gitaly/config/cgroups/cgroups.go | 3 | ||||
-rw-r--r-- | internal/gitaly/config/config_test.go | 22 |
4 files changed, 67 insertions, 27 deletions
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go index 3fa5da4c3..17065ba20 100644 --- a/internal/cgroups/v1_linux.go +++ b/internal/cgroups/v1_linux.go @@ -17,10 +17,10 @@ import ( // CGroupV1Manager is the manager for cgroups v1 type CGroupV1Manager struct { - cfg cgroupscfg.Config - hierarchy func() ([]cgroups.Subsystem, error) - memoryFailedTotal, cpuUsage *prometheus.GaugeVec - procs *prometheus.GaugeVec + cfg cgroupscfg.Config + hierarchy func() ([]cgroups.Subsystem, error) + memoryReclaimAttemptsTotal, cpuUsage *prometheus.GaugeVec + procs *prometheus.GaugeVec } func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager { @@ -29,16 +29,16 @@ func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager { hierarchy: func() ([]cgroups.Subsystem, error) { return defaultSubsystems(cfg.Mountpoint) }, - memoryFailedTotal: prometheus.NewGaugeVec( + memoryReclaimAttemptsTotal: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "gitaly_cgroup_memory_failed_total", + Name: "gitaly_cgroup_memory_reclaim_attempts_total", Help: "Number of memory usage hits limits", }, []string{"path"}, ), cpuUsage: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "gitaly_cgroup_cpu_usage", + Name: "gitaly_cgroup_cpu_usage_total", Help: "CPU Usage of Cgroup", }, []string{"path", "type"}, @@ -140,6 +140,10 @@ func (cg *CGroupV1Manager) addToCgroup(pid int, cgroupPath string) error { // Collect collects metrics from the cgroups controller func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) { + if !cg.cfg.MetricsEnabled { + return + } + for i := 0; i < int(cg.cfg.Repositories.Count); i++ { repoPath := cg.repoPath(i) logger := log.Default().WithField("cgroup_path", repoPath) @@ -155,7 +159,7 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) { if metrics, err := control.Stat(); err != nil { logger.WithError(err).Warn("unable to get cgroup stats") } else { - memoryMetric := cg.memoryFailedTotal.WithLabelValues(repoPath) + memoryMetric := cg.memoryReclaimAttemptsTotal.WithLabelValues(repoPath) memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt)) ch <- memoryMetric diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go index aaa095db0..f0c898cfd 100644 --- a/internal/cgroups/v1_linux_test.go +++ b/internal/cgroups/v1_linux_test.go @@ -193,29 +193,42 @@ func TestMetrics(t *testing.T) { repoCgroupPath := filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0") - expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage CPU Usage of Cgroup -# TYPE gitaly_cgroup_cpu_usage gauge -gitaly_cgroup_cpu_usage{path="%s",type="kernel"} 0 -gitaly_cgroup_cpu_usage{path="%s",type="user"} 0 -# HELP gitaly_cgroup_memory_failed_total Number of memory usage hits limits -# TYPE gitaly_cgroup_memory_failed_total gauge -gitaly_cgroup_memory_failed_total{path="%s"} 2 + expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup +# TYPE gitaly_cgroup_cpu_usage_total gauge +gitaly_cgroup_cpu_usage_total{path="%s",type="kernel"} 0 +gitaly_cgroup_cpu_usage_total{path="%s",type="user"} 0 +# HELP gitaly_cgroup_memory_reclaim_attempts_total Number of memory usage hits limits +# TYPE gitaly_cgroup_memory_reclaim_attempts_total gauge +gitaly_cgroup_memory_reclaim_attempts_total{path="%s"} 2 # HELP gitaly_cgroup_procs_total Total number of procs # TYPE gitaly_cgroup_procs_total gauge gitaly_cgroup_procs_total{path="%s",subsystem="cpu"} 1 gitaly_cgroup_procs_total{path="%s",subsystem="memory"} 1 `, repoCgroupPath, repoCgroupPath, repoCgroupPath, repoCgroupPath, repoCgroupPath)) - assert.NoError(t, testutil.CollectAndCompare( - v1Manager1, - expected)) - - logEntry := hook.LastEntry() - assert.Contains( - t, - logEntry.Data["command.cgroup_path"], - repoCgroupPath, - "log field includes a cgroup path that is a subdirectory of the current process' cgroup path", - ) + + for _, metricsEnabled := range []bool{true, false} { + t.Run(fmt.Sprintf("metrics enabled: %v", metricsEnabled), func(t *testing.T) { + v1Manager1.cfg.MetricsEnabled = metricsEnabled + + if metricsEnabled { + assert.NoError(t, testutil.CollectAndCompare( + v1Manager1, + expected)) + } else { + assert.NoError(t, testutil.CollectAndCompare( + v1Manager1, + bytes.NewBufferString(""))) + } + + logEntry := hook.LastEntry() + assert.Contains( + t, + logEntry.Data["command.cgroup_path"], + repoCgroupPath, + "log field includes a cgroup path that is a subdirectory of the current process' cgroup path", + ) + }) + } } func readCgroupFile(t *testing.T, path string) []byte { diff --git a/internal/gitaly/config/cgroups/cgroups.go b/internal/gitaly/config/cgroups/cgroups.go index 935a1a565..8d9094552 100644 --- a/internal/gitaly/config/cgroups/cgroups.go +++ b/internal/gitaly/config/cgroups/cgroups.go @@ -14,7 +14,8 @@ type Config struct { MemoryBytes int64 `toml:"memory_bytes"` // CPUShares are the shares of CPU the parent cgroup is allowed to utilize. A value of 1024 // is full utilization of the CPU. 0 implies no CPU limit. - CPUShares uint64 `toml:"cpu_shares"` + CPUShares uint64 `toml:"cpu_shares"` + MetricsEnabled bool `toml:"metrics_enabled"` // Deprecated: No longer supported after 15.0 Count uint `toml:"count"` diff --git a/internal/gitaly/config/config_test.go b/internal/gitaly/config/config_test.go index 2967fd398..9c1961e99 100644 --- a/internal/gitaly/config/config_test.go +++ b/internal/gitaly/config/config_test.go @@ -1082,6 +1082,28 @@ func TestValidateCgroups(t *testing.T) { }, validateErr: errors.New("cgroups.repositories: cpu shares cannot exceed parent"), }, + { + name: "metrics enabled", + rawCfg: `[cgroups] + mountpoint = "/sys/fs/cgroup" + hierarchy_root = "gitaly" + metrics_enabled = true + [cgroups.repositories] + count = 10 + memory_bytes = 1024 + cpu_shares = 512 + `, + expect: cgroups.Config{ + Mountpoint: "/sys/fs/cgroup", + HierarchyRoot: "gitaly", + MetricsEnabled: true, + Repositories: cgroups.Repositories{ + Count: 10, + MemoryBytes: 1024, + CPUShares: 512, + }, + }, + }, } for _, tt := range testCases { t.Run(tt.name, func(t *testing.T) { |