Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author John Cai <jcai@gitlab.com> 2021-12-08 15:45:11 +0300
committer John Cai <jcai@gitlab.com> 2021-12-08 16:13:04 +0300
commit 234974414f2e1f5c8855f4e07289a6570caf1c90 (patch)
tree cdc62d025dd4354b50937282e15c651af71a0b2f
parent 3ca16212b97e4c3f6a46b257efefc5dfc199adc3 (diff)
cgroups: emit cgroups stats to prometheus
In order to gain visibility into how cgroups is performing, turn CgroupsManager into a metrics collector so metrics can bubble up through the CommandFactory.

Changelog: changed
-rw-r--r-- internal/cgroups/cgroups.go 3
-rw-r--r-- internal/cgroups/mock_linux_test.go 43
-rw-r--r-- internal/cgroups/noop.go 7
-rw-r--r-- internal/cgroups/v1_linux.go 55
-rw-r--r-- internal/cgroups/v1_linux_test.go 42
-rw-r--r-- internal/git/command_factory.go 1
-rw-r--r-- internal/git/command_factory_cgroup_test.go 4
7 files changed, 153 insertions, 2 deletions
diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go
index 733f89d74..273746685 100644
--- a/internal/cgroups/cgroups.go
+++ b/internal/cgroups/cgroups.go
@@ -1,6 +1,7 @@
package cgroups
import (
+ "github.com/prometheus/client_golang/prometheus"
"gitlab.com/gitlab-org/gitaly/v14/internal/command"
"gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config/cgroups"
)
@@ -17,6 +18,8 @@ type Manager interface {
// It is expected to be called once at Gitaly shutdown from any
// instance of the Manager.
Cleanup() error
+ Describe(ch chan<- *prometheus.Desc)
+ Collect(ch chan<- prometheus.Metric)
}
// NewManager returns the appropriate Cgroups manager
diff --git a/internal/cgroups/mock_linux_test.go b/internal/cgroups/mock_linux_test.go
index 38f1e5ebc..d42c3e827 100644
--- a/internal/cgroups/mock_linux_test.go
+++ b/internal/cgroups/mock_linux_test.go
@@ -21,6 +21,7 @@ package cgroups
import (
"os"
"path/filepath"
+ "strconv"
"testing"
"github.com/containerd/cgroups"
@@ -54,3 +55,45 @@ func newMock(t *testing.T) *mockCgroup {
func (m *mockCgroup) hierarchy() ([]cgroups.Subsystem, error) {
return m.subsystems, nil
}
+
+// setupMockCgroupFiles creates, for every mocked subsystem, the directory of
+// the current process's cgroup and fills it with the CPU and memory
+// accounting files, so that metrics can be collected from the mock hierarchy.
+//
+// The stat files are written empty and the counter files are written as "0",
+// except for memory.failcnt, which is set to memFailCount so that tests can
+// assert on a known, non-default value.
+func (m *mockCgroup) setupMockCgroupFiles(
+	t *testing.T,
+	manager *CGroupV1Manager,
+	memFailCount int,
+) {
+	t.Helper()
+
+	for _, s := range m.subsystems {
+		cgroupPath := filepath.Join(m.root, string(s.Name()), manager.currentProcessCgroup())
+		// Directories need the execute (search) bit: with 0o644 a non-root
+		// user cannot create or open files inside the directory.
+		require.NoError(t, os.MkdirAll(cgroupPath, 0o755))
+
+		for _, emptyFile := range []string{
+			"cpu.stat",
+			"memory.stat",
+			"memory.oom_control",
+		} {
+			require.NoError(t, os.WriteFile(filepath.Join(cgroupPath, emptyFile), []byte(""), 0o644))
+		}
+
+		// memory.failcnt is deliberately absent from this list; it is
+		// written once below with the caller-supplied value.
+		for _, zeroFile := range []string{
+			"memory.usage_in_bytes",
+			"memory.max_usage_in_bytes",
+			"memory.limit_in_bytes",
+			"memory.memsw.failcnt",
+			"memory.memsw.usage_in_bytes",
+			"memory.memsw.max_usage_in_bytes",
+			"memory.memsw.limit_in_bytes",
+			"memory.kmem.usage_in_bytes",
+			"memory.kmem.max_usage_in_bytes",
+			"memory.kmem.failcnt",
+			"memory.kmem.limit_in_bytes",
+			"memory.kmem.tcp.usage_in_bytes",
+			"memory.kmem.tcp.max_usage_in_bytes",
+			"memory.kmem.tcp.failcnt",
+			"memory.kmem.tcp.limit_in_bytes",
+		} {
+			require.NoError(t, os.WriteFile(filepath.Join(cgroupPath, zeroFile), []byte("0"), 0o644))
+		}
+
+		require.NoError(t, os.WriteFile(filepath.Join(cgroupPath, "memory.failcnt"), []byte(strconv.Itoa(memFailCount)), 0o644))
+	}
+}
diff --git a/internal/cgroups/noop.go b/internal/cgroups/noop.go
index caac82e2d..57f552902 100644
--- a/internal/cgroups/noop.go
+++ b/internal/cgroups/noop.go
@@ -1,6 +1,7 @@
package cgroups
import (
+ "github.com/prometheus/client_golang/prometheus"
"gitlab.com/gitlab-org/gitaly/v14/internal/command"
)
@@ -21,3 +22,9 @@ func (cg *NoopManager) AddCommand(cmd *command.Command) error {
func (cg *NoopManager) Cleanup() error {
return nil
}
+
+// Describe does nothing: the no-op manager has no metrics, so no descriptor is ever sent on ch.
+func (cg *NoopManager) Describe(ch chan<- *prometheus.Desc) {}
+
+// Collect does nothing: the no-op manager has no metrics, so nothing is ever sent on ch.
+func (cg *NoopManager) Collect(ch chan<- prometheus.Metric) {}
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go
index 42a797aa1..477a34249 100644
--- a/internal/cgroups/v1_linux.go
+++ b/internal/cgroups/v1_linux.go
@@ -8,14 +8,17 @@ import (
"github.com/containerd/cgroups"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/prometheus/client_golang/prometheus"
"gitlab.com/gitlab-org/gitaly/v14/internal/command"
cgroupscfg "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config/cgroups"
)
// CGroupV1Manager is the manager for cgroups v1
type CGroupV1Manager struct {
- cfg cgroupscfg.Config
- hierarchy func() ([]cgroups.Subsystem, error)
+ cfg cgroupscfg.Config // cgroups configuration the manager was created with
+ hierarchy func() ([]cgroups.Subsystem, error) // yields the cgroup subsystems to operate on; tests swap in a mock
+ paths map[string]interface{} // set of cgroup paths that commands have been added to (values are unused)
+ memoryFailedTotal, cpuUsage *prometheus.GaugeVec // gauges emitted by Collect: memory fail count and CPU usage
}
func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager {
@@ -24,6 +27,21 @@ func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager {
hierarchy: func() ([]cgroups.Subsystem, error) {
return defaultSubsystems(cfg.Mountpoint)
},
+ paths: make(map[string]interface{}),
+ memoryFailedTotal: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_memory_failed_total",
+ Help: "Number of memory usage hits limits",
+ },
+ []string{"path"},
+ ),
+ cpuUsage: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_cpu_usage",
+ Help: "CPU Usage of Cgroup",
+ },
+ []string{"path", "type"},
+ ),
}
}
@@ -73,9 +91,42 @@ func (cg *CGroupV1Manager) AddCommand(cmd *command.Command) error {
return fmt.Errorf("failed adding process to cgroup: %w", err)
}
+ cg.paths[cgroupPath] = struct{}{}
+
return nil
}
+// Collect collects metrics from the cgroups controller. It loads the cgroup
+// of the current process, reads its statistics and sends three gauges on ch:
+// the memory fail count, and the CPU usage split into "user" and "kernel".
+//
+// Collection is best-effort: if the cgroup cannot be loaded or its statistics
+// cannot be read, nothing is sent and the error is dropped, because the
+// collector interface has no way to return it. Statistics of a controller
+// that is not part of the hierarchy are skipped instead of being
+// dereferenced, which would otherwise panic during a scrape.
+func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
+	path := cg.currentProcessCgroup()
+
+	control, err := cgroups.Load(cg.hierarchy, cgroups.StaticPath(path))
+	if err != nil {
+		return
+	}
+
+	metrics, err := control.Stat()
+	if err != nil || metrics == nil {
+		return
+	}
+
+	// Memory statistics are only populated when a memory controller is
+	// present in the hierarchy.
+	if metrics.Memory != nil && metrics.Memory.Usage != nil {
+		memoryMetric := cg.memoryFailedTotal.WithLabelValues(path)
+		memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt))
+		ch <- memoryMetric
+	}
+
+	if metrics.CPU != nil && metrics.CPU.Usage != nil {
+		cpuUserMetric := cg.cpuUsage.WithLabelValues(path, "user")
+		cpuUserMetric.Set(float64(metrics.CPU.Usage.User))
+		ch <- cpuUserMetric
+
+		cpuKernelMetric := cg.cpuUsage.WithLabelValues(path, "kernel")
+		cpuKernelMetric.Set(float64(metrics.CPU.Usage.Kernel))
+		ch <- cpuKernelMetric
+	}
+}
+
+// Describe describes the cgroup metrics that Collect provides; it delegates to prometheus.DescribeByCollect, passing the manager itself as the collector.
+func (cg *CGroupV1Manager) Describe(ch chan<- *prometheus.Desc) {
+ prometheus.DescribeByCollect(cg, ch)
+}
+
//nolint: revive,stylecheck // This is unintentionally missing documentation.
func (cg *CGroupV1Manager) Cleanup() error {
processCgroupPath := cg.currentProcessCgroup()
diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go
index 6141fca35..252aa9287 100644
--- a/internal/cgroups/v1_linux_test.go
+++ b/internal/cgroups/v1_linux_test.go
@@ -1,6 +1,7 @@
package cgroups
import (
+ "bytes"
"context"
"fmt"
"hash/crc32"
@@ -11,6 +12,8 @@ import (
"strings"
"testing"
+ "github.com/prometheus/client_golang/prometheus/testutil"
+ "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v14/internal/command"
"gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config/cgroups"
@@ -65,6 +68,7 @@ func TestAddCommand(t *testing.T) {
v1Manager1 := &CGroupV1Manager{
cfg: config,
hierarchy: mock.hierarchy,
+ paths: make(map[string]interface{}),
}
require.NoError(t, v1Manager1.Setup())
@@ -79,6 +83,7 @@ func TestAddCommand(t *testing.T) {
v1Manager2 := &CGroupV1Manager{
cfg: config,
hierarchy: mock.hierarchy,
+ paths: make(map[string]interface{}),
}
require.NoError(t, v1Manager2.AddCommand(cmd2))
@@ -115,6 +120,43 @@ func TestCleanup(t *testing.T) {
}
}
+// TestMetrics checks that a CGroupV1Manager running against the mocked
+// cgroup hierarchy exposes the expected CPU usage and memory fail count
+// metrics for the current process's cgroup.
+func TestMetrics(t *testing.T) {
+	mock := newMock(t)
+
+	manager := newV1Manager(defaultCgroupsConfig())
+	// Point the manager at the mock instead of the real cgroup filesystem.
+	manager.hierarchy = mock.hierarchy
+	require.NoError(t, manager.Setup())
+
+	ctx, cancel := testhelper.Context()
+	defer cancel()
+
+	cmd, err := command.New(ctx, exec.Command("ls", "-hal", "."), nil, nil, nil)
+	require.NoError(t, err)
+	require.NoError(t, cmd.Wait())
+	require.NoError(t, manager.AddCommand(cmd))
+
+	// Write a memory fail count of 2 into the mock files; the expected
+	// output below must report exactly that value.
+	mock.setupMockCgroupFiles(t, manager, 2)
+
+	cgroupPath := manager.currentProcessCgroup()
+
+	expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage CPU Usage of Cgroup
+# TYPE gitaly_cgroup_cpu_usage gauge
+gitaly_cgroup_cpu_usage{path="%s",type="kernel"} 0
+gitaly_cgroup_cpu_usage{path="%s",type="user"} 0
+# HELP gitaly_cgroup_memory_failed_total Number of memory usage hits limits
+# TYPE gitaly_cgroup_memory_failed_total gauge
+gitaly_cgroup_memory_failed_total{path="%s"} 2
+`, cgroupPath, cgroupPath, cgroupPath))
+
+	metricNames := []string{
+		"gitaly_cgroup_memory_failed_total",
+		"gitaly_cgroup_cpu_usage",
+	}
+	assert.NoError(t, testutil.CollectAndCompare(manager, expected, metricNames...))
+}
+
func readCgroupFile(t *testing.T, path string) []byte {
t.Helper()
diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go
index 46c23c008..c3be0eb60 100644
--- a/internal/git/command_factory.go
+++ b/internal/git/command_factory.go
@@ -74,6 +74,7 @@ func (cf *ExecCommandFactory) Describe(descs chan<- *prometheus.Desc) {
// Collect is used to collect Prometheus metrics.
func (cf *ExecCommandFactory) Collect(metrics chan<- prometheus.Metric) {
cf.invalidCommandsMetric.Collect(metrics)
+ cf.cgroupsManager.Collect(metrics) // also forward the metrics of the cgroups manager
}
// New creates a new command for the repo repository.
diff --git a/internal/git/command_factory_cgroup_test.go b/internal/git/command_factory_cgroup_test.go
index d2b5e2e3e..d2b0ab2f9 100644
--- a/internal/git/command_factory_cgroup_test.go
+++ b/internal/git/command_factory_cgroup_test.go
@@ -5,6 +5,7 @@ import (
"path/filepath"
"testing"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v14/internal/command"
@@ -31,6 +32,9 @@ func (m *mockCgroupsManager) Cleanup() error {
return nil
}
+func (m *mockCgroupsManager) Collect(ch chan<- prometheus.Metric) {} // no-op: the mock emits no metrics
+func (m *mockCgroupsManager) Describe(ch chan<- *prometheus.Desc) {} // no-op: the mock describes no metrics
+
func TestNewCommandAddsToCgroup(t *testing.T) {
root := testhelper.TempDir(t)