Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQuang-Minh Nguyen <qmnguyen@gitlab.com>2023-07-25 14:04:40 +0300
committerQuang-Minh Nguyen <qmnguyen@gitlab.com>2023-07-28 06:59:10 +0300
commit577436851cf80904f27603e75e716dc6a006a7dc (patch)
tree9aef8354eba61c6c8145fb5fc83949376553a3f5 /internal/limiter
parentbd2ea331fe723a1f487176a2ac699d9531e0d434 (diff)
limiter: Implement Cgroup memory resource watcher
This commit implements a Cgroup memory resource watcher that monitors the memory usage of the parent Cgroup. When the usage reaches 90% of the memory limit, or the cgroup is under OOM, the watcher treats it as a backoff event. We target only the parent cgroup for the sake of simplicity: observing the memory usage of repository cgroups adds a lot of overhead. In addition, when the parent cgroup reaches its limit, all commands are affected, whereas the impact of a repository cgroup exceeding its limit is local to certain repositories.
Diffstat (limited to 'internal/limiter')
-rw-r--r--internal/limiter/watchers/cgroup_memory_watcher.go70
-rw-r--r--internal/limiter/watchers/cgroup_memory_watcher_test.go132
2 files changed, 202 insertions, 0 deletions
diff --git a/internal/limiter/watchers/cgroup_memory_watcher.go b/internal/limiter/watchers/cgroup_memory_watcher.go
new file mode 100644
index 000000000..4d293bf2e
--- /dev/null
+++ b/internal/limiter/watchers/cgroup_memory_watcher.go
@@ -0,0 +1,70 @@
+package watchers
+
+import (
+ "context"
+ "fmt"
+
+ "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/limiter"
+)
+
+const (
+ cgroupMemoryWatcherName = "CgroupMemory" // name reported by CgroupMemoryWatcher.Name
+ memoryThreshold = 0.9 // usage/limit ratio of the parent cgroup at or above which Poll reports a backoff
+)
+
+// CgroupMemoryWatcher is a ResourceWatcher implementation backed by the cgroup
+// manager. Every poll reads the manager's statistics and reports a backoff
+// event when either of the following holds for the parent cgroup:
+//   - its memory usage has reached the soft threshold (90% of its limit);
+//   - it is under OOM.
+type CgroupMemoryWatcher struct {
+	// manager is the source of cgroup readiness and statistics.
+	manager cgroups.Manager
+}
+
+// NewCgroupMemoryWatcher builds a CgroupMemoryWatcher that reads its statistics
+// from the given cgroup manager.
+func NewCgroupMemoryWatcher(manager cgroups.Manager) *CgroupMemoryWatcher {
+	watcher := CgroupMemoryWatcher{manager: manager}
+	return &watcher
+}
+
+// Name reports the identifier of this watcher, which is the
+// cgroupMemoryWatcherName constant.
+func (c *CgroupMemoryWatcher) Name() string {
+	return cgroupMemoryWatcherName
+}
+
+// Poll inspects the statistics of the parent cgroup, as reported by the cgroup
+// manager, and turns them into a backoff event. The context argument is not
+// used.
+//
+// The returned event requests a backoff when the parent cgroup is under OOM,
+// or when its memory usage is at or above memoryThreshold of its memory
+// limit. When the manager is not ready, no stats are fetched and the event
+// does not request a backoff. A nil event and an error are returned when the
+// manager fails to provide stats.
+func (c *CgroupMemoryWatcher) Poll(context.Context) (*limiter.BackoffEvent, error) {
+	// proceed builds the event telling callers that no backoff is needed.
+	proceed := func() *limiter.BackoffEvent {
+		return &limiter.BackoffEvent{WatcherName: c.Name(), ShouldBackoff: false}
+	}
+	// backoff builds the event telling callers to back off for the given reason.
+	backoff := func(reason string) *limiter.BackoffEvent {
+		return &limiter.BackoffEvent{
+			WatcherName:   c.Name(),
+			ShouldBackoff: true,
+			Reason:        reason,
+		}
+	}
+
+	if ready := c.manager.Ready(); !ready {
+		return proceed(), nil
+	}
+
+	stats, err := c.manager.Stats()
+	if err != nil {
+		return nil, fmt.Errorf("cgroup watcher: poll stats from cgroup manager: %w", err)
+	}
+
+	parent := stats.ParentStats
+	switch {
+	case parent.UnderOOM:
+		// When the parent cgroup is under OOM, tasks may be stopped. This stat is
+		// available in Cgroup V1 only.
+		return backoff("cgroup is under OOM"), nil
+	case parent.MemoryLimit > 0 && parent.MemoryUsage > 0 &&
+		float64(parent.MemoryUsage)/float64(parent.MemoryLimit) >= memoryThreshold:
+		// The ratio is only evaluated when both usage and limit are positive.
+		return backoff(fmt.Sprintf("cgroup memory exceeds limit: %d/%d", parent.MemoryUsage, parent.MemoryLimit)), nil
+	default:
+		return proceed(), nil
+	}
+}
diff --git a/internal/limiter/watchers/cgroup_memory_watcher_test.go b/internal/limiter/watchers/cgroup_memory_watcher_test.go
new file mode 100644
index 000000000..551c040cb
--- /dev/null
+++ b/internal/limiter/watchers/cgroup_memory_watcher_test.go
@@ -0,0 +1,132 @@
+package watchers
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/limiter"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+)
+
+// TestCgroupMemoryWatcher_Name checks that the watcher reports the
+// cgroupMemoryWatcherName constant as its name.
+func TestCgroupMemoryWatcher_Name(t *testing.T) {
+	t.Parallel()
+
+	watcher := NewCgroupMemoryWatcher(&testCgroupManager{})
+	actualName := watcher.Name()
+
+	require.Equal(t, cgroupMemoryWatcherName, actualName)
+}
+
+func TestCgroupMemoryWatcher_Poll(t *testing.T) {
+ t.Parallel()
+
+ for _, tc := range []struct {
+ desc string
+ manager *testCgroupManager
+ expectedEvent *limiter.BackoffEvent
+ expectedErr error
+ }{
+ {
+ desc: "disabled watcher",
+ manager: &testCgroupManager{ready: false},
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: false,
+ },
+ expectedErr: nil,
+ },
+ {
+ desc: "cgroup stats return empty stats",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{{}},
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: false,
+ },
+ },
+ {
+ desc: "cgroup stats query returns errors",
+ manager: &testCgroupManager{
+ ready: true,
+ statsErr: fmt.Errorf("something goes wrong"),
+ statsList: []cgroups.Stats{{}},
+ },
+ expectedErr: fmt.Errorf("cgroup watcher: poll stats from cgroup manager: %w", fmt.Errorf("something goes wrong")),
+ },
+ {
+ desc: "cgroup memory usage is more than 90%",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{
+ {
+ ParentStats: cgroups.CgroupStats{
+ MemoryUsage: 1800000000,
+ MemoryLimit: 2000000000,
+ },
+ },
+ },
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: true,
+ Reason: "cgroup memory exceeds limit: 1800000000/2000000000",
+ },
+ expectedErr: nil,
+ },
+ {
+ desc: "cgroup is under OOM",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{
+ {
+ ParentStats: cgroups.CgroupStats{
+ MemoryUsage: 1900000000,
+ MemoryLimit: 2000000000,
+ UnderOOM: true,
+ },
+ },
+ },
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: true,
+ Reason: "cgroup is under OOM",
+ },
+ expectedErr: nil,
+ },
+ {
+ desc: "cgroup memory usage normal",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{
+ {
+ ParentStats: cgroups.CgroupStats{
+ MemoryUsage: 1700000000,
+ MemoryLimit: 2000000000,
+ },
+ },
+ },
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: false,
+ },
+ expectedErr: nil,
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ watcher := NewCgroupMemoryWatcher(tc.manager)
+ event, err := watcher.Poll(testhelper.Context(t))
+
+ if tc.expectedErr != nil {
+ require.Equal(t, tc.expectedErr, err)
+ require.Nil(t, event)
+ } else {
+ require.NoError(t, err)
+ require.Equal(t, tc.expectedEvent, event)
+ }
+ })
+ }
+}