diff options
author | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-07-25 14:04:40 +0300 |
---|---|---|
committer | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-07-28 06:59:10 +0300 |
commit | 577436851cf80904f27603e75e716dc6a006a7dc (patch) | |
tree | 9aef8354eba61c6c8145fb5fc83949376553a3f5 /internal/limiter | |
parent | bd2ea331fe723a1f487176a2ac699d9531e0d434 (diff) |
limiter: Implement Cgroup memory resource watcher
This commit implements a Cgroup memory resource watcher to monitor the
memory usage of the parent Cgroup. When the usage reaches 90% of the
memory limit or the cgroup is under OOM, the watcher treats it as a
backoff event.
We target only the parent cgroup for the sake of simplicity. Observing
the memory usage of repository cgroups adds a lot of overhead. In
addition, when the parent cgroup reaches its limit, all commands are
affected. The impact of a repository cgroup exceeding its limit is local
to certain repositories only.
Diffstat (limited to 'internal/limiter')
-rw-r--r-- | internal/limiter/watchers/cgroup_memory_watcher.go | 70 | ||||
-rw-r--r-- | internal/limiter/watchers/cgroup_memory_watcher_test.go | 132 |
2 files changed, 202 insertions, 0 deletions
diff --git a/internal/limiter/watchers/cgroup_memory_watcher.go b/internal/limiter/watchers/cgroup_memory_watcher.go new file mode 100644 index 000000000..4d293bf2e --- /dev/null +++ b/internal/limiter/watchers/cgroup_memory_watcher.go @@ -0,0 +1,70 @@ +package watchers + +import ( + "context" + "fmt" + + "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups" + "gitlab.com/gitlab-org/gitaly/v16/internal/limiter" +) + +const ( + cgroupMemoryWatcherName = "CgroupMemory" + memoryThreshold = 0.9 +) + +// CgroupMemoryWatcher implements ResourceWatcher interface. This watcher polls +// the statistics from the cgroup manager. It returns a backoff event in two +// conditions: +// * The current memory usage exceeds a soft threshold (90%). +// * The cgroup is under OOM. +type CgroupMemoryWatcher struct { + manager cgroups.Manager +} + +// NewCgroupMemoryWatcher is the initializer of CgroupMemoryWatcher +func NewCgroupMemoryWatcher(manager cgroups.Manager) *CgroupMemoryWatcher { + return &CgroupMemoryWatcher{ + manager: manager, + } +} + +// Name returns the name of CgroupMemoryWatcher +func (c *CgroupMemoryWatcher) Name() string { + return cgroupMemoryWatcherName +} + +// Poll asserts the cgroup statistics and returns a backoff event accordingly +// when it is triggered. These stats are fetched from cgroup manager. +func (c *CgroupMemoryWatcher) Poll(context.Context) (*limiter.BackoffEvent, error) { + if !c.manager.Ready() { + return &limiter.BackoffEvent{WatcherName: c.Name(), ShouldBackoff: false}, nil + } + + stats, err := c.manager.Stats() + if err != nil { + return nil, fmt.Errorf("cgroup watcher: poll stats from cgroup manager: %w", err) + } + parentStats := stats.ParentStats + + // If the parent memory cgroup is under OOM, tasks may be stopped. This stat is available in + // Cgroup V1 only. 
+ if parentStats.UnderOOM { + return &limiter.BackoffEvent{ + WatcherName: c.Name(), + ShouldBackoff: true, + Reason: "cgroup is under OOM", + }, nil + } + + if parentStats.MemoryLimit > 0 && parentStats.MemoryUsage > 0 && + float64(parentStats.MemoryUsage)/float64(parentStats.MemoryLimit) >= memoryThreshold { + return &limiter.BackoffEvent{ + WatcherName: c.Name(), + ShouldBackoff: true, + Reason: fmt.Sprintf("cgroup memory exceeds limit: %d/%d", parentStats.MemoryUsage, parentStats.MemoryLimit), + }, nil + } + + return &limiter.BackoffEvent{WatcherName: c.Name(), ShouldBackoff: false}, nil +} diff --git a/internal/limiter/watchers/cgroup_memory_watcher_test.go b/internal/limiter/watchers/cgroup_memory_watcher_test.go new file mode 100644 index 000000000..551c040cb --- /dev/null +++ b/internal/limiter/watchers/cgroup_memory_watcher_test.go @@ -0,0 +1,132 @@ +package watchers + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups" + "gitlab.com/gitlab-org/gitaly/v16/internal/limiter" + "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper" +) + +func TestCgroupMemoryWatcher_Name(t *testing.T) { + t.Parallel() + + manager := NewCgroupMemoryWatcher(&testCgroupManager{}) + require.Equal(t, cgroupMemoryWatcherName, manager.Name()) +} + +func TestCgroupMemoryWatcher_Poll(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + desc string + manager *testCgroupManager + expectedEvent *limiter.BackoffEvent + expectedErr error + }{ + { + desc: "disabled watcher", + manager: &testCgroupManager{ready: false}, + expectedEvent: &limiter.BackoffEvent{ + WatcherName: cgroupMemoryWatcherName, + ShouldBackoff: false, + }, + expectedErr: nil, + }, + { + desc: "cgroup stats return empty stats", + manager: &testCgroupManager{ + ready: true, + statsList: []cgroups.Stats{{}}, + }, + expectedEvent: &limiter.BackoffEvent{ + WatcherName: cgroupMemoryWatcherName, + ShouldBackoff: false, + }, + }, + { + desc: 
"cgroup stats query returns errors", + manager: &testCgroupManager{ + ready: true, + statsErr: fmt.Errorf("something goes wrong"), + statsList: []cgroups.Stats{{}}, + }, + expectedErr: fmt.Errorf("cgroup watcher: poll stats from cgroup manager: %w", fmt.Errorf("something goes wrong")), + }, + { + desc: "cgroup memory usage is more than 90%", + manager: &testCgroupManager{ + ready: true, + statsList: []cgroups.Stats{ + { + ParentStats: cgroups.CgroupStats{ + MemoryUsage: 1800000000, + MemoryLimit: 2000000000, + }, + }, + }, + }, + expectedEvent: &limiter.BackoffEvent{ + WatcherName: cgroupMemoryWatcherName, + ShouldBackoff: true, + Reason: "cgroup memory exceeds limit: 1800000000/2000000000", + }, + expectedErr: nil, + }, + { + desc: "cgroup is under OOM", + manager: &testCgroupManager{ + ready: true, + statsList: []cgroups.Stats{ + { + ParentStats: cgroups.CgroupStats{ + MemoryUsage: 1900000000, + MemoryLimit: 2000000000, + UnderOOM: true, + }, + }, + }, + }, + expectedEvent: &limiter.BackoffEvent{ + WatcherName: cgroupMemoryWatcherName, + ShouldBackoff: true, + Reason: "cgroup is under OOM", + }, + expectedErr: nil, + }, + { + desc: "cgroup memory usage normal", + manager: &testCgroupManager{ + ready: true, + statsList: []cgroups.Stats{ + { + ParentStats: cgroups.CgroupStats{ + MemoryUsage: 1700000000, + MemoryLimit: 2000000000, + }, + }, + }, + }, + expectedEvent: &limiter.BackoffEvent{ + WatcherName: cgroupMemoryWatcherName, + ShouldBackoff: false, + }, + expectedErr: nil, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + watcher := NewCgroupMemoryWatcher(tc.manager) + event, err := watcher.Poll(testhelper.Context(t)) + + if tc.expectedErr != nil { + require.Equal(t, tc.expectedErr, err) + require.Nil(t, event) + } else { + require.NoError(t, err) + require.Equal(t, tc.expectedEvent, event) + } + }) + } +} |