Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Quang-Minh Nguyen <qmnguyen@gitlab.com> 2023-07-25 14:04:40 +0300
committer Quang-Minh Nguyen <qmnguyen@gitlab.com> 2023-07-28 06:59:10 +0300
commit 577436851cf80904f27603e75e716dc6a006a7dc (patch)
tree 9aef8354eba61c6c8145fb5fc83949376553a3f5
parent bd2ea331fe723a1f487176a2ac699d9531e0d434 (diff)
limiter: Implement Cgroup memory resource watcher
This commit implements a cgroup memory resource watcher that monitors the memory usage of the parent cgroup. When the usage reaches 90% of the memory limit, or the cgroup is under OOM, the watcher reports a backoff event. We target only the parent cgroup for the sake of simplicity: observing the memory usage of repository cgroups adds a lot of overhead. In addition, when the parent cgroup reaches its limit, all commands are affected, whereas the impact of a repository cgroup exceeding its limit is local to certain repositories.
-rw-r--r-- internal/limiter/watchers/cgroup_memory_watcher.go 70
-rw-r--r-- internal/limiter/watchers/cgroup_memory_watcher_test.go 132
2 files changed, 202 insertions, 0 deletions
diff --git a/internal/limiter/watchers/cgroup_memory_watcher.go b/internal/limiter/watchers/cgroup_memory_watcher.go
new file mode 100644
index 000000000..4d293bf2e
--- /dev/null
+++ b/internal/limiter/watchers/cgroup_memory_watcher.go
@@ -0,0 +1,70 @@
+package watchers
+
+import (
+ "context"
+ "fmt"
+
+ "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/limiter"
+)
+
+const (
+ // cgroupMemoryWatcherName is the name returned by CgroupMemoryWatcher.Name
+ // and stamped as WatcherName on every backoff event the watcher returns.
+ cgroupMemoryWatcherName = "CgroupMemory"
+ // memoryThreshold is the usage/limit ratio of the parent cgroup at or above
+ // which Poll asks callers to back off.
+ memoryThreshold = 0.9
+)
+
+// CgroupMemoryWatcher implements ResourceWatcher interface. This watcher polls
+// the statistics from the cgroup manager. It returns a backoff event in two
+// conditions:
+// * The current memory usage of the parent cgroup reaches a soft threshold
+// (90% of its memory limit).
+// * The parent cgroup is under OOM.
+type CgroupMemoryWatcher struct {
+ // manager is the cgroup manager whose readiness and stats are queried on
+ // each Poll.
+ manager cgroups.Manager
+}
+
+// NewCgroupMemoryWatcher creates a CgroupMemoryWatcher that reads its
+// statistics from the given cgroup manager.
+func NewCgroupMemoryWatcher(manager cgroups.Manager) *CgroupMemoryWatcher {
+ return &CgroupMemoryWatcher{
+ manager: manager,
+ }
+}
+
+// Name returns the name of CgroupMemoryWatcher, which is the constant
+// cgroupMemoryWatcherName.
+func (c *CgroupMemoryWatcher) Name() string {
+ return cgroupMemoryWatcherName
+}
+
+// Poll inspects the parent cgroup statistics fetched from the cgroup manager
+// and returns a backoff event telling callers whether they should back off.
+//
+// The context parameter is unnamed and unused. A returned event always carries
+// the watcher name. ShouldBackoff is true, with Reason set, when the parent
+// cgroup is under OOM or when its memory usage is at least memoryThreshold of
+// its memory limit. ShouldBackoff is false when the manager is not ready or
+// when neither condition holds. If fetching the stats fails, Poll returns a
+// nil event and the wrapped error.
+func (c *CgroupMemoryWatcher) Poll(context.Context) (*limiter.BackoffEvent, error) {
+ // A manager that is not ready has no stats to assess; never back off then.
+ if !c.manager.Ready() {
+ return &limiter.BackoffEvent{WatcherName: c.Name(), ShouldBackoff: false}, nil
+ }
+
+ stats, err := c.manager.Stats()
+ if err != nil {
+ return nil, fmt.Errorf("cgroup watcher: poll stats from cgroup manager: %w", err)
+ }
+ parentStats := stats.ParentStats
+
+ // When the parent memory cgroup is under OOM, tasks may be stopped. This stat is available in
+ // Cgroup V1 only. It is checked first, so it takes precedence over the usage threshold below.
+ if parentStats.UnderOOM {
+ return &limiter.BackoffEvent{
+ WatcherName: c.Name(),
+ ShouldBackoff: true,
+ Reason: "cgroup is under OOM",
+ }, nil
+ }
+
+ // Only compare when both values are positive: a zero limit or usage (for
+ // example, empty stats) never triggers a backoff, and the division below is
+ // always well defined.
+ if parentStats.MemoryLimit > 0 && parentStats.MemoryUsage > 0 &&
+ float64(parentStats.MemoryUsage)/float64(parentStats.MemoryLimit) >= memoryThreshold {
+ return &limiter.BackoffEvent{
+ WatcherName: c.Name(),
+ ShouldBackoff: true,
+ Reason: fmt.Sprintf("cgroup memory exceeds limit: %d/%d", parentStats.MemoryUsage, parentStats.MemoryLimit),
+ }, nil
+ }
+
+ return &limiter.BackoffEvent{WatcherName: c.Name(), ShouldBackoff: false}, nil
+}
diff --git a/internal/limiter/watchers/cgroup_memory_watcher_test.go b/internal/limiter/watchers/cgroup_memory_watcher_test.go
new file mode 100644
index 000000000..551c040cb
--- /dev/null
+++ b/internal/limiter/watchers/cgroup_memory_watcher_test.go
@@ -0,0 +1,132 @@
+package watchers
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/limiter"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+)
+
+// TestCgroupMemoryWatcher_Name verifies that a watcher built around a
+// testCgroupManager reports cgroupMemoryWatcherName as its name.
+func TestCgroupMemoryWatcher_Name(t *testing.T) {
+ t.Parallel()
+
+ manager := NewCgroupMemoryWatcher(&testCgroupManager{})
+ require.Equal(t, cgroupMemoryWatcherName, manager.Name())
+}
+
+// TestCgroupMemoryWatcher_Poll is a table-driven test of Poll. Each case
+// configures a testCgroupManager (readiness, stats and an optional stats
+// error) and states either the expected backoff event or the expected error.
+// The cases cover a manager that is not ready, empty stats, a failing stats
+// query, usage at the 90% threshold, a cgroup under OOM, and usage below the
+// threshold.
+func TestCgroupMemoryWatcher_Poll(t *testing.T) {
+ t.Parallel()
+
+ for _, tc := range []struct {
+ desc string
+ manager *testCgroupManager
+ expectedEvent *limiter.BackoffEvent
+ expectedErr error
+ }{
+ {
+ desc: "disabled watcher",
+ manager: &testCgroupManager{ready: false},
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: false,
+ },
+ expectedErr: nil,
+ },
+ {
+ desc: "cgroup stats return empty stats",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{{}},
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: false,
+ },
+ },
+ {
+ desc: "cgroup stats query returns errors",
+ manager: &testCgroupManager{
+ ready: true,
+ statsErr: fmt.Errorf("something goes wrong"),
+ statsList: []cgroups.Stats{{}},
+ },
+ expectedErr: fmt.Errorf("cgroup watcher: poll stats from cgroup manager: %w", fmt.Errorf("something goes wrong")),
+ },
+ {
+ // Usage is exactly 90% of the limit here; Poll compares with >=, so
+ // reaching the threshold is enough to trigger a backoff.
+ desc: "cgroup memory usage is more than 90%",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{
+ {
+ ParentStats: cgroups.CgroupStats{
+ MemoryUsage: 1800000000,
+ MemoryLimit: 2000000000,
+ },
+ },
+ },
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: true,
+ Reason: "cgroup memory exceeds limit: 1800000000/2000000000",
+ },
+ expectedErr: nil,
+ },
+ {
+ // Usage is also above the threshold in this case; the expected reason
+ // shows that the OOM condition is reported in preference to it.
+ desc: "cgroup is under OOM",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{
+ {
+ ParentStats: cgroups.CgroupStats{
+ MemoryUsage: 1900000000,
+ MemoryLimit: 2000000000,
+ UnderOOM: true,
+ },
+ },
+ },
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: true,
+ Reason: "cgroup is under OOM",
+ },
+ expectedErr: nil,
+ },
+ {
+ desc: "cgroup memory usage normal",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{
+ {
+ ParentStats: cgroups.CgroupStats{
+ MemoryUsage: 1700000000,
+ MemoryLimit: 2000000000,
+ },
+ },
+ },
+ },
+ expectedEvent: &limiter.BackoffEvent{
+ WatcherName: cgroupMemoryWatcherName,
+ ShouldBackoff: false,
+ },
+ expectedErr: nil,
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ watcher := NewCgroupMemoryWatcher(tc.manager)
+ event, err := watcher.Poll(testhelper.Context(t))
+
+ // On the error path Poll must return no event at all; otherwise the
+ // event must match the expectation exactly.
+ if tc.expectedErr != nil {
+ require.Equal(t, tc.expectedErr, err)
+ require.Nil(t, event)
+ } else {
+ require.NoError(t, err)
+ require.Equal(t, tc.expectedEvent, event)
+ }
+ })
+ }
+}