Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQuang-Minh Nguyen <qmnguyen@gitlab.com>2023-11-10 13:40:19 +0300
committerQuang-Minh Nguyen <qmnguyen@gitlab.com>2023-11-17 17:46:09 +0300
commit79e68324a8c6449531404164f3210bdfd271a68b (patch)
tree663f6b16f9df07dafea0071c8a300b745b9c716e
parente66f6ec5ad5b47cb43e69a4690c3e9e1a9a49b4e (diff)
limiter: Ignore `total_inactive_file` instead of `inactive_file`
In https://gitlab.com/gitlab-org/gitaly/-/merge_requests/6508, we implemented a fix to ignore highly evictable Page Caches from cgroup memory threshold. The used metric was parent cgroup's `inactive_file`. Unfortunately, that metric reflects the inactive Page Caches of direct processes inside the parent cgroup. It doesn't account for the indirect processes in children repository cgroups. We don't spawn any process in the parent cgroup. So, the fix was useless. In Cgroup V1, `memory.stat` has `total_inactive_file`. It's exactly what we are looking for. In Cgroup V2, `inactive_file` includes all of its substree consumptions. So, we can keep using that field.
-rw-r--r--internal/cgroups/cgroups.go20
-rw-r--r--internal/cgroups/handler_linux_test.go30
-rw-r--r--internal/cgroups/v1_linux.go12
-rw-r--r--internal/cgroups/v2_linux.go16
-rw-r--r--internal/limiter/watchers/cgroup_memory_watcher.go4
-rw-r--r--internal/limiter/watchers/cgroup_memory_watcher_test.go18
6 files changed, 67 insertions, 33 deletions
diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go
index 8cf8df116..f8bca0609 100644
--- a/internal/cgroups/cgroups.go
+++ b/internal/cgroups/cgroups.go
@@ -34,18 +34,18 @@ type CgroupStats struct {
// UnderOOM is the current OOM status of a cgroup. This information is available for Cgroup V1 only. It's read
// from the `memory.oom_control` file.
UnderOOM bool
- // Anon reflects the `rss` (V1) or `anon` (V2) of `memory.stat` file. That's amount of memory used in anonymous mappings
- Anon uint64
+ // TotalAnon reflects the `rss` (V1) or `anon` (V2) of `memory.stat` file. That's amount of memory used in anonymous mappings
+ TotalAnon uint64
// ActiveAnon reflects the `active_file` of `memory.stat` file, anonymous and swap cache memory on active LRU list.
- ActiveAnon uint64
+ TotalActiveAnon uint64
// InactiveAnon reflects the `inactive_anon` of `memory.stat` file, anonymous and swap cache memory on inactive LRU list.
- InactiveAnon uint64
- // File reflects the `cache` (V1) or `file` (V2) of `memory.stat` file, bytes of page cache memory.
- File uint64
- // ActiveFile reflects the `active_file` of `memory.stat` file, bytes of file-backed memory that cannot be reclaimed.
- ActiveFile uint64
- // InactiveFile reflects the `inactive_file` of `memory.stat` file, bytes of file-backed memory on inactive LRU list.
- InactiveFile uint64
+ TotalInactiveAnon uint64
+ // TotalFile reflects the `cache` (V1) or `file` (V2) of `memory.stat` file, bytes of page cache memory.
+ TotalFile uint64
+ // TotalActiveFile reflects the `active_file` of `memory.stat` file, bytes of file-backed memory that cannot be reclaimed.
+ TotalActiveFile uint64
+ // TotalInactiveFile reflects the `inactive_file` of `memory.stat` file, bytes of file-backed memory on inactive LRU list.
+ TotalInactiveFile uint64
}
// Stats stores statistics of all cgroups managed by a manager
diff --git a/internal/cgroups/handler_linux_test.go b/internal/cgroups/handler_linux_test.go
index 9556b393b..ac87939e6 100644
--- a/internal/cgroups/handler_linux_test.go
+++ b/internal/cgroups/handler_linux_test.go
@@ -735,6 +735,18 @@ oom_kill 3`},
{"cpu.stat", `nr_periods 10
nr_throttled 50
throttled_time 1000000`}, // 0.001 seconds
+ {"memory.stat", `cache 0
+rss 0
+inactive_anon 100
+active_anon 200
+inactive_file 300
+active_file 400
+total_cache 235000000
+total_rss 234000000
+total_inactive_anon 200000000
+total_active_anon 34000000
+total_inactive_file 100000000
+total_active_file 135000000`},
},
expectedStats: Stats{
ParentStats: CgroupStats{
@@ -744,6 +756,12 @@ throttled_time 1000000`}, // 0.001 seconds
MemoryLimit: 2000000000,
OOMKills: 3,
UnderOOM: true,
+ TotalAnon: 234000000,
+ TotalActiveAnon: 34000000,
+ TotalInactiveAnon: 200000000,
+ TotalFile: 235000000,
+ TotalActiveFile: 135000000,
+ TotalInactiveFile: 100000000,
},
},
},
@@ -773,6 +791,12 @@ oom_kill 5`},
{"cpu.stat", `nr_periods 10
nr_throttled 50
throttled_usec 1000000`}, // 0.001 seconds
+ {"memory.stat", `anon 234000000
+file 235000000
+inactive_anon 200000000
+active_anon 34000000
+inactive_file 100000000
+active_file 135000000`},
},
expectedStats: Stats{
ParentStats: CgroupStats{
@@ -781,6 +805,12 @@ throttled_usec 1000000`}, // 0.001 seconds
MemoryUsage: 1234000000,
MemoryLimit: 2000000000,
OOMKills: 5,
+ TotalAnon: 234000000,
+ TotalActiveAnon: 34000000,
+ TotalInactiveAnon: 200000000,
+ TotalFile: 235000000,
+ TotalActiveFile: 135000000,
+ TotalInactiveFile: 100000000,
},
},
},
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go
index 53f6cb5a4..e56829c26 100644
--- a/internal/cgroups/v1_linux.go
+++ b/internal/cgroups/v1_linux.go
@@ -195,12 +195,12 @@ func (cvh *cgroupV1Handler) stats() (Stats, error) {
MemoryLimit: metrics.Memory.Usage.Limit,
OOMKills: metrics.MemoryOomControl.OomKill,
UnderOOM: metrics.MemoryOomControl.UnderOom != 0,
- Anon: metrics.Memory.RSS,
- ActiveAnon: metrics.Memory.ActiveAnon,
- InactiveAnon: metrics.Memory.InactiveAnon,
- File: metrics.Memory.Cache,
- ActiveFile: metrics.Memory.ActiveFile,
- InactiveFile: metrics.Memory.InactiveFile,
+ TotalAnon: metrics.Memory.TotalRSS,
+ TotalActiveAnon: metrics.Memory.TotalActiveAnon,
+ TotalInactiveAnon: metrics.Memory.TotalInactiveAnon,
+ TotalFile: metrics.Memory.TotalCache,
+ TotalActiveFile: metrics.Memory.TotalActiveFile,
+ TotalInactiveFile: metrics.Memory.TotalInactiveFile,
},
}, nil
}
diff --git a/internal/cgroups/v2_linux.go b/internal/cgroups/v2_linux.go
index 396cf104f..479b2c5ce 100644
--- a/internal/cgroups/v2_linux.go
+++ b/internal/cgroups/v2_linux.go
@@ -189,14 +189,18 @@ func (cvh *cgroupV2Handler) stats() (Stats, error) {
CPUThrottledDuration: float64(metrics.CPU.ThrottledUsec) / float64(time.Second),
MemoryUsage: metrics.Memory.Usage,
MemoryLimit: metrics.Memory.UsageLimit,
- Anon: metrics.Memory.Anon,
- ActiveAnon: metrics.Memory.ActiveAnon,
- InactiveAnon: metrics.Memory.InactiveAnon,
- File: metrics.Memory.File,
- ActiveFile: metrics.Memory.ActiveFile,
- InactiveFile: metrics.Memory.InactiveFile,
+ // memory.stat breaks down the cgroup's memory footprint into different types of memory. In
+ // Cgroup V2, this file includes the consumption of the cgroup’s entire subtree. Total_* stats
+ // were removed.
+ TotalAnon: metrics.Memory.Anon,
+ TotalActiveAnon: metrics.Memory.ActiveAnon,
+ TotalInactiveAnon: metrics.Memory.InactiveAnon,
+ TotalFile: metrics.Memory.File,
+ TotalActiveFile: metrics.Memory.ActiveFile,
+ TotalInactiveFile: metrics.Memory.InactiveFile,
},
}
+
if metrics.MemoryEvents != nil {
stats.ParentStats.OOMKills = metrics.MemoryEvents.OomKill
}
diff --git a/internal/limiter/watchers/cgroup_memory_watcher.go b/internal/limiter/watchers/cgroup_memory_watcher.go
index 453e5eee6..ab686ba12 100644
--- a/internal/limiter/watchers/cgroup_memory_watcher.go
+++ b/internal/limiter/watchers/cgroup_memory_watcher.go
@@ -73,14 +73,14 @@ func (c *CgroupMemoryWatcher) Poll(context.Context) (*limiter.BackoffEvent, erro
// for some special insignificant cases (LazyFree for example). A portion of the Page Caches, noted by `inactive_file`,
// is the target for the eviction first. So, it makes sense to exclude the easy evictable memory from the threshold.
if parentStats.MemoryLimit > 0 && parentStats.MemoryUsage > 0 &&
- float64(parentStats.MemoryUsage-parentStats.InactiveFile)/float64(parentStats.MemoryLimit) >= c.memoryThreshold {
+ float64(parentStats.MemoryUsage-parentStats.TotalInactiveFile)/float64(parentStats.MemoryLimit) >= c.memoryThreshold {
return &limiter.BackoffEvent{
WatcherName: c.Name(),
ShouldBackoff: true,
Reason: "cgroup memory exceeds threshold",
Stats: map[string]any{
"memory_usage": parentStats.MemoryUsage,
- "inactive_file": parentStats.InactiveFile,
+ "inactive_file": parentStats.TotalInactiveFile,
"memory_limit": parentStats.MemoryLimit,
"memory_threshold": c.memoryThreshold,
},
diff --git a/internal/limiter/watchers/cgroup_memory_watcher_test.go b/internal/limiter/watchers/cgroup_memory_watcher_test.go
index 1e30eac8c..ccc2df3a0 100644
--- a/internal/limiter/watchers/cgroup_memory_watcher_test.go
+++ b/internal/limiter/watchers/cgroup_memory_watcher_test.go
@@ -89,9 +89,9 @@ func TestCgroupMemoryWatcher_Poll(t *testing.T) {
statsList: []cgroups.Stats{
{
ParentStats: cgroups.CgroupStats{
- MemoryUsage: 1900000000,
- InactiveFile: 100000000,
- MemoryLimit: 2000000000,
+ MemoryUsage: 1900000000,
+ TotalInactiveFile: 100000000,
+ MemoryLimit: 2000000000,
},
},
},
@@ -156,9 +156,9 @@ func TestCgroupMemoryWatcher_Poll(t *testing.T) {
statsList: []cgroups.Stats{
{
ParentStats: cgroups.CgroupStats{
- MemoryUsage: 1900000000,
- InactiveFile: 200000000,
- MemoryLimit: 2000000000,
+ MemoryUsage: 1900000000,
+ TotalInactiveFile: 200000000,
+ MemoryLimit: 2000000000,
},
},
},
@@ -176,9 +176,9 @@ func TestCgroupMemoryWatcher_Poll(t *testing.T) {
statsList: []cgroups.Stats{
{
ParentStats: cgroups.CgroupStats{
- MemoryUsage: 1200000000,
- InactiveFile: 100000000,
- MemoryLimit: 2000000000,
+ MemoryUsage: 1200000000,
+ TotalInactiveFile: 100000000,
+ MemoryLimit: 2000000000,
},
},
},