Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Quang-Minh Nguyen <qmnguyen@gitlab.com> 2023-07-11 09:37:10 +0300
committer: Quang-Minh Nguyen <qmnguyen@gitlab.com> 2023-07-11 09:37:10 +0300
commit: db86f94f1921fb1affa96adaffb45b78de569129 (patch)
tree: 9b881b874e8a099c75c9493c48a5b2608616f91a
parent: ab5d534057f73d43a8539406f5923e9314e9c982 (diff)
parent: e0e0cc45caff5305763f7e5afd6150e8a6f51407 (diff)
Merge branch 'zh-cgroups-v2' into 'master'

cgroup: Add support for cgroups v2

See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5547

Merged-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Approved-by: karthik nayak <knayak@gitlab.com>
Approved-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Reviewed-by: Steve Xuereb <sxuereb@gitlab.com>
Reviewed-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Reviewed-by: karthik nayak <knayak@gitlab.com>
Co-authored-by: ZheNing Hu <adlternative@gmail.com>

-rw-r--r--  NOTICE  26
-rw-r--r--  go.mod  1
-rw-r--r--  go.sum  3
-rw-r--r--  internal/cgroups/cgroups.go  24
-rw-r--r--  internal/cgroups/cgroups_linux_test.go  178
-rw-r--r--  internal/cgroups/manager.go  17
-rw-r--r--  internal/cgroups/manager_linux.go  177
-rw-r--r--  internal/cgroups/metrics.go  87
-rw-r--r--  internal/cgroups/mock_linux_test.go  79
-rw-r--r--  internal/cgroups/v1.go  12
-rw-r--r--  internal/cgroups/v1_linux.go  221
-rw-r--r--  internal/cgroups/v1_linux_test.go  332
-rw-r--r--  internal/cgroups/v2_linux.go  175
-rw-r--r--  internal/cgroups/v2_linux_test.go  546
14 files changed, 1425 insertions, 453 deletions
diff --git a/NOTICE b/NOTICE
index 3ffc3ebd5..26f5569ae 100644
--- a/NOTICE
+++ b/NOTICE
@@ -6499,6 +6499,32 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+LICENSE - github.com/cilium/ebpf
+MIT License
+
+Copyright (c) 2017 Nathan Sweet
+Copyright (c) 2018, 2019 Cloudflare
+Copyright (c) 2019 Authors of Cilium
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
LICENSE - github.com/client9/reopen
The MIT License (MIT)
diff --git a/go.mod b/go.mod
index ac9120680..1027ee865 100644
--- a/go.mod
+++ b/go.mod
@@ -94,6 +94,7 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
+ github.com/cilium/ebpf v0.9.1 // indirect
github.com/client9/reopen v1.0.0 // indirect
github.com/cloudflare/circl v1.3.3 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
diff --git a/go.sum b/go.sum
index 656c434b8..eb89c8ee7 100644
--- a/go.sum
+++ b/go.sum
@@ -911,6 +911,8 @@ github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX
github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
+github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
+github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng=
@@ -1182,6 +1184,7 @@ github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHqu
github.com/franela/goblin v0.0.0-20210519012713-85d372ac71e2/go.mod h1:VzmDKDJVZI3aJmnRI9VjAn9nJ8qPPsN1fqzr9dqInIo=
github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
+github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE=
github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go
index 5d44ba70d..0c6927502 100644
--- a/internal/cgroups/cgroups.go
+++ b/internal/cgroups/cgroups.go
@@ -2,11 +2,9 @@ package cgroups
import (
"os/exec"
- "path/filepath"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
- "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
)
@@ -44,7 +42,7 @@ type Manager interface {
// NewManager returns the appropriate Cgroups manager
func NewManager(cfg cgroups.Config, pid int) Manager {
if cfg.Repositories.Count > 0 {
- return newV1Manager(cfg, pid)
+ return newCgroupManager(cfg, pid)
}
return &NoopManager{}
@@ -52,23 +50,5 @@ func NewManager(cfg cgroups.Config, pid int) Manager {
// PruneOldCgroups prunes old cgroups for both the memory and cpu subsystems
func PruneOldCgroups(cfg cgroups.Config, logger log.FieldLogger) {
- if cfg.HierarchyRoot == "" {
- return
- }
-
- if err := config.PruneOldGitalyProcessDirectories(
- logger,
- filepath.Join(cfg.Mountpoint, "memory",
- cfg.HierarchyRoot),
- ); err != nil {
- logger.WithError(err).Error("failed to clean up memory cgroups")
- }
-
- if err := config.PruneOldGitalyProcessDirectories(
- logger,
- filepath.Join(cfg.Mountpoint, "cpu",
- cfg.HierarchyRoot),
- ); err != nil {
- logger.WithError(err).Error("failed to clean up cpu cgroups")
- }
+ pruneOldCgroups(cfg, logger)
}
diff --git a/internal/cgroups/cgroups_linux_test.go b/internal/cgroups/cgroups_linux_test.go
index e52eecb5e..8ed551d2d 100644
--- a/internal/cgroups/cgroups_linux_test.go
+++ b/internal/cgroups/cgroups_linux_test.go
@@ -1,17 +1,12 @@
+//go:build linux
+
package cgroups
import (
- "fmt"
- "io/fs"
- "os"
- "os/exec"
- "path/filepath"
"testing"
- "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
- "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
"gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
)
@@ -20,174 +15,5 @@ func TestMain(m *testing.M) {
}
func TestNewManager(t *testing.T) {
- cfg := cgroups.Config{Repositories: cgroups.Repositories{Count: 10}}
-
- require.IsType(t, &CGroupV1Manager{}, &CGroupV1Manager{cfg: cfg})
require.IsType(t, &NoopManager{}, NewManager(cgroups.Config{}, 1))
}
-
-func TestPruneOldCgroups(t *testing.T) {
- t.Parallel()
-
- testCases := []struct {
- desc string
- cfg cgroups.Config
- expectedPruned bool
- // setup returns a pid
- setup func(*testing.T, cgroups.Config) int
- }{
- {
- desc: "process belongs to another user",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- pid := 1
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- return pid
- },
- expectedPruned: true,
- },
- {
- desc: "no hierarchy root",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- pid := 1
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- return 1
- },
- expectedPruned: false,
- },
- {
- desc: "pid of finished process",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- cmd := exec.Command("ls")
- require.NoError(t, cmd.Run())
- pid := cmd.Process.Pid
-
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- memoryRoot := filepath.Join(
- cfg.Mountpoint,
- "memory",
- cfg.HierarchyRoot,
- "memory.limit_in_bytes",
- )
- require.NoError(t, os.WriteFile(memoryRoot, []byte{}, fs.ModeAppend))
-
- return pid
- },
- expectedPruned: true,
- },
- {
- desc: "pid of running process",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- pid := os.Getpid()
-
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- return pid
- },
- expectedPruned: false,
- },
- {
- desc: "gitaly-0 directory is deleted",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- cgroupManager := NewManager(cfg, 0)
- require.NoError(t, cgroupManager.Setup())
-
- return 0
- },
- expectedPruned: true,
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.desc, func(t *testing.T) {
- memoryRoot := filepath.Join(
- tc.cfg.Mountpoint,
- "memory",
- tc.cfg.HierarchyRoot,
- )
- cpuRoot := filepath.Join(
- tc.cfg.Mountpoint,
- "cpu",
- tc.cfg.HierarchyRoot,
- )
-
- require.NoError(t, os.MkdirAll(cpuRoot, perm.PublicDir))
- require.NoError(t, os.MkdirAll(memoryRoot, perm.PublicDir))
-
- pid := tc.setup(t, tc.cfg)
-
- logger, hook := test.NewNullLogger()
- PruneOldCgroups(tc.cfg, logger)
-
- // create cgroups directories with a different pid
- oldGitalyProcessMemoryDir := filepath.Join(
- memoryRoot,
- fmt.Sprintf("gitaly-%d", pid),
- )
- oldGitalyProcesssCPUDir := filepath.Join(
- cpuRoot,
- fmt.Sprintf("gitaly-%d", pid),
- )
-
- if tc.expectedPruned {
- require.NoDirExists(t, oldGitalyProcessMemoryDir)
- require.NoDirExists(t, oldGitalyProcesssCPUDir)
- } else {
- require.DirExists(t, oldGitalyProcessMemoryDir)
- require.DirExists(t, oldGitalyProcesssCPUDir)
- require.Len(t, hook.Entries, 0)
- }
- })
- }
-}
diff --git a/internal/cgroups/manager.go b/internal/cgroups/manager.go
new file mode 100644
index 000000000..445138394
--- /dev/null
+++ b/internal/cgroups/manager.go
@@ -0,0 +1,17 @@
+//go:build !linux
+
+package cgroups
+
+import (
+ log "github.com/sirupsen/logrus"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+)
+
+func newCgroupManager(cfg cgroupscfg.Config, pid int) Manager {
+ return &NoopManager{}
+}
+
+func pruneOldCgroups(cfg cgroups.Config, logger log.FieldLogger) {
+ return
+}
diff --git a/internal/cgroups/manager_linux.go b/internal/cgroups/manager_linux.go
new file mode 100644
index 000000000..7b8c4a34d
--- /dev/null
+++ b/internal/cgroups/manager_linux.go
@@ -0,0 +1,177 @@
+//go:build linux
+
+package cgroups
+
+import (
+ "fmt"
+ "hash/crc32"
+ "os/exec"
+ "strings"
+
+ cgrps "github.com/containerd/cgroups/v3"
+ "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/prometheus/client_golang/prometheus"
+ log "github.com/sirupsen/logrus"
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+)
+
+// cfs_period_us hardcoded to be 100ms.
+const cfsPeriodUs uint64 = 100000
+
+type cgroupHandler interface {
+ setupParent(reposResources *specs.LinuxResources) error
+ setupRepository(reposResources *specs.LinuxResources) error
+ addToCgroup(pid int, cgroupPath string) error
+ collect(ch chan<- prometheus.Metric)
+ cleanup() error
+ currentProcessCgroup() string
+ repoPath(groupID int) string
+}
+
+// CGroupManager is a manager class that implements specific methods related to cgroups
+type CGroupManager struct {
+ cfg cgroupscfg.Config
+ pid int
+
+ handler cgroupHandler
+}
+
+func newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
+ return newCgroupManagerWithMode(cfg, pid, cgrps.Mode())
+}
+
+func newCgroupManagerWithMode(cfg cgroupscfg.Config, pid int, mode cgrps.CGMode) *CGroupManager {
+ var handler cgroupHandler
+ switch mode {
+ case cgrps.Legacy, cgrps.Hybrid:
+ handler = newV1Handler(cfg, pid)
+ case cgrps.Unified:
+ handler = newV2Handler(cfg, pid)
+ log.Warnf("Gitaly now includes experimental support for CgroupV2. Please proceed with caution and use this experimental feature at your own risk")
+ default:
+ log.Fatalf("unknown cgroup version")
+ }
+
+ return &CGroupManager{
+ cfg: cfg,
+ pid: pid,
+ handler: handler,
+ }
+}
+
+// Setup parent cgroups and repository sub cgroups
+func (cgm *CGroupManager) Setup() error {
+ if err := cgm.handler.setupParent(cgm.configParentResources()); err != nil {
+ return err
+ }
+ if err := cgm.handler.setupRepository(cgm.configRepositoryResources()); err != nil {
+ return err
+ }
+ return nil
+}
+
+// AddCommand adds a Cmd to a cgroup
+func (cgm *CGroupManager) AddCommand(cmd *exec.Cmd, opts ...AddCommandOption) (string, error) {
+ var cfg addCommandCfg
+ for _, opt := range opts {
+ opt(&cfg)
+ }
+
+ key := cfg.cgroupKey
+ if key == "" {
+ key = strings.Join(cmd.Args, "/")
+ }
+
+ checksum := crc32.ChecksumIEEE(
+ []byte(key),
+ )
+
+ if cmd.Process == nil {
+ return "", fmt.Errorf("cannot add command that has not yet been started")
+ }
+
+ groupID := uint(checksum) % cgm.cfg.Repositories.Count
+ cgroupPath := cgm.handler.repoPath(int(groupID))
+
+ return cgroupPath, cgm.handler.addToCgroup(cmd.Process.Pid, cgroupPath)
+}
+
+// Cleanup cleans up cgroups created in Setup.
+func (cgm *CGroupManager) Cleanup() error {
+ return cgm.handler.cleanup()
+}
+
+// Describe is used to generate description information for each CGroupManager prometheus metric
+func (cgm *CGroupManager) Describe(ch chan<- *prometheus.Desc) {
+ prometheus.DescribeByCollect(cgm, ch)
+}
+
+// Collect is used to collect the current values of all CGroupManager prometheus metrics
+func (cgm *CGroupManager) Collect(ch chan<- prometheus.Metric) {
+ cgm.handler.collect(ch)
+}
+
+func (cgm *CGroupManager) currentProcessCgroup() string {
+ return cgm.handler.currentProcessCgroup()
+}
+
+func (cgm *CGroupManager) configParentResources() *specs.LinuxResources {
+ cfsPeriodUs := cfsPeriodUs
+ var parentResources specs.LinuxResources
+ // Leave them `nil` so it takes kernel default unless cfg value above `0`.
+ parentResources.CPU = &specs.LinuxCPU{}
+
+ if cgm.cfg.CPUShares > 0 {
+ parentResources.CPU.Shares = &cgm.cfg.CPUShares
+ }
+
+ if cgm.cfg.CPUQuotaUs > 0 {
+ parentResources.CPU.Quota = &cgm.cfg.CPUQuotaUs
+ parentResources.CPU.Period = &cfsPeriodUs
+ }
+
+ if cgm.cfg.MemoryBytes > 0 {
+ parentResources.Memory = &specs.LinuxMemory{Limit: &cgm.cfg.MemoryBytes}
+ }
+ return &parentResources
+}
+
+func (cgm *CGroupManager) configRepositoryResources() *specs.LinuxResources {
+ cfsPeriodUs := cfsPeriodUs
+ var reposResources specs.LinuxResources
+ // Leave them `nil` so it takes kernel default unless cfg value above `0`.
+ reposResources.CPU = &specs.LinuxCPU{}
+
+ if cgm.cfg.Repositories.CPUShares > 0 {
+ reposResources.CPU.Shares = &cgm.cfg.Repositories.CPUShares
+ }
+
+ if cgm.cfg.Repositories.CPUQuotaUs > 0 {
+ reposResources.CPU.Quota = &cgm.cfg.Repositories.CPUQuotaUs
+ reposResources.CPU.Period = &cfsPeriodUs
+ }
+
+ if cgm.cfg.Repositories.MemoryBytes > 0 {
+ reposResources.Memory = &specs.LinuxMemory{Limit: &cgm.cfg.Repositories.MemoryBytes}
+ }
+ return &reposResources
+}
+
+func pruneOldCgroups(cfg cgroupscfg.Config, logger log.FieldLogger) {
+ pruneOldCgroupsWithMode(cfg, logger, cgrps.Mode())
+}
+
+func pruneOldCgroupsWithMode(cfg cgroupscfg.Config, logger log.FieldLogger, mode cgrps.CGMode) {
+ if cfg.HierarchyRoot == "" {
+ return
+ }
+
+ switch mode {
+ case cgrps.Legacy, cgrps.Hybrid:
+ pruneOldCgroupsV1(cfg, logger)
+ case cgrps.Unified:
+ pruneOldCgroupsV2(cfg, logger)
+ default:
+ log.Fatalf("unknown cgroup version")
+ }
+}
diff --git a/internal/cgroups/metrics.go b/internal/cgroups/metrics.go
new file mode 100644
index 000000000..a8ffa618f
--- /dev/null
+++ b/internal/cgroups/metrics.go
@@ -0,0 +1,87 @@
+package cgroups
+
+import "github.com/prometheus/client_golang/prometheus"
+
+type cgroupsMetrics struct {
+ memoryReclaimAttemptsTotal *prometheus.GaugeVec
+ cpuUsage *prometheus.GaugeVec
+ cpuCFSPeriods *prometheus.Desc
+ cpuCFSThrottledPeriods *prometheus.Desc
+ cpuCFSThrottledTime *prometheus.Desc
+ procs *prometheus.GaugeVec
+}
+
+func newV1CgroupsMetrics() *cgroupsMetrics {
+ return &cgroupsMetrics{
+ memoryReclaimAttemptsTotal: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_memory_reclaim_attempts_total",
+ Help: "Number of memory usage hits limits",
+ },
+ []string{"path"},
+ ),
+ cpuUsage: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_cpu_usage_total",
+ Help: "CPU Usage of Cgroup",
+ },
+ []string{"path", "type"},
+ ),
+ cpuCFSPeriods: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_periods_total",
+ "Number of elapsed enforcement period intervals",
+ []string{"path"}, nil,
+ ),
+ cpuCFSThrottledPeriods: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_throttled_periods_total",
+ "Number of throttled period intervals",
+ []string{"path"}, nil,
+ ),
+ cpuCFSThrottledTime: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_throttled_seconds_total",
+ "Total time duration the Cgroup has been throttled",
+ []string{"path"}, nil,
+ ),
+ procs: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_procs_total",
+ Help: "Total number of procs",
+ },
+ []string{"path", "subsystem"},
+ ),
+ }
+}
+
+func newV2CgroupsMetrics() *cgroupsMetrics {
+ return &cgroupsMetrics{
+ cpuUsage: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_cpu_usage_total",
+ Help: "CPU Usage of Cgroup",
+ },
+ []string{"path", "type"},
+ ),
+ cpuCFSPeriods: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_periods_total",
+ "Number of elapsed enforcement period intervals",
+ []string{"path"}, nil,
+ ),
+ cpuCFSThrottledPeriods: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_throttled_periods_total",
+ "Number of throttled period intervals",
+ []string{"path"}, nil,
+ ),
+ cpuCFSThrottledTime: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_throttled_seconds_total",
+ "Total time duration the Cgroup has been throttled",
+ []string{"path"}, nil,
+ ),
+ procs: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_procs_total",
+ Help: "Total number of procs",
+ },
+ []string{"path", "subsystem"},
+ ),
+ }
+}
diff --git a/internal/cgroups/mock_linux_test.go b/internal/cgroups/mock_linux_test.go
index 2cf735149..135dca76f 100644
--- a/internal/cgroups/mock_linux_test.go
+++ b/internal/cgroups/mock_linux_test.go
@@ -1,3 +1,5 @@
+//go:build linux
+
/*
Adapted from https://github.com/containerd/cgroups/blob/f1d9380fd3c028194db9582825512fdf3f39ab2a/mock_test.go
@@ -25,8 +27,11 @@ import (
"strconv"
"testing"
+ cgrps "github.com/containerd/cgroups/v3"
"github.com/containerd/cgroups/v3/cgroup1"
+ "github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
"gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
"gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
)
@@ -54,13 +59,9 @@ func newMock(t *testing.T) *mockCgroup {
}
}
-func (m *mockCgroup) hierarchy() ([]cgroup1.Subsystem, error) {
- return m.subsystems, nil
-}
-
func (m *mockCgroup) setupMockCgroupFiles(
t *testing.T,
- manager *CGroupV1Manager,
+ manager *CGroupManager,
memFailCount int,
) {
for _, s := range m.subsystems {
@@ -117,3 +118,71 @@ throttled_time 1000000`
}
}
}
+
+func (m *mockCgroup) newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
+ return newCgroupManagerWithMode(cfg, pid, cgrps.Legacy)
+}
+
+func (m *mockCgroup) pruneOldCgroups(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+ pruneOldCgroupsWithMode(cfg, logger, cgrps.Legacy)
+}
+
+type mockCgroupV2 struct {
+ root string
+}
+
+func newMockV2(t *testing.T) *mockCgroupV2 {
+ t.Helper()
+
+ return &mockCgroupV2{
+ root: testhelper.TempDir(t),
+ }
+}
+
+func (m *mockCgroupV2) setupMockCgroupFiles(
+ t *testing.T,
+ manager *CGroupManager,
+) {
+ cgroupPath := filepath.Join(m.root, manager.currentProcessCgroup())
+ require.NoError(t, os.MkdirAll(cgroupPath, perm.SharedDir))
+
+ contentByFilename := map[string]string{
+ "cgroup.procs": "",
+ "cgroup.subtree_control": "cpu cpuset memory",
+ "cgroup.controllers": "cpu cpuset memory",
+ "cpu.max": "max 100000",
+ "cpu.weight": "10",
+ "memory.max": "max",
+ "cpu.stat": `nr_periods 10
+ nr_throttled 20
+ throttled_usec 1000000`,
+ }
+
+ for filename, content := range contentByFilename {
+ controlFilePath := filepath.Join(m.root, manager.cfg.HierarchyRoot, filename)
+ require.NoError(t, os.WriteFile(controlFilePath, []byte(content), perm.SharedFile))
+ }
+
+ for filename, content := range contentByFilename {
+ controlFilePath := filepath.Join(cgroupPath, filename)
+ require.NoError(t, os.WriteFile(controlFilePath, []byte(content), perm.SharedFile))
+ }
+
+ for shard := uint(0); shard < manager.cfg.Repositories.Count; shard++ {
+ shardPath := filepath.Join(cgroupPath, fmt.Sprintf("repos-%d", shard))
+ require.NoError(t, os.MkdirAll(shardPath, perm.SharedDir))
+
+ for filename, content := range contentByFilename {
+ shardControlFilePath := filepath.Join(shardPath, filename)
+ require.NoError(t, os.WriteFile(shardControlFilePath, []byte(content), perm.SharedFile))
+ }
+ }
+}
+
+func (m *mockCgroupV2) newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
+ return newCgroupManagerWithMode(cfg, pid, cgrps.Unified)
+}
+
+func (m *mockCgroupV2) pruneOldCgroups(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+ pruneOldCgroupsWithMode(cfg, logger, cgrps.Unified)
+}
diff --git a/internal/cgroups/v1.go b/internal/cgroups/v1.go
deleted file mode 100644
index 8935bcdc5..000000000
--- a/internal/cgroups/v1.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build !linux
-
-package cgroups
-
-import (
- "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
-)
-
-// For systems other than Linux, we return a noop manager if cgroups was enabled.
-func newV1Manager(cfg cgroups.Config, pid int) *NoopManager {
- return &NoopManager{}
-}
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go
index 09bf23619..22e9ab841 100644
--- a/internal/cgroups/v1_linux.go
+++ b/internal/cgroups/v1_linux.go
@@ -1,9 +1,9 @@
+//go:build linux
+
package cgroups
import (
"fmt"
- "hash/crc32"
- "os/exec"
"path/filepath"
"strings"
"time"
@@ -11,167 +11,59 @@ import (
"github.com/containerd/cgroups/v3/cgroup1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/client_golang/prometheus"
+ "github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
"gitlab.com/gitlab-org/gitaly/v16/internal/log"
)
-// cfs_period_us hardcoded to be 100ms.
-const cfsPeriodUs uint64 = 100000
+type cgroupV1Handler struct {
+ cfg cgroupscfg.Config
+ hierarchy func() ([]cgroup1.Subsystem, error)
-// CGroupV1Manager is the manager for cgroups v1
-type CGroupV1Manager struct {
- cfg cgroupscfg.Config
- hierarchy func() ([]cgroup1.Subsystem, error)
- memoryReclaimAttemptsTotal *prometheus.GaugeVec
- cpuUsage *prometheus.GaugeVec
- cpuCFSPeriods *prometheus.Desc
- cpuCFSThrottledPeriods *prometheus.Desc
- cpuCFSThrottledTime *prometheus.Desc
- procs *prometheus.GaugeVec
- pid int
+ *cgroupsMetrics
+ pid int
}
-func newV1Manager(cfg cgroupscfg.Config, pid int) *CGroupV1Manager {
- return &CGroupV1Manager{
+func newV1Handler(cfg cgroupscfg.Config, pid int) *cgroupV1Handler {
+ return &cgroupV1Handler{
cfg: cfg,
pid: pid,
hierarchy: func() ([]cgroup1.Subsystem, error) {
return defaultSubsystems(cfg.Mountpoint)
},
- memoryReclaimAttemptsTotal: prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "gitaly_cgroup_memory_reclaim_attempts_total",
- Help: "Number of memory usage hits limits",
- },
- []string{"path"},
- ),
- cpuUsage: prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "gitaly_cgroup_cpu_usage_total",
- Help: "CPU Usage of Cgroup",
- },
- []string{"path", "type"},
- ),
- cpuCFSPeriods: prometheus.NewDesc(
- "gitaly_cgroup_cpu_cfs_periods_total",
- "Number of elapsed enforcement period intervals",
- []string{"path"}, nil,
- ),
- cpuCFSThrottledPeriods: prometheus.NewDesc(
- "gitaly_cgroup_cpu_cfs_throttled_periods_total",
- "Number of throttled period intervals",
- []string{"path"}, nil,
- ),
- cpuCFSThrottledTime: prometheus.NewDesc(
- "gitaly_cgroup_cpu_cfs_throttled_seconds_total",
- "Total time duration the Cgroup has been throttled",
- []string{"path"}, nil,
- ),
- procs: prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "gitaly_cgroup_procs_total",
- Help: "Total number of procs",
- },
- []string{"path", "subsystem"},
- ),
+ cgroupsMetrics: newV1CgroupsMetrics(),
}
}
-//nolint:revive // This is unintentionally missing documentation.
-func (cg *CGroupV1Manager) Setup() error {
- cfsPeriodUs := cfsPeriodUs
-
- var parentResources specs.LinuxResources
- // Leave them `nil` so it takes kernel default unless cfg value above `0`.
- parentResources.CPU = &specs.LinuxCPU{}
-
- if cg.cfg.CPUShares > 0 {
- parentResources.CPU.Shares = &cg.cfg.CPUShares
- }
-
- if cg.cfg.CPUQuotaUs > 0 {
- parentResources.CPU.Quota = &cg.cfg.CPUQuotaUs
- parentResources.CPU.Period = &cfsPeriodUs
- }
-
- if cg.cfg.MemoryBytes > 0 {
- parentResources.Memory = &specs.LinuxMemory{Limit: &cg.cfg.MemoryBytes}
- }
-
+func (cvh *cgroupV1Handler) setupParent(parentResources *specs.LinuxResources) error {
if _, err := cgroup1.New(
- cgroup1.StaticPath(cg.currentProcessCgroup()),
- &parentResources,
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.StaticPath(cvh.currentProcessCgroup()),
+ parentResources,
+ cgroup1.WithHiearchy(cvh.hierarchy),
); err != nil {
return fmt.Errorf("failed creating parent cgroup: %w", err)
}
+ return nil
+}
- var reposResources specs.LinuxResources
- // Leave them `nil` so it takes kernel default unless cfg value above `0`.
- reposResources.CPU = &specs.LinuxCPU{}
-
- if cg.cfg.Repositories.CPUShares > 0 {
- reposResources.CPU.Shares = &cg.cfg.Repositories.CPUShares
- }
-
- if cg.cfg.Repositories.CPUQuotaUs > 0 {
- reposResources.CPU.Quota = &cg.cfg.Repositories.CPUQuotaUs
- reposResources.CPU.Period = &cfsPeriodUs
- }
-
- if cg.cfg.Repositories.MemoryBytes > 0 {
- reposResources.Memory = &specs.LinuxMemory{Limit: &cg.cfg.Repositories.MemoryBytes}
- }
-
- for i := 0; i < int(cg.cfg.Repositories.Count); i++ {
+func (cvh *cgroupV1Handler) setupRepository(reposResources *specs.LinuxResources) error {
+ for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
if _, err := cgroup1.New(
- cgroup1.StaticPath(cg.repoPath(i)),
- &reposResources,
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.StaticPath(cvh.repoPath(i)),
+ reposResources,
+ cgroup1.WithHiearchy(cvh.hierarchy),
); err != nil {
return fmt.Errorf("failed creating repository cgroup: %w", err)
}
}
-
return nil
}
-// AddCommand adds the given command to one of the CGroup's buckets. The bucket used for the command
-// is determined by hashing the repository storage and path. No error is returned if the command has already
-// exited.
-func (cg *CGroupV1Manager) AddCommand(
- cmd *exec.Cmd,
- opts ...AddCommandOption,
-) (string, error) {
- var cfg addCommandCfg
- for _, opt := range opts {
- opt(&cfg)
- }
-
- key := cfg.cgroupKey
- if key == "" {
- key = strings.Join(cmd.Args, "/")
- }
-
- checksum := crc32.ChecksumIEEE(
- []byte(key),
- )
-
- if cmd.Process == nil {
- return "", fmt.Errorf("cannot add command that has not yet been started")
- }
-
- groupID := uint(checksum) % cg.cfg.Repositories.Count
- cgroupPath := cg.repoPath(int(groupID))
-
- return cgroupPath, cg.addToCgroup(cmd.Process.Pid, cgroupPath)
-}
-
-func (cg *CGroupV1Manager) addToCgroup(pid int, cgroupPath string) error {
+func (cvh *cgroupV1Handler) addToCgroup(pid int, cgroupPath string) error {
control, err := cgroup1.Load(
cgroup1.StaticPath(cgroupPath),
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.WithHiearchy(cvh.hierarchy),
)
if err != nil {
return fmt.Errorf("failed loading %s cgroup: %w", cgroupPath, err)
@@ -189,18 +81,17 @@ func (cg *CGroupV1Manager) addToCgroup(pid int, cgroupPath string) error {
return nil
}
-// Collect collects metrics from the cgroups controller
-func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
- if !cg.cfg.MetricsEnabled {
+func (cvh *cgroupV1Handler) collect(ch chan<- prometheus.Metric) {
+ if !cvh.cfg.MetricsEnabled {
return
}
- for i := 0; i < int(cg.cfg.Repositories.Count); i++ {
- repoPath := cg.repoPath(i)
+ for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
+ repoPath := cvh.repoPath(i)
logger := log.Default().WithField("cgroup_path", repoPath)
control, err := cgroup1.Load(
cgroup1.StaticPath(repoPath),
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.WithHiearchy(cvh.hierarchy),
)
if err != nil {
logger.WithError(err).Warn("unable to load cgroup controller")
@@ -210,41 +101,41 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
if metrics, err := control.Stat(); err != nil {
logger.WithError(err).Warn("unable to get cgroup stats")
} else {
- memoryMetric := cg.memoryReclaimAttemptsTotal.WithLabelValues(repoPath)
+ memoryMetric := cvh.memoryReclaimAttemptsTotal.WithLabelValues(repoPath)
memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt))
ch <- memoryMetric
- cpuUserMetric := cg.cpuUsage.WithLabelValues(repoPath, "user")
+ cpuUserMetric := cvh.cpuUsage.WithLabelValues(repoPath, "user")
cpuUserMetric.Set(float64(metrics.CPU.Usage.User))
ch <- cpuUserMetric
ch <- prometheus.MustNewConstMetric(
- cg.cpuCFSPeriods,
+ cvh.cpuCFSPeriods,
prometheus.CounterValue,
float64(metrics.CPU.Throttling.Periods),
repoPath,
)
ch <- prometheus.MustNewConstMetric(
- cg.cpuCFSThrottledPeriods,
+ cvh.cpuCFSThrottledPeriods,
prometheus.CounterValue,
float64(metrics.CPU.Throttling.ThrottledPeriods),
repoPath,
)
ch <- prometheus.MustNewConstMetric(
- cg.cpuCFSThrottledTime,
+ cvh.cpuCFSThrottledTime,
prometheus.CounterValue,
float64(metrics.CPU.Throttling.ThrottledTime)/float64(time.Second),
repoPath,
)
- cpuKernelMetric := cg.cpuUsage.WithLabelValues(repoPath, "kernel")
+ cpuKernelMetric := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
cpuKernelMetric.Set(float64(metrics.CPU.Usage.Kernel))
ch <- cpuKernelMetric
}
- if subsystems, err := cg.hierarchy(); err != nil {
+ if subsystems, err := cvh.hierarchy(); err != nil {
logger.WithError(err).Warn("unable to get cgroup hierarchy")
} else {
for _, subsystem := range subsystems {
@@ -256,7 +147,7 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
continue
}
- procsMetric := cg.procs.WithLabelValues(repoPath, string(subsystem.Name()))
+ procsMetric := cvh.procs.WithLabelValues(repoPath, string(subsystem.Name()))
procsMetric.Set(float64(len(processes)))
ch <- procsMetric
}
@@ -264,18 +155,12 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
}
}
-// Describe describes the cgroup metrics that Collect provides
-func (cg *CGroupV1Manager) Describe(ch chan<- *prometheus.Desc) {
- prometheus.DescribeByCollect(cg, ch)
-}
-
-//nolint:revive // This is unintentionally missing documentation.
-func (cg *CGroupV1Manager) Cleanup() error {
- processCgroupPath := cg.currentProcessCgroup()
+func (cvh *cgroupV1Handler) cleanup() error {
+ processCgroupPath := cvh.currentProcessCgroup()
control, err := cgroup1.Load(
cgroup1.StaticPath(processCgroupPath),
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.WithHiearchy(cvh.hierarchy),
)
if err != nil {
return fmt.Errorf("failed loading cgroup %s: %w", processCgroupPath, err)
@@ -288,12 +173,12 @@ func (cg *CGroupV1Manager) Cleanup() error {
return nil
}
-func (cg *CGroupV1Manager) repoPath(groupID int) string {
- return filepath.Join(cg.currentProcessCgroup(), fmt.Sprintf("repos-%d", groupID))
+func (cvh *cgroupV1Handler) repoPath(groupID int) string {
+ return filepath.Join(cvh.currentProcessCgroup(), fmt.Sprintf("repos-%d", groupID))
}
-func (cg *CGroupV1Manager) currentProcessCgroup() string {
- return config.GetGitalyProcessTempDir(cg.cfg.HierarchyRoot, cg.pid)
+func (cvh *cgroupV1Handler) currentProcessCgroup() string {
+ return config.GetGitalyProcessTempDir(cvh.cfg.HierarchyRoot, cvh.pid)
}
func defaultSubsystems(root string) ([]cgroup1.Subsystem, error) {
@@ -304,3 +189,21 @@ func defaultSubsystems(root string) ([]cgroup1.Subsystem, error) {
return subsystems, nil
}
+
+// pruneOldCgroupsV1 runs config.PruneOldGitalyProcessDirectories against the
+// "memory" and "cpu" directories found under cfg.Mountpoint, each joined with
+// cfg.HierarchyRoot. Errors are only logged at error level; a failure for one
+// controller does not stop the other one from being processed.
+func pruneOldCgroupsV1(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+	memoryRoot := filepath.Join(cfg.Mountpoint, "memory", cfg.HierarchyRoot)
+	if err := config.PruneOldGitalyProcessDirectories(logger, memoryRoot); err != nil {
+		logger.WithError(err).Error("failed to clean up memory cgroups")
+	}
+
+	cpuRoot := filepath.Join(cfg.Mountpoint, "cpu", cfg.HierarchyRoot)
+	if err := config.PruneOldGitalyProcessDirectories(logger, cpuRoot); err != nil {
+		logger.WithError(err).Error("failed to clean up cpu cgroups")
+	}
+}
diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go
index a364d7965..a68ebed4d 100644
--- a/internal/cgroups/v1_linux_test.go
+++ b/internal/cgroups/v1_linux_test.go
@@ -1,9 +1,11 @@
+//go:build linux
+
package cgroups
import (
- "bytes"
"fmt"
"hash/crc32"
+ "io/fs"
"os"
"os/exec"
"path/filepath"
@@ -11,7 +13,9 @@ import (
"strings"
"testing"
+ cgrps "github.com/containerd/cgroups/v3"
"github.com/prometheus/client_golang/prometheus/testutil"
+ "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
@@ -31,6 +35,15 @@ func defaultCgroupsConfig() cgroups.Config {
}
}
+// TestNewManagerV1 checks that both the legacy and the hybrid cgroup modes
+// make newCgroupManagerWithMode pick the v1 handler.
+func TestNewManagerV1(t *testing.T) {
+	cfg := cgroups.Config{Repositories: cgroups.Repositories{Count: 10}}
+
+	for _, mode := range []cgrps.CGMode{cgrps.Legacy, cgrps.Hybrid} {
+		manager := newCgroupManagerWithMode(cfg, 1, mode)
+		require.IsType(t, &cgroupV1Handler{}, manager.handler)
+	}
+}
+
func TestSetup_ParentCgroups(t *testing.T) {
tests := []struct {
name string
@@ -84,12 +97,9 @@ func TestSetup_ParentCgroups(t *testing.T) {
mock := newMock(t)
pid := 1
tt.cfg.HierarchyRoot = "gitaly"
+ tt.cfg.Mountpoint = mock.root
- v1Manager := &CGroupV1Manager{
- cfg: tt.cfg,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager := mock.newCgroupManager(tt.cfg, pid)
require.NoError(t, v1Manager.Setup())
memoryLimitPath := filepath.Join(
@@ -167,12 +177,10 @@ func TestSetup_RepoCgroups(t *testing.T) {
cfg := defaultCgroupsConfig()
cfg.Repositories = tt.cfg
cfg.Repositories.Count = 3
+ cfg.HierarchyRoot = "gitaly"
+ cfg.Mountpoint = mock.root
- v1Manager := &CGroupV1Manager{
- cfg: cfg,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager := mock.newCgroupManager(cfg, pid)
require.NoError(t, v1Manager.Setup())
@@ -208,24 +216,18 @@ func TestAddCommand(t *testing.T) {
config.Repositories.Count = 10
config.Repositories.MemoryBytes = 1024
config.Repositories.CPUShares = 16
+ config.HierarchyRoot = "gitaly"
+ config.Mountpoint = mock.root
pid := 1
- v1Manager1 := &CGroupV1Manager{
- cfg: config,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager1 := mock.newCgroupManager(config, pid)
require.NoError(t, v1Manager1.Setup())
ctx := testhelper.Context(t)
cmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
require.NoError(t, cmd2.Run())
- v1Manager2 := &CGroupV1Manager{
- cfg: config,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager2 := mock.newCgroupManager(config, pid)
t.Run("without overridden key", func(t *testing.T) {
_, err := v1Manager2.AddCommand(cmd2)
@@ -270,11 +272,11 @@ func TestCleanup(t *testing.T) {
mock := newMock(t)
pid := 1
- v1Manager := &CGroupV1Manager{
- cfg: defaultCgroupsConfig(),
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ cfg := defaultCgroupsConfig()
+ cfg.Mountpoint = mock.root
+
+ v1Manager := mock.newCgroupManager(cfg, pid)
+
require.NoError(t, v1Manager.Setup())
require.NoError(t, v1Manager.Cleanup())
@@ -288,48 +290,17 @@ func TestCleanup(t *testing.T) {
}
func TestMetrics(t *testing.T) {
- t.Parallel()
-
- mock := newMock(t)
-
- config := defaultCgroupsConfig()
- config.Repositories.Count = 1
- config.Repositories.MemoryBytes = 1048576
- config.Repositories.CPUShares = 16
-
- v1Manager1 := newV1Manager(config, 1)
- v1Manager1.hierarchy = mock.hierarchy
-
- mock.setupMockCgroupFiles(t, v1Manager1, 2)
-
- require.NoError(t, v1Manager1.Setup())
-
- ctx := testhelper.Context(t)
-
- cmd := exec.CommandContext(ctx, "ls", "-hal", ".")
- require.NoError(t, cmd.Start())
- _, err := v1Manager1.AddCommand(cmd)
- require.NoError(t, err)
-
- gitCmd1 := exec.CommandContext(ctx, "ls", "-hal", ".")
- require.NoError(t, gitCmd1.Start())
- _, err = v1Manager1.AddCommand(gitCmd1)
- require.NoError(t, err)
-
- gitCmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
- require.NoError(t, gitCmd2.Start())
- _, err = v1Manager1.AddCommand(gitCmd2)
- require.NoError(t, err)
- defer func() {
- require.NoError(t, gitCmd2.Wait())
- }()
-
- require.NoError(t, cmd.Wait())
- require.NoError(t, gitCmd1.Wait())
-
- repoCgroupPath := filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0")
-
- expected := strings.NewReader(strings.ReplaceAll(`# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup
+ tests := []struct {
+ name string
+ metricsEnabled bool
+ pid int
+ expect string
+ }{
+ {
+ name: "metrics enabled: true",
+ metricsEnabled: true,
+ pid: 1,
+ expect: `# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup
# TYPE gitaly_cgroup_cpu_usage_total gauge
gitaly_cgroup_cpu_usage_total{path="%s",type="kernel"} 0
gitaly_cgroup_cpu_usage_total{path="%s",type="user"} 0
@@ -349,20 +320,223 @@ gitaly_cgroup_cpu_cfs_throttled_periods_total{path="%s"} 20
# HELP gitaly_cgroup_cpu_cfs_throttled_seconds_total Total time duration the Cgroup has been throttled
# TYPE gitaly_cgroup_cpu_cfs_throttled_seconds_total counter
gitaly_cgroup_cpu_cfs_throttled_seconds_total{path="%s"} 0.001
-`, "%s", repoCgroupPath))
+`,
+ },
+ {
+ name: "metrics enabled: false",
+ metricsEnabled: false,
+ pid: 2,
+ },
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+ mock := newMock(t)
+
+ config := defaultCgroupsConfig()
+ config.Repositories.Count = 1
+ config.Repositories.MemoryBytes = 1048576
+ config.Repositories.CPUShares = 16
+ config.Mountpoint = mock.root
+ config.MetricsEnabled = tt.metricsEnabled
+
+ v1Manager1 := mock.newCgroupManager(config, tt.pid)
+
+ mock.setupMockCgroupFiles(t, v1Manager1, 2)
+ require.NoError(t, v1Manager1.Setup())
+
+ ctx := testhelper.Context(t)
+
+ cmd := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, cmd.Start())
+ _, err := v1Manager1.AddCommand(cmd)
+ require.NoError(t, err)
+
+ gitCmd1 := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, gitCmd1.Start())
+ _, err = v1Manager1.AddCommand(gitCmd1)
+ require.NoError(t, err)
+
+ gitCmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, gitCmd2.Start())
+ _, err = v1Manager1.AddCommand(gitCmd2)
+ require.NoError(t, err)
+ defer func() {
+ require.NoError(t, gitCmd2.Wait())
+ }()
+
+ require.NoError(t, cmd.Wait())
+ require.NoError(t, gitCmd1.Wait())
- for _, metricsEnabled := range []bool{true, false} {
- t.Run(fmt.Sprintf("metrics enabled: %v", metricsEnabled), func(t *testing.T) {
- v1Manager1.cfg.MetricsEnabled = metricsEnabled
+ repoCgroupPath := filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0")
+
+ expected := strings.NewReader(strings.ReplaceAll(tt.expect, "%s", repoCgroupPath))
+ assert.NoError(t, testutil.CollectAndCompare(v1Manager1, expected))
+ })
+ }
+}
+
+func TestPruneOldCgroups(t *testing.T) {
+ t.Parallel()
+
+ testCases := []struct {
+ desc string
+ cfg cgroups.Config
+ expectedPruned bool
+ // setup returns a pid
+ setup func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int
+ }{
+ {
+ desc: "process belongs to another user",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ pid := 1
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+
+ return pid
+ },
+ expectedPruned: true,
+ },
+ {
+ desc: "no hierarchy root",
+ cfg: cgroups.Config{
+ HierarchyRoot: "",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ pid := 1
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+ return 1
+ },
+ expectedPruned: false,
+ },
+ {
+ desc: "pid of finished process",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ cmd := exec.Command("ls")
+ require.NoError(t, cmd.Run())
+ pid := cmd.Process.Pid
+
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+
+ memoryRoot := filepath.Join(
+ cfg.Mountpoint,
+ "memory",
+ cfg.HierarchyRoot,
+ "memory.limit_in_bytes",
+ )
+ require.NoError(t, os.WriteFile(memoryRoot, []byte{}, fs.ModeAppend))
+
+ return pid
+ },
+ expectedPruned: true,
+ },
+ {
+ desc: "pid of running process",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ pid := os.Getpid()
+
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+
+ return pid
+ },
+ expectedPruned: false,
+ },
+ {
+ desc: "gitaly-0 directory is deleted",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ cgroupManager := mock.newCgroupManager(cfg, 0)
+ require.NoError(t, cgroupManager.Setup())
+
+ return 0
+ },
+ expectedPruned: true,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ mock := newMock(t)
+ tc.cfg.Mountpoint = mock.root
+
+ memoryRoot := filepath.Join(
+ tc.cfg.Mountpoint,
+ "memory",
+ tc.cfg.HierarchyRoot,
+ )
+ cpuRoot := filepath.Join(
+ tc.cfg.Mountpoint,
+ "cpu",
+ tc.cfg.HierarchyRoot,
+ )
+
+ require.NoError(t, os.MkdirAll(cpuRoot, perm.PublicDir))
+ require.NoError(t, os.MkdirAll(memoryRoot, perm.PublicDir))
+
+ pid := tc.setup(t, tc.cfg, mock)
+
+ logger, hook := test.NewNullLogger()
+
+ mock.pruneOldCgroups(tc.cfg, logger)
+
+ // create cgroups directories with a different pid
+ oldGitalyProcessMemoryDir := filepath.Join(
+ memoryRoot,
+ fmt.Sprintf("gitaly-%d", pid),
+ )
+ oldGitalyProcesssCPUDir := filepath.Join(
+ cpuRoot,
+ fmt.Sprintf("gitaly-%d", pid),
+ )
- if metricsEnabled {
- assert.NoError(t, testutil.CollectAndCompare(
- v1Manager1,
- expected))
+ if tc.expectedPruned {
+ require.NoDirExists(t, oldGitalyProcessMemoryDir)
+ require.NoDirExists(t, oldGitalyProcesssCPUDir)
} else {
- assert.NoError(t, testutil.CollectAndCompare(
- v1Manager1,
- bytes.NewBufferString("")))
+ require.DirExists(t, oldGitalyProcessMemoryDir)
+ require.DirExists(t, oldGitalyProcesssCPUDir)
+ require.Len(t, hook.Entries, 0)
}
})
}
diff --git a/internal/cgroups/v2_linux.go b/internal/cgroups/v2_linux.go
new file mode 100644
index 000000000..a2f81f60b
--- /dev/null
+++ b/internal/cgroups/v2_linux.go
@@ -0,0 +1,175 @@
+//go:build linux
+
+package cgroups
+
+import (
+ "errors"
+ "fmt"
+ "io/fs"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "github.com/containerd/cgroups/v3/cgroup2"
+ "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/sirupsen/logrus"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/log"
+)
+
+// cgroupV2Handler implements the cgroup operations (setup, adding processes,
+// metrics collection and cleanup) on top of the cgroup2 package.
+type cgroupV2Handler struct {
+ // cfg supplies the mountpoint, hierarchy root, repository cgroup count
+ // and the metrics toggle read by the handler's methods.
+ cfg cgroupscfg.Config
+
+ // cgroupsMetrics is embedded so its metric fields (cpuUsage, procs, ...)
+ // can be accessed directly from the handler.
+ *cgroupsMetrics
+ // pid is used to derive the path of this process' parent cgroup.
+ pid int
+}
+
+// newV2Handler returns a cgroupV2Handler for the given configuration and pid,
+// equipped with a metrics set obtained from newV2CgroupsMetrics.
+func newV2Handler(cfg cgroupscfg.Config, pid int) *cgroupV2Handler {
+	handler := new(cgroupV2Handler)
+	handler.cfg = cfg
+	handler.pid = pid
+	handler.cgroupsMetrics = newV2CgroupsMetrics()
+
+	return handler
+}
+
+// setupParent creates the cgroup of the current process (see
+// currentProcessCgroup) below the configured mountpoint and applies
+// parentResources to it. It returns a wrapped error when the cgroup manager
+// cannot be created.
+func (cvh *cgroupV2Handler) setupParent(parentResources *specs.LinuxResources) error {
+	parentPath := "/" + cvh.currentProcessCgroup()
+	resources := cgroup2.ToResources(parentResources)
+
+	_, err := cgroup2.NewManager(cvh.cfg.Mountpoint, parentPath, resources)
+	if err != nil {
+		return fmt.Errorf("failed creating parent cgroup: %w", err)
+	}
+
+	return nil
+}
+
+// setupRepository creates one cgroup per repository bucket, i.e.
+// cfg.Repositories.Count cgroups named after repoPath(0..Count-1), and applies
+// reposResources to each of them. It stops at the first failure and returns
+// it wrapped.
+func (cvh *cgroupV2Handler) setupRepository(reposResources *specs.LinuxResources) error {
+	repoCount := int(cvh.cfg.Repositories.Count)
+
+	for groupID := 0; groupID < repoCount; groupID++ {
+		groupPath := "/" + cvh.repoPath(groupID)
+
+		// The resources are converted anew for every cgroup.
+		_, err := cgroup2.NewManager(cvh.cfg.Mountpoint, groupPath, cgroup2.ToResources(reposResources))
+		if err != nil {
+			return fmt.Errorf("failed creating repository cgroup: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// addToCgroup loads the cgroup at cgroupPath (relative to the configured
+// mountpoint) and adds the process identified by pid to it.
+//
+// An error whose text contains "no such process" is treated as success,
+// because the command may already have exited before it could be added.
+func (cvh *cgroupV2Handler) addToCgroup(pid int, cgroupPath string) error {
+	control, err := cgroup2.Load("/"+cgroupPath, cgroup2.WithMountpoint(cvh.cfg.Mountpoint))
+	if err != nil {
+		return fmt.Errorf("failed loading %s cgroup: %w", cgroupPath, err)
+	}
+
+	err = control.AddProc(uint64(pid))
+	switch {
+	case err == nil:
+		return nil
+	case strings.Contains(err.Error(), "no such process"):
+		// The process finished too quickly; nothing left to constrain.
+		return nil
+	default:
+		return fmt.Errorf("failed adding process to cgroup: %w", err)
+	}
+}
+
+// collect sends the metrics of every repository cgroup to ch. It does nothing
+// when metrics are disabled in the configuration.
+//
+// For each repository cgroup it emits CPU usage gauges (user and kernel), the
+// CFS period, throttled-period and throttled-time counters, and one process
+// count gauge per controller reported by the cgroup. Failures are logged as
+// warnings: a cgroup that cannot be loaded aborts the whole collection, while
+// failures to read stats, controllers or processes only skip the affected
+// metrics.
+func (cvh *cgroupV2Handler) collect(ch chan<- prometheus.Metric) {
+	if !cvh.cfg.MetricsEnabled {
+		return
+	}
+
+	repoCount := int(cvh.cfg.Repositories.Count)
+	for groupID := 0; groupID < repoCount; groupID++ {
+		repoPath := cvh.repoPath(groupID)
+		logger := log.Default().WithField("cgroup_path", repoPath)
+
+		control, err := cgroup2.Load("/"+repoPath, cgroup2.WithMountpoint(cvh.cfg.Mountpoint))
+		if err != nil {
+			logger.WithError(err).Warn("unable to load cgroup controller")
+			return
+		}
+
+		stats, err := control.Stat()
+		if err == nil {
+			userUsage := cvh.cpuUsage.WithLabelValues(repoPath, "user")
+			userUsage.Set(float64(stats.CPU.UserUsec))
+			ch <- userUsage
+
+			ch <- prometheus.MustNewConstMetric(
+				cvh.cpuCFSPeriods,
+				prometheus.CounterValue,
+				float64(stats.CPU.NrPeriods),
+				repoPath,
+			)
+
+			ch <- prometheus.MustNewConstMetric(
+				cvh.cpuCFSThrottledPeriods,
+				prometheus.CounterValue,
+				float64(stats.CPU.NrThrottled),
+				repoPath,
+			)
+
+			// NOTE(review): ThrottledUsec is presumably expressed in
+			// microseconds (going by its name), whereas time.Second is a
+			// number of nanoseconds; confirm this division really yields
+			// seconds.
+			ch <- prometheus.MustNewConstMetric(
+				cvh.cpuCFSThrottledTime,
+				prometheus.CounterValue,
+				float64(stats.CPU.ThrottledUsec)/float64(time.Second),
+				repoPath,
+			)
+
+			kernelUsage := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
+			kernelUsage.Set(float64(stats.CPU.SystemUsec))
+			ch <- kernelUsage
+		} else {
+			logger.WithError(err).Warn("unable to get cgroup stats")
+		}
+
+		controllers, err := control.Controllers()
+		if err != nil {
+			logger.WithError(err).Warn("unable to get cgroup hierarchy")
+			continue
+		}
+
+		pids, err := control.Procs(true)
+		if err != nil {
+			logger.WithError(err).Warn("unable to get process list")
+			continue
+		}
+
+		// The same process count is reported for every controller.
+		procCount := float64(len(pids))
+		for _, controller := range controllers {
+			procsGauge := cvh.procs.WithLabelValues(repoPath, controller)
+			procsGauge.Set(procCount)
+			ch <- procsGauge
+		}
+	}
+}
+
+// cleanup loads the cgroup of the current process and deletes it. Both a
+// failure to load and a failure to delete are returned wrapped, mentioning
+// the cgroup path.
+func (cvh *cgroupV2Handler) cleanup() error {
+	cgroupPath := cvh.currentProcessCgroup()
+
+	control, loadErr := cgroup2.Load("/"+cgroupPath, cgroup2.WithMountpoint(cvh.cfg.Mountpoint))
+	if loadErr != nil {
+		return fmt.Errorf("failed loading cgroup %s: %w", cgroupPath, loadErr)
+	}
+
+	deleteErr := control.Delete()
+	if deleteErr != nil {
+		return fmt.Errorf("failed cleaning up cgroup %s: %w", cgroupPath, deleteErr)
+	}
+
+	return nil
+}
+
+// repoPath returns the path of the repository cgroup with the given ID: a
+// "repos-<groupID>" entry below the current process' cgroup.
+func (cvh *cgroupV2Handler) repoPath(groupID int) string {
+	parent := cvh.currentProcessCgroup()
+	leaf := fmt.Sprintf("repos-%d", groupID)
+
+	return filepath.Join(parent, leaf)
+}
+
+// currentProcessCgroup returns the cgroup path of the handler's process, as
+// computed by config.GetGitalyProcessTempDir from the hierarchy root and pid.
+func (cvh *cgroupV2Handler) currentProcessCgroup() string {
+	hierarchyRoot := cvh.cfg.HierarchyRoot
+
+	return config.GetGitalyProcessTempDir(hierarchyRoot, cvh.pid)
+}
+
+// pruneOldCgroupsV2 runs config.PruneOldGitalyProcessDirectories against the
+// hierarchy root below cfg.Mountpoint. With cgroups v2 there is a single
+// directory tree rather than one per controller, so only one root is pruned.
+//
+// Nothing is returned: errors that unwrap to *fs.PathError are ignored
+// silently (presumably because the hierarchy root may not exist yet; confirm
+// with callers), and any other error is logged at error level.
+func pruneOldCgroupsV2(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+	root := filepath.Join(cfg.Mountpoint, cfg.HierarchyRoot)
+
+	err := config.PruneOldGitalyProcessDirectories(logger, root)
+	if err == nil {
+		return
+	}
+
+	var pathError *fs.PathError
+	if errors.As(err, &pathError) {
+		return
+	}
+
+	// The message deliberately does not name a controller: the pruned root is
+	// shared by all controllers, not specific to "cpu".
+	logger.WithError(err).Error("failed to clean up cgroups")
+}
diff --git a/internal/cgroups/v2_linux_test.go b/internal/cgroups/v2_linux_test.go
new file mode 100644
index 000000000..834a148cd
--- /dev/null
+++ b/internal/cgroups/v2_linux_test.go
@@ -0,0 +1,546 @@
+//go:build linux
+
+package cgroups
+
+import (
+ "fmt"
+ "hash/crc32"
+ "io/fs"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "testing"
+
+ cgrps "github.com/containerd/cgroups/v3"
+ "github.com/prometheus/client_golang/prometheus/testutil"
+ "github.com/sirupsen/logrus/hooks/test"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+)
+
+// defaultCgroupsV2Config returns the baseline configuration shared by the v2
+// tests: hierarchy root "gitaly" and three repository cgroups with memory,
+// CPU shares and CPU quota limits set.
+func defaultCgroupsV2Config() cgroups.Config {
+	repositories := cgroups.Repositories{
+		Count:       3,
+		MemoryBytes: 1024000,
+		CPUShares:   256,
+		CPUQuotaUs:  2000,
+	}
+
+	return cgroups.Config{
+		HierarchyRoot: "gitaly",
+		Repositories:  repositories,
+	}
+}
+
+// TestNewManagerV2 checks that the unified cgroup mode makes
+// newCgroupManagerWithMode pick the v2 handler.
+func TestNewManagerV2(t *testing.T) {
+	repositories := cgroups.Repositories{Count: 10}
+	cfg := cgroups.Config{Repositories: repositories}
+
+	require.IsType(t, &cgroupV2Handler{}, newCgroupManagerWithMode(cfg, 1, cgrps.Unified).handler)
+}
+
+// TestSetup_ParentCgroupsV2 verifies that Setup writes the configured parent
+// limits into memory.max, cpu.weight and cpu.max of the process cgroup. Limits
+// a test case leaves at their zero value are not asserted (see the
+// requireCgroupWith* helpers).
+func TestSetup_ParentCgroupsV2(t *testing.T) {
+	testCases := []struct {
+		name            string
+		cfg             cgroups.Config
+		wantMemoryBytes int
+		wantCPUWeight   int
+		wantCPUMax      string
+	}{
+		{
+			name: "all config specified",
+			cfg: cgroups.Config{
+				MemoryBytes: 102400,
+				CPUShares:   256,
+				CPUQuotaUs:  2000,
+			},
+			wantMemoryBytes: 102400,
+			wantCPUWeight:   256,
+			wantCPUMax:      "2000 100000",
+		},
+		{
+			name:            "only memory limit set",
+			cfg:             cgroups.Config{MemoryBytes: 102400},
+			wantMemoryBytes: 102400,
+		},
+		{
+			name:          "only cpu shares set",
+			cfg:           cgroups.Config{CPUShares: 512},
+			wantCPUWeight: 512,
+		},
+		{
+			name:       "only cpu quota set",
+			cfg:        cgroups.Config{CPUQuotaUs: 2000},
+			wantCPUMax: "2000 100000",
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			pid := 1
+			mock := newMockV2(t)
+
+			cfg := tc.cfg
+			cfg.HierarchyRoot = "gitaly"
+			cfg.Mountpoint = mock.root
+
+			manager := mock.newCgroupManager(cfg, pid)
+			mock.setupMockCgroupFiles(t, manager)
+
+			require.NoError(t, manager.Setup())
+
+			// All three control files live in the process' own cgroup.
+			parentDir := filepath.Join(mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid))
+
+			requireCgroupWithInt(t, filepath.Join(parentDir, "memory.max"), tc.wantMemoryBytes)
+			requireCgroupWithInt(t, filepath.Join(parentDir, "cpu.weight"), calculateWantCPUWeight(tc.wantCPUWeight))
+			requireCgroupWithString(t, filepath.Join(parentDir, "cpu.max"), tc.wantCPUMax)
+		})
+	}
+}
+
+// TestSetup_RepoCgroupsV2 verifies that Setup writes the configured
+// repository limits into memory.max, cpu.weight and cpu.max of each of the
+// three repository cgroups. Limits a test case leaves at their zero value are
+// not asserted (see the requireCgroupWith* helpers).
+func TestSetup_RepoCgroupsV2(t *testing.T) {
+	testCases := []struct {
+		name            string
+		cfg             cgroups.Repositories
+		wantMemoryBytes int
+		wantCPUWeight   int
+		wantCPUMax      string
+	}{
+		{
+			name:            "all config specified",
+			cfg:             defaultCgroupsV2Config().Repositories,
+			wantMemoryBytes: 1024000,
+			wantCPUWeight:   256,
+			wantCPUMax:      "2000 100000",
+		},
+		{
+			name:            "only memory limit set",
+			cfg:             cgroups.Repositories{Count: 3, MemoryBytes: 1024000},
+			wantMemoryBytes: 1024000,
+		},
+		{
+			name:          "only cpu shares set",
+			cfg:           cgroups.Repositories{Count: 3, CPUShares: 512},
+			wantCPUWeight: 512,
+		},
+		{
+			name:       "only cpu quota set",
+			cfg:        cgroups.Repositories{Count: 3, CPUQuotaUs: 1000},
+			wantCPUMax: "1000 100000",
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			pid := 1
+			mock := newMockV2(t)
+
+			cfg := defaultCgroupsV2Config()
+			cfg.Mountpoint = mock.root
+			cfg.Repositories = tc.cfg
+
+			manager := mock.newCgroupManager(cfg, pid)
+			mock.setupMockCgroupFiles(t, manager)
+			require.NoError(t, manager.Setup())
+
+			parentDir := filepath.Join(mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid))
+
+			for groupID := 0; groupID < 3; groupID++ {
+				repoDir := filepath.Join(parentDir, fmt.Sprintf("repos-%d", groupID))
+
+				requireCgroupWithInt(t, filepath.Join(repoDir, "memory.max"), tc.wantMemoryBytes)
+				requireCgroupWithInt(t, filepath.Join(repoDir, "cpu.weight"), calculateWantCPUWeight(tc.wantCPUWeight))
+				requireCgroupWithString(t, filepath.Join(repoDir, "cpu.max"), tc.wantCPUMax)
+			}
+		})
+	}
+}
+
+// TestAddCommandV2 verifies that AddCommand records the command's pid in the
+// cgroup.procs file of the repository cgroup selected by the CRC32 checksum
+// of the cgroup key: by default the command's arguments joined with "/", or
+// the key passed through WithCgroupKey.
+func TestAddCommandV2(t *testing.T) {
+	mock := newMockV2(t)
+
+	config := defaultCgroupsV2Config()
+	config.Repositories.Count = 10
+	config.Repositories.MemoryBytes = 1024
+	config.Repositories.CPUShares = 16
+	config.Mountpoint = mock.root
+
+	pid := 1
+
+	setupManager := mock.newCgroupManager(config, pid)
+	mock.setupMockCgroupFiles(t, setupManager)
+
+	require.NoError(t, setupManager.Setup())
+	ctx := testhelper.Context(t)
+
+	cmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
+	require.NoError(t, cmd2.Run())
+
+	// A second manager over the same configuration adds the command to the
+	// cgroups created by the first one.
+	addManager := mock.newCgroupManager(config, pid)
+
+	// requireCommandInGroup asserts that the cgroup.procs file of the bucket
+	// derived from key holds the pid of cmd2.
+	requireCommandInGroup := func(t *testing.T, key string) {
+		t.Helper()
+
+		checksum := crc32.ChecksumIEEE([]byte(key))
+		groupID := uint(checksum) % config.Repositories.Count
+
+		procsPath := filepath.Join(
+			mock.root,
+			"gitaly",
+			fmt.Sprintf("gitaly-%d", pid),
+			fmt.Sprintf("repos-%d", groupID),
+			"cgroup.procs",
+		)
+
+		recordedPid, err := strconv.Atoi(string(readCgroupFile(t, procsPath)))
+		require.NoError(t, err)
+
+		require.Equal(t, cmd2.Process.Pid, recordedPid)
+	}
+
+	t.Run("without overridden key", func(t *testing.T) {
+		_, err := addManager.AddCommand(cmd2)
+		require.NoError(t, err)
+
+		requireCommandInGroup(t, strings.Join(cmd2.Args, "/"))
+	})
+
+	t.Run("with overridden key", func(t *testing.T) {
+		_, err := addManager.AddCommand(cmd2, WithCgroupKey("foobar"))
+		require.NoError(t, err)
+
+		requireCommandInGroup(t, "foobar")
+	})
+}
+
+// TestCleanupV2 verifies that, after Setup followed by Cleanup, none of the
+// three repository cgroup directories exists any more.
+func TestCleanupV2(t *testing.T) {
+	mock := newMockV2(t)
+
+	cfg := defaultCgroupsV2Config()
+	cfg.Mountpoint = mock.root
+	pid := 1
+
+	manager := mock.newCgroupManager(cfg, pid)
+	mock.setupMockCgroupFiles(t, manager)
+
+	require.NoError(t, manager.Setup())
+	require.NoError(t, manager.Cleanup())
+
+	parentDir := filepath.Join(mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid))
+	for groupID := 0; groupID < 3; groupID++ {
+		require.NoDirExists(t, filepath.Join(parentDir, fmt.Sprintf("repos-%d", groupID)))
+	}
+}
+
+// TestMetricsV2 compares the metrics collected from a manager with a single
+// repository cgroup against the expected exposition text. With metrics
+// disabled the expectation is empty, i.e. nothing may be collected.
+func TestMetricsV2(t *testing.T) {
+	testCases := []struct {
+		name           string
+		metricsEnabled bool
+		pid            int
+		expect         string
+	}{
+		{
+			name:           "metrics enabled: true",
+			metricsEnabled: true,
+			pid:            1,
+			expect: `# HELP gitaly_cgroup_cpu_cfs_periods_total Number of elapsed enforcement period intervals
+# TYPE gitaly_cgroup_cpu_cfs_periods_total counter
+gitaly_cgroup_cpu_cfs_periods_total{path="%s"} 10
+# HELP gitaly_cgroup_cpu_cfs_throttled_periods_total Number of throttled period intervals
+# TYPE gitaly_cgroup_cpu_cfs_throttled_periods_total counter
+gitaly_cgroup_cpu_cfs_throttled_periods_total{path="%s"} 20
+# HELP gitaly_cgroup_cpu_cfs_throttled_seconds_total Total time duration the Cgroup has been throttled
+# TYPE gitaly_cgroup_cpu_cfs_throttled_seconds_total counter
+gitaly_cgroup_cpu_cfs_throttled_seconds_total{path="%s"} 0.001
+# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup
+# TYPE gitaly_cgroup_cpu_usage_total gauge
+gitaly_cgroup_cpu_usage_total{path="%s",type="kernel"} 0
+gitaly_cgroup_cpu_usage_total{path="%s",type="user"} 0
+# HELP gitaly_cgroup_procs_total Total number of procs
+# TYPE gitaly_cgroup_procs_total gauge
+gitaly_cgroup_procs_total{path="%s",subsystem="cpu"} 1
+gitaly_cgroup_procs_total{path="%s",subsystem="cpuset"} 1
+gitaly_cgroup_procs_total{path="%s",subsystem="memory"} 1
+`,
+		},
+		{
+			name:           "metrics enabled: false",
+			metricsEnabled: false,
+			pid:            2,
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			mock := newMockV2(t)
+
+			config := defaultCgroupsV2Config()
+			config.Repositories.Count = 1
+			config.Repositories.MemoryBytes = 1048576
+			config.Repositories.CPUShares = 16
+			config.Mountpoint = mock.root
+			config.MetricsEnabled = tc.metricsEnabled
+
+			manager := mock.newCgroupManager(config, tc.pid)
+
+			mock.setupMockCgroupFiles(t, manager)
+			require.NoError(t, manager.Setup())
+
+			ctx := testhelper.Context(t)
+
+			// startInCgroup starts an "ls" process and hands it to the
+			// manager, failing the test on any error.
+			startInCgroup := func() *exec.Cmd {
+				command := exec.CommandContext(ctx, "ls", "-hal", ".")
+				require.NoError(t, command.Start())
+
+				_, err := manager.AddCommand(command)
+				require.NoError(t, err)
+
+				return command
+			}
+
+			cmd := startInCgroup()
+			gitCmd1 := startInCgroup()
+			gitCmd2 := startInCgroup()
+
+			// The last command is only reaped once the comparison is done.
+			defer func() {
+				require.NoError(t, gitCmd2.Wait())
+			}()
+
+			require.NoError(t, cmd.Wait())
+			require.NoError(t, gitCmd1.Wait())
+
+			repoCgroupPath := filepath.Join(manager.currentProcessCgroup(), "repos-0")
+
+			// Every "%s" placeholder stands for the repository cgroup path.
+			expected := strings.NewReader(strings.ReplaceAll(tc.expect, "%s", repoCgroupPath))
+
+			assert.NoError(t, testutil.CollectAndCompare(manager, expected))
+		})
+	}
+}
+
+// TestPruneOldCgroupsV2 sets up the cgroups of a Gitaly process with a given
+// pid, runs the mock's pruneOldCgroups and then checks whether the
+// "gitaly-<pid>" directory below the hierarchy root was removed or kept.
+func TestPruneOldCgroupsV2(t *testing.T) {
+	t.Parallel()
+
+	// newConfig returns the configuration shared by all test cases; only the
+	// hierarchy root differs between them.
+	newConfig := func(hierarchyRoot string) cgroups.Config {
+		return cgroups.Config{
+			HierarchyRoot: hierarchyRoot,
+			Repositories: cgroups.Repositories{
+				Count:       10,
+				MemoryBytes: 10 * 1024 * 1024,
+				CPUShares:   1024,
+			},
+		}
+	}
+
+	// setupCgroups creates the mock cgroup files and the cgroups of a process
+	// with the given pid.
+	setupCgroups := func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2, pid int) {
+		cgroupManager := mock.newCgroupManager(cfg, pid)
+		mock.setupMockCgroupFiles(t, cgroupManager)
+		require.NoError(t, cgroupManager.Setup())
+	}
+
+	testCases := []struct {
+		desc           string
+		cfg            cgroups.Config
+		expectedPruned bool
+		// setup returns a pid
+		setup func(*testing.T, cgroups.Config, *mockCgroupV2) int
+	}{
+		{
+			desc: "process belongs to another user",
+			cfg:  newConfig("gitaly"),
+			setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+				pid := 1
+				setupCgroups(t, cfg, mock, pid)
+
+				return pid
+			},
+			expectedPruned: true,
+		},
+		{
+			desc: "no hierarchy root",
+			cfg:  newConfig(""),
+			setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+				pid := 1
+				setupCgroups(t, cfg, mock, pid)
+
+				return pid
+			},
+			expectedPruned: false,
+		},
+		{
+			desc: "pid of finished process",
+			cfg:  newConfig("gitaly"),
+			setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+				// Run a command to completion to obtain the pid of a
+				// process that has already exited.
+				finished := exec.Command("ls")
+				require.NoError(t, finished.Run())
+				pid := finished.Process.Pid
+
+				setupCgroups(t, cfg, mock, pid)
+
+				// Place an additional regular file directly in the
+				// hierarchy root.
+				memoryFile := filepath.Join(cfg.Mountpoint, cfg.HierarchyRoot, "memory.limit_in_bytes")
+				require.NoError(t, os.WriteFile(memoryFile, []byte{}, fs.ModeAppend))
+
+				return pid
+			},
+			expectedPruned: true,
+		},
+		{
+			desc: "pid of running process",
+			cfg:  newConfig("gitaly"),
+			setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+				// The test process itself is certainly still running.
+				pid := os.Getpid()
+				setupCgroups(t, cfg, mock, pid)
+
+				return pid
+			},
+			expectedPruned: false,
+		},
+		{
+			desc: "gitaly-0 directory is deleted",
+			cfg:  newConfig("gitaly"),
+			setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+				pid := 0
+				setupCgroups(t, cfg, mock, pid)
+
+				return pid
+			},
+			expectedPruned: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			mock := newMockV2(t)
+			tc.cfg.Mountpoint = mock.root
+
+			root := filepath.Join(tc.cfg.Mountpoint, tc.cfg.HierarchyRoot)
+			require.NoError(t, os.MkdirAll(root, perm.PublicDir))
+
+			pid := tc.setup(t, tc.cfg, mock)
+
+			logger, _ := test.NewNullLogger()
+			mock.pruneOldCgroups(tc.cfg, logger)
+
+			// Path of the directory that setup created for the given pid.
+			oldGitalyProcessDir := filepath.Join(root, fmt.Sprintf("gitaly-%d", pid))
+
+			if !tc.expectedPruned {
+				require.DirExists(t, oldGitalyProcessDir)
+				return
+			}
+
+			require.NoDirExists(t, oldGitalyProcessDir)
+		})
+	}
+}
+
+func calculateWantCPUWeight(wantCPUWeight int) int {
+ if wantCPUWeight == 0 {
+ return 0
+ }
+ return 1 + ((wantCPUWeight-2)*9999)/262142
+}
+
+// requireCgroupWithString asserts that the content of cgroupFile equals want.
+// An empty want means the value is not configured for the test case, in which
+// case nothing is checked and the file is not read.
+func requireCgroupWithString(t *testing.T, cgroupFile string, want string) {
+	t.Helper()
+
+	if want == "" {
+		return
+	}
+
+	// The expected value goes first so that failure messages label the
+	// expected and actual values correctly.
+	require.Equal(t,
+		want,
+		string(readCgroupFile(t, cgroupFile)),
+	)
+}
+
+// requireCgroupWithInt asserts that the content of cgroupFile equals the
+// decimal representation of want. A want of zero or less means the value is
+// not configured for the test case, in which case nothing is checked and the
+// file is not read.
+func requireCgroupWithInt(t *testing.T, cgroupFile string, want int) {
+	t.Helper()
+
+	if want <= 0 {
+		return
+	}
+
+	// The expected value goes first so that failure messages label the
+	// expected and actual values correctly.
+	require.Equal(t,
+		strconv.Itoa(want),
+		string(readCgroupFile(t, cgroupFile)),
+	)
+}