Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ZheNing Hu <adlternative@gmail.com> 2023-06-26 11:35:49 +0300
committer: ZheNing Hu <adlternative@gmail.com> 2023-07-10 15:28:58 +0300
commit: 3e780e2c3a93ea866dbf7db2ddf26bc78e91d0ee (patch)
tree: dabfd02748ab727ada87a95f31b219ad649b891e
parent: d4299d66b722acbec358944ee9f70d2c13d8f67a (diff)
cgroup: add support for cgroup v2
Due to the lack of cgroup V2 support in Gitaly, some operating systems that use cgroup V2 may not be able to use cgroups in Gitaly properly. Therefore, this patch adds support for cgroup V2 in Gitaly, which maintains a similar interface to cgroup V1 but removes the "gitaly_cgroup_memory_reclaim_attempts_total" metric and modifies the label of the "gitaly_cgroup_procs_total" metric compared to V1. Signed-off-by: ZheNing Hu <adlternative@gmail.com>
-rw-r--r--  NOTICE                                  26
-rw-r--r--  go.mod                                   1
-rw-r--r--  go.sum                                   3
-rw-r--r--  internal/cgroups/cgroups.go              6
-rw-r--r--  internal/cgroups/manager.go              6
-rw-r--r--  internal/cgroups/manager_linux.go       38
-rw-r--r--  internal/cgroups/mock_linux_test.go     65
-rw-r--r--  internal/cgroups/v1_linux_test.go       15
-rw-r--r--  internal/cgroups/v2_linux.go           206
-rw-r--r--  internal/cgroups/v2_linux_test.go      546
10 files changed, 898 insertions, 14 deletions
diff --git a/NOTICE b/NOTICE
index d08942acb..12f55adef 100644
--- a/NOTICE
+++ b/NOTICE
@@ -6499,6 +6499,32 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+LICENSE - github.com/cilium/ebpf
+MIT License
+
+Copyright (c) 2017 Nathan Sweet
+Copyright (c) 2018, 2019 Cloudflare
+Copyright (c) 2019 Authors of Cilium
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
LICENSE - github.com/client9/reopen
The MIT License (MIT)
diff --git a/go.mod b/go.mod
index 82a81c2d0..09924a1f4 100644
--- a/go.mod
+++ b/go.mod
@@ -94,6 +94,7 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
+ github.com/cilium/ebpf v0.9.1 // indirect
github.com/client9/reopen v1.0.0 // indirect
github.com/cloudflare/circl v1.3.3 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
diff --git a/go.sum b/go.sum
index 10b4d19f0..482abdfee 100644
--- a/go.sum
+++ b/go.sum
@@ -682,6 +682,8 @@ github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX
github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
+github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
+github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng=
@@ -942,6 +944,7 @@ github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHqu
github.com/franela/goblin v0.0.0-20210519012713-85d372ac71e2/go.mod h1:VzmDKDJVZI3aJmnRI9VjAn9nJ8qPPsN1fqzr9dqInIo=
github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
+github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE=
github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go
index 20f78b6f0..0c6927502 100644
--- a/internal/cgroups/cgroups.go
+++ b/internal/cgroups/cgroups.go
@@ -50,9 +50,5 @@ func NewManager(cfg cgroups.Config, pid int) Manager {
// PruneOldCgroups prunes old cgroups for both the memory and cpu subsystems
func PruneOldCgroups(cfg cgroups.Config, logger log.FieldLogger) {
- if cfg.HierarchyRoot == "" {
- return
- }
-
- pruneOldCgroupsV1(cfg, logger)
+ pruneOldCgroups(cfg, logger)
}
diff --git a/internal/cgroups/manager.go b/internal/cgroups/manager.go
index 10592f62a..445138394 100644
--- a/internal/cgroups/manager.go
+++ b/internal/cgroups/manager.go
@@ -3,9 +3,15 @@
package cgroups
import (
+ log "github.com/sirupsen/logrus"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
)
func newCgroupManager(cfg cgroupscfg.Config, pid int) Manager {
return &NoopManager{}
}
+
+func pruneOldCgroups(cfg cgroups.Config, logger log.FieldLogger) {
+ return
+}
diff --git a/internal/cgroups/manager_linux.go b/internal/cgroups/manager_linux.go
index d85d90340..7b8c4a34d 100644
--- a/internal/cgroups/manager_linux.go
+++ b/internal/cgroups/manager_linux.go
@@ -8,8 +8,10 @@ import (
"os/exec"
"strings"
+ cgrps "github.com/containerd/cgroups/v3"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/client_golang/prometheus"
+ log "github.com/sirupsen/logrus"
cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
)
@@ -35,10 +37,25 @@ type CGroupManager struct {
}
func newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
+ return newCgroupManagerWithMode(cfg, pid, cgrps.Mode())
+}
+
+func newCgroupManagerWithMode(cfg cgroupscfg.Config, pid int, mode cgrps.CGMode) *CGroupManager {
+ var handler cgroupHandler
+ switch mode {
+ case cgrps.Legacy, cgrps.Hybrid:
+ handler = newV1Handler(cfg, pid)
+ case cgrps.Unified:
+ handler = newV2Handler(cfg, pid)
+ log.Warnf("Gitaly now includes experimental support for CgroupV2. Please proceed with caution and use this experimental feature at your own risk")
+ default:
+ log.Fatalf("unknown cgroup version")
+ }
+
return &CGroupManager{
cfg: cfg,
pid: pid,
- handler: newV1Handler(cfg, pid),
+ handler: handler,
}
}
@@ -139,3 +156,22 @@ func (cgm *CGroupManager) configRepositoryResources() *specs.LinuxResources {
}
return &reposResources
}
+
+func pruneOldCgroups(cfg cgroupscfg.Config, logger log.FieldLogger) {
+ pruneOldCgroupsWithMode(cfg, logger, cgrps.Mode())
+}
+
+func pruneOldCgroupsWithMode(cfg cgroupscfg.Config, logger log.FieldLogger, mode cgrps.CGMode) {
+ if cfg.HierarchyRoot == "" {
+ return
+ }
+
+ switch mode {
+ case cgrps.Legacy, cgrps.Hybrid:
+ pruneOldCgroupsV1(cfg, logger)
+ case cgrps.Unified:
+ pruneOldCgroupsV2(cfg, logger)
+ default:
+ log.Fatalf("unknown cgroup version")
+ }
+}
diff --git a/internal/cgroups/mock_linux_test.go b/internal/cgroups/mock_linux_test.go
index ca94b450d..135dca76f 100644
--- a/internal/cgroups/mock_linux_test.go
+++ b/internal/cgroups/mock_linux_test.go
@@ -27,6 +27,7 @@ import (
"strconv"
"testing"
+ cgrps "github.com/containerd/cgroups/v3"
"github.com/containerd/cgroups/v3/cgroup1"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
@@ -119,9 +120,69 @@ throttled_time 1000000`
}
func (m *mockCgroup) newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
- return newCgroupManager(cfg, pid)
+ return newCgroupManagerWithMode(cfg, pid, cgrps.Legacy)
}
func (m *mockCgroup) pruneOldCgroups(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
- PruneOldCgroups(cfg, logger)
+ pruneOldCgroupsWithMode(cfg, logger, cgrps.Legacy)
+}
+
+type mockCgroupV2 struct {
+ root string
+}
+
+func newMockV2(t *testing.T) *mockCgroupV2 {
+ t.Helper()
+
+ return &mockCgroupV2{
+ root: testhelper.TempDir(t),
+ }
+}
+
+func (m *mockCgroupV2) setupMockCgroupFiles(
+ t *testing.T,
+ manager *CGroupManager,
+) {
+ cgroupPath := filepath.Join(m.root, manager.currentProcessCgroup())
+ require.NoError(t, os.MkdirAll(cgroupPath, perm.SharedDir))
+
+ contentByFilename := map[string]string{
+ "cgroup.procs": "",
+ "cgroup.subtree_control": "cpu cpuset memory",
+ "cgroup.controllers": "cpu cpuset memory",
+ "cpu.max": "max 100000",
+ "cpu.weight": "10",
+ "memory.max": "max",
+ "cpu.stat": `nr_periods 10
+ nr_throttled 20
+ throttled_usec 1000000`,
+ }
+
+ for filename, content := range contentByFilename {
+ controlFilePath := filepath.Join(m.root, manager.cfg.HierarchyRoot, filename)
+ require.NoError(t, os.WriteFile(controlFilePath, []byte(content), perm.SharedFile))
+ }
+
+ for filename, content := range contentByFilename {
+ controlFilePath := filepath.Join(cgroupPath, filename)
+ require.NoError(t, os.WriteFile(controlFilePath, []byte(content), perm.SharedFile))
+ }
+
+ for shard := uint(0); shard < manager.cfg.Repositories.Count; shard++ {
+ shardPath := filepath.Join(cgroupPath, fmt.Sprintf("repos-%d", shard))
+ require.NoError(t, os.MkdirAll(shardPath, perm.SharedDir))
+
+ for filename, content := range contentByFilename {
+ shardControlFilePath := filepath.Join(shardPath, filename)
+ require.NoError(t, os.WriteFile(shardControlFilePath, []byte(content), perm.SharedFile))
+ }
+ }
+}
+
+func (m *mockCgroupV2) newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
+ return newCgroupManagerWithMode(cfg, pid, cgrps.Unified)
+}
+
+func (m *mockCgroupV2) pruneOldCgroups(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+ pruneOldCgroupsWithMode(cfg, logger, cgrps.Unified)
}
diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go
index 524b4f91f..a68ebed4d 100644
--- a/internal/cgroups/v1_linux_test.go
+++ b/internal/cgroups/v1_linux_test.go
@@ -13,6 +13,7 @@ import (
"strings"
"testing"
+ cgrps "github.com/containerd/cgroups/v3"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
@@ -37,7 +38,9 @@ func defaultCgroupsConfig() cgroups.Config {
func TestNewManagerV1(t *testing.T) {
cfg := cgroups.Config{Repositories: cgroups.Repositories{Count: 10}}
- manager := newCgroupManager(cfg, 1)
+ manager := newCgroupManagerWithMode(cfg, 1, cgrps.Legacy)
+ require.IsType(t, &cgroupV1Handler{}, manager.handler)
+ manager = newCgroupManagerWithMode(cfg, 1, cgrps.Hybrid)
require.IsType(t, &cgroupV1Handler{}, manager.handler)
}
@@ -397,7 +400,7 @@ func TestPruneOldCgroups(t *testing.T) {
},
setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
pid := 1
- cgroupManager := newCgroupManager(cfg, pid)
+ cgroupManager := mock.newCgroupManager(cfg, pid)
require.NoError(t, cgroupManager.Setup())
return pid
@@ -416,7 +419,7 @@ func TestPruneOldCgroups(t *testing.T) {
},
setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
pid := 1
- cgroupManager := newCgroupManager(cfg, pid)
+ cgroupManager := mock.newCgroupManager(cfg, pid)
require.NoError(t, cgroupManager.Setup())
return 1
},
@@ -437,7 +440,7 @@ func TestPruneOldCgroups(t *testing.T) {
require.NoError(t, cmd.Run())
pid := cmd.Process.Pid
- cgroupManager := newCgroupManager(cfg, pid)
+ cgroupManager := mock.newCgroupManager(cfg, pid)
require.NoError(t, cgroupManager.Setup())
memoryRoot := filepath.Join(
@@ -465,7 +468,7 @@ func TestPruneOldCgroups(t *testing.T) {
setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
pid := os.Getpid()
- cgroupManager := newCgroupManager(cfg, pid)
+ cgroupManager := mock.newCgroupManager(cfg, pid)
require.NoError(t, cgroupManager.Setup())
return pid
@@ -483,7 +486,7 @@ func TestPruneOldCgroups(t *testing.T) {
},
},
setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
- cgroupManager := newCgroupManager(cfg, 0)
+ cgroupManager := mock.newCgroupManager(cfg, 0)
require.NoError(t, cgroupManager.Setup())
return 0
diff --git a/internal/cgroups/v2_linux.go b/internal/cgroups/v2_linux.go
new file mode 100644
index 000000000..abcbfbdc6
--- /dev/null
+++ b/internal/cgroups/v2_linux.go
@@ -0,0 +1,206 @@
+//go:build linux
+
+package cgroups
+
+import (
+ "errors"
+ "fmt"
+ "io/fs"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "github.com/containerd/cgroups/v3/cgroup2"
+ "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/sirupsen/logrus"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/log"
+)
+
+type cgroupV2Handler struct {
+ cfg cgroupscfg.Config
+ cpuUsage *prometheus.GaugeVec
+ cpuCFSPeriods *prometheus.Desc
+ cpuCFSThrottledPeriods *prometheus.Desc
+ cpuCFSThrottledTime *prometheus.Desc
+ procs *prometheus.GaugeVec
+ pid int
+}
+
+func newV2Handler(cfg cgroupscfg.Config, pid int) *cgroupV2Handler {
+ return &cgroupV2Handler{
+ cfg: cfg,
+ pid: pid,
+ cpuUsage: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_cpu_usage_total",
+ Help: "CPU Usage of Cgroup",
+ },
+ []string{"path", "type"},
+ ),
+ cpuCFSPeriods: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_periods_total",
+ "Number of elapsed enforcement period intervals",
+ []string{"path"}, nil,
+ ),
+ cpuCFSThrottledPeriods: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_throttled_periods_total",
+ "Number of throttled period intervals",
+ []string{"path"}, nil,
+ ),
+ cpuCFSThrottledTime: prometheus.NewDesc(
+ "gitaly_cgroup_cpu_cfs_throttled_seconds_total",
+ "Total time duration the Cgroup has been throttled",
+ []string{"path"}, nil,
+ ),
+ procs: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "gitaly_cgroup_procs_total",
+ Help: "Total number of procs",
+ },
+ []string{"path", "subsystem"},
+ ),
+ }
+}
+
+func (cvh *cgroupV2Handler) setupParent(parentResources *specs.LinuxResources) error {
+ if _, err := cgroup2.NewManager(cvh.cfg.Mountpoint, "/"+cvh.currentProcessCgroup(), cgroup2.ToResources(parentResources)); err != nil {
+ return fmt.Errorf("failed creating parent cgroup: %w", err)
+ }
+
+ return nil
+}
+
+func (cvh *cgroupV2Handler) setupRepository(reposResources *specs.LinuxResources) error {
+ for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
+ if _, err := cgroup2.NewManager(
+ cvh.cfg.Mountpoint,
+ "/"+cvh.repoPath(i),
+ cgroup2.ToResources(reposResources),
+ ); err != nil {
+ return fmt.Errorf("failed creating repository cgroup: %w", err)
+ }
+ }
+ return nil
+}
+
+func (cvh *cgroupV2Handler) addToCgroup(pid int, cgroupPath string) error {
+ control, err := cgroup2.Load("/"+cgroupPath, cgroup2.WithMountpoint(cvh.cfg.Mountpoint))
+ if err != nil {
+ return fmt.Errorf("failed loading %s cgroup: %w", cgroupPath, err)
+ }
+
+ if err := control.AddProc(uint64(pid)); err != nil {
+ // Command could finish so quickly before we can add it to a cgroup, so
+ // we don't consider it an error.
+ if strings.Contains(err.Error(), "no such process") {
+ return nil
+ }
+ return fmt.Errorf("failed adding process to cgroup: %w", err)
+ }
+
+ return nil
+}
+
+func (cvh *cgroupV2Handler) collect(ch chan<- prometheus.Metric) {
+ if !cvh.cfg.MetricsEnabled {
+ return
+ }
+
+ for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
+ repoPath := cvh.repoPath(i)
+ logger := log.Default().WithField("cgroup_path", repoPath)
+ control, err := cgroup2.Load("/"+repoPath, cgroup2.WithMountpoint(cvh.cfg.Mountpoint))
+ if err != nil {
+ logger.WithError(err).Warn("unable to load cgroup controller")
+ return
+ }
+
+ if metrics, err := control.Stat(); err != nil {
+ logger.WithError(err).Warn("unable to get cgroup stats")
+ } else {
+ cpuUserMetric := cvh.cpuUsage.WithLabelValues(repoPath, "user")
+ cpuUserMetric.Set(float64(metrics.CPU.UserUsec))
+ ch <- cpuUserMetric
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSPeriods,
+ prometheus.CounterValue,
+ float64(metrics.CPU.NrPeriods),
+ repoPath,
+ )
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSThrottledPeriods,
+ prometheus.CounterValue,
+ float64(metrics.CPU.NrThrottled),
+ repoPath,
+ )
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSThrottledTime,
+ prometheus.CounterValue,
+ float64(metrics.CPU.ThrottledUsec)/float64(time.Second),
+ repoPath,
+ )
+
+ cpuKernelMetric := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
+ cpuKernelMetric.Set(float64(metrics.CPU.SystemUsec))
+ ch <- cpuKernelMetric
+ }
+
+ if subsystems, err := control.Controllers(); err != nil {
+ logger.WithError(err).Warn("unable to get cgroup hierarchy")
+ } else {
+ processes, err := control.Procs(true)
+ if err != nil {
+ logger.WithError(err).
+ Warn("unable to get process list")
+ continue
+ }
+
+ for _, subsystem := range subsystems {
+ procsMetric := cvh.procs.WithLabelValues(repoPath, subsystem)
+ procsMetric.Set(float64(len(processes)))
+ ch <- procsMetric
+ }
+ }
+ }
+}
+
+func (cvh *cgroupV2Handler) cleanup() error {
+ processCgroupPath := cvh.currentProcessCgroup()
+
+ control, err := cgroup2.Load("/"+processCgroupPath, cgroup2.WithMountpoint(cvh.cfg.Mountpoint))
+ if err != nil {
+ return fmt.Errorf("failed loading cgroup %s: %w", processCgroupPath, err)
+ }
+
+ if err := control.Delete(); err != nil {
+ return fmt.Errorf("failed cleaning up cgroup %s: %w", processCgroupPath, err)
+ }
+
+ return nil
+}
+
+func (cvh *cgroupV2Handler) repoPath(groupID int) string {
+ return filepath.Join(cvh.currentProcessCgroup(), fmt.Sprintf("repos-%d", groupID))
+}
+
+func (cvh *cgroupV2Handler) currentProcessCgroup() string {
+ return config.GetGitalyProcessTempDir(cvh.cfg.HierarchyRoot, cvh.pid)
+}
+
+func pruneOldCgroupsV2(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+ if err := config.PruneOldGitalyProcessDirectories(
+ logger,
+ filepath.Join(cfg.Mountpoint, cfg.HierarchyRoot),
+ ); err != nil {
+ var pathError *fs.PathError
+ if !errors.As(err, &pathError) {
+ logger.WithError(err).Error("failed to clean up cpu cgroups")
+ }
+ }
+}
diff --git a/internal/cgroups/v2_linux_test.go b/internal/cgroups/v2_linux_test.go
new file mode 100644
index 000000000..834a148cd
--- /dev/null
+++ b/internal/cgroups/v2_linux_test.go
@@ -0,0 +1,546 @@
+//go:build linux
+
+package cgroups
+
+import (
+ "fmt"
+ "hash/crc32"
+ "io/fs"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "testing"
+
+ cgrps "github.com/containerd/cgroups/v3"
+ "github.com/prometheus/client_golang/prometheus/testutil"
+ "github.com/sirupsen/logrus/hooks/test"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+)
+
+func defaultCgroupsV2Config() cgroups.Config {
+ return cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 3,
+ MemoryBytes: 1024000,
+ CPUShares: 256,
+ CPUQuotaUs: 2000,
+ },
+ }
+}
+
+func TestNewManagerV2(t *testing.T) {
+ cfg := cgroups.Config{Repositories: cgroups.Repositories{Count: 10}}
+
+ manager := newCgroupManagerWithMode(cfg, 1, cgrps.Unified)
+ require.IsType(t, &cgroupV2Handler{}, manager.handler)
+}
+
+func TestSetup_ParentCgroupsV2(t *testing.T) {
+ tests := []struct {
+ name string
+ cfg cgroups.Config
+ wantMemoryBytes int
+ wantCPUWeight int
+ wantCPUMax string
+ }{
+ {
+ name: "all config specified",
+ cfg: cgroups.Config{
+ MemoryBytes: 102400,
+ CPUShares: 256,
+ CPUQuotaUs: 2000,
+ },
+ wantMemoryBytes: 102400,
+ wantCPUWeight: 256,
+ wantCPUMax: "2000 100000",
+ },
+ {
+ name: "only memory limit set",
+ cfg: cgroups.Config{
+ MemoryBytes: 102400,
+ },
+ wantMemoryBytes: 102400,
+ },
+ {
+ name: "only cpu shares set",
+ cfg: cgroups.Config{
+ CPUShares: 512,
+ },
+ wantCPUWeight: 512,
+ },
+ {
+ name: "only cpu quota set",
+ cfg: cgroups.Config{
+ CPUQuotaUs: 2000,
+ },
+ wantCPUMax: "2000 100000",
+ },
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ mock := newMockV2(t)
+
+ pid := 1
+ tt.cfg.HierarchyRoot = "gitaly"
+ tt.cfg.Mountpoint = mock.root
+
+ v2Manager := mock.newCgroupManager(tt.cfg, pid)
+ mock.setupMockCgroupFiles(t, v2Manager)
+
+ require.NoError(t, v2Manager.Setup())
+
+ memoryMaxPath := filepath.Join(
+ mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid), "memory.max",
+ )
+ requireCgroupWithInt(t, memoryMaxPath, tt.wantMemoryBytes)
+
+ cpuWeightPath := filepath.Join(
+ mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid), "cpu.weight",
+ )
+ requireCgroupWithInt(t, cpuWeightPath, calculateWantCPUWeight(tt.wantCPUWeight))
+
+ cpuMaxPath := filepath.Join(
+ mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid), "cpu.max",
+ )
+ requireCgroupWithString(t, cpuMaxPath, tt.wantCPUMax)
+ })
+ }
+}
+
+func TestSetup_RepoCgroupsV2(t *testing.T) {
+ tests := []struct {
+ name string
+ cfg cgroups.Repositories
+ wantMemoryBytes int
+ wantCPUWeight int
+ wantCPUMax string
+ }{
+ {
+ name: "all config specified",
+ cfg: defaultCgroupsV2Config().Repositories,
+ wantMemoryBytes: 1024000,
+ wantCPUWeight: 256,
+ wantCPUMax: "2000 100000",
+ },
+ {
+ name: "only memory limit set",
+ cfg: cgroups.Repositories{
+ Count: 3,
+ MemoryBytes: 1024000,
+ },
+ wantMemoryBytes: 1024000,
+ },
+ {
+ name: "only cpu shares set",
+ cfg: cgroups.Repositories{
+ Count: 3,
+ CPUShares: 512,
+ },
+ wantCPUWeight: 512,
+ },
+ {
+ name: "only cpu quota set",
+ cfg: cgroups.Repositories{
+ Count: 3,
+ CPUQuotaUs: 1000,
+ },
+ wantCPUMax: "1000 100000",
+ },
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ mock := newMockV2(t)
+
+ pid := 1
+
+ cfg := defaultCgroupsV2Config()
+ cfg.Mountpoint = mock.root
+ cfg.Repositories = tt.cfg
+
+ v2Manager := mock.newCgroupManager(cfg, pid)
+ mock.setupMockCgroupFiles(t, v2Manager)
+ require.NoError(t, v2Manager.Setup())
+
+ for i := 0; i < 3; i++ {
+ memoryMaxPath := filepath.Join(
+ mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid), fmt.Sprintf("repos-%d", i), "memory.max",
+ )
+ requireCgroupWithInt(t, memoryMaxPath, tt.wantMemoryBytes)
+
+ cpuWeightPath := filepath.Join(
+ mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid), fmt.Sprintf("repos-%d", i), "cpu.weight",
+ )
+ requireCgroupWithInt(t, cpuWeightPath, calculateWantCPUWeight(tt.wantCPUWeight))
+
+ cpuMaxPath := filepath.Join(
+ mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid), fmt.Sprintf("repos-%d", i), "cpu.max",
+ )
+ requireCgroupWithString(t, cpuMaxPath, tt.wantCPUMax)
+ }
+ })
+ }
+}
+
+func TestAddCommandV2(t *testing.T) {
+ mock := newMockV2(t)
+
+ config := defaultCgroupsV2Config()
+ config.Repositories.Count = 10
+ config.Repositories.MemoryBytes = 1024
+ config.Repositories.CPUShares = 16
+ config.Mountpoint = mock.root
+
+ pid := 1
+
+ v2Manager1 := mock.newCgroupManager(config, pid)
+ mock.setupMockCgroupFiles(t, v2Manager1)
+
+ require.NoError(t, v2Manager1.Setup())
+ ctx := testhelper.Context(t)
+
+ cmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, cmd2.Run())
+
+ v2Manager2 := mock.newCgroupManager(config, pid)
+
+ t.Run("without overridden key", func(t *testing.T) {
+ _, err := v2Manager2.AddCommand(cmd2)
+ require.NoError(t, err)
+
+ checksum := crc32.ChecksumIEEE([]byte(strings.Join(cmd2.Args, "/")))
+ groupID := uint(checksum) % config.Repositories.Count
+
+ path := filepath.Join(mock.root, "gitaly",
+ fmt.Sprintf("gitaly-%d", pid), fmt.Sprintf("repos-%d", groupID), "cgroup.procs")
+ content := readCgroupFile(t, path)
+
+ cmdPid, err := strconv.Atoi(string(content))
+ require.NoError(t, err)
+
+ require.Equal(t, cmd2.Process.Pid, cmdPid)
+ })
+
+ t.Run("with overridden key", func(t *testing.T) {
+ _, err := v2Manager2.AddCommand(cmd2, WithCgroupKey("foobar"))
+ require.NoError(t, err)
+
+ checksum := crc32.ChecksumIEEE([]byte("foobar"))
+ groupID := uint(checksum) % config.Repositories.Count
+
+ path := filepath.Join(mock.root, "gitaly",
+ fmt.Sprintf("gitaly-%d", pid), fmt.Sprintf("repos-%d", groupID), "cgroup.procs")
+ content := readCgroupFile(t, path)
+
+ cmdPid, err := strconv.Atoi(string(content))
+ require.NoError(t, err)
+
+ require.Equal(t, cmd2.Process.Pid, cmdPid)
+ })
+}
+
+func TestCleanupV2(t *testing.T) {
+ mock := newMockV2(t)
+
+ pid := 1
+ cfg := defaultCgroupsV2Config()
+ cfg.Mountpoint = mock.root
+
+ v2Manager := mock.newCgroupManager(cfg, pid)
+ mock.setupMockCgroupFiles(t, v2Manager)
+
+ require.NoError(t, v2Manager.Setup())
+ require.NoError(t, v2Manager.Cleanup())
+
+ for i := 0; i < 3; i++ {
+ require.NoDirExists(t, filepath.Join(mock.root, "gitaly", fmt.Sprintf("gitaly-%d", pid), fmt.Sprintf("repos-%d", i)))
+ }
+}
+
+func TestMetricsV2(t *testing.T) {
+ tests := []struct {
+ name string
+ metricsEnabled bool
+ pid int
+ expect string
+ }{
+ {
+ name: "metrics enabled: true",
+ metricsEnabled: true,
+ pid: 1,
+ expect: `# HELP gitaly_cgroup_cpu_cfs_periods_total Number of elapsed enforcement period intervals
+# TYPE gitaly_cgroup_cpu_cfs_periods_total counter
+gitaly_cgroup_cpu_cfs_periods_total{path="%s"} 10
+# HELP gitaly_cgroup_cpu_cfs_throttled_periods_total Number of throttled period intervals
+# TYPE gitaly_cgroup_cpu_cfs_throttled_periods_total counter
+gitaly_cgroup_cpu_cfs_throttled_periods_total{path="%s"} 20
+# HELP gitaly_cgroup_cpu_cfs_throttled_seconds_total Total time duration the Cgroup has been throttled
+# TYPE gitaly_cgroup_cpu_cfs_throttled_seconds_total counter
+gitaly_cgroup_cpu_cfs_throttled_seconds_total{path="%s"} 0.001
+# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup
+# TYPE gitaly_cgroup_cpu_usage_total gauge
+gitaly_cgroup_cpu_usage_total{path="%s",type="kernel"} 0
+gitaly_cgroup_cpu_usage_total{path="%s",type="user"} 0
+# HELP gitaly_cgroup_procs_total Total number of procs
+# TYPE gitaly_cgroup_procs_total gauge
+gitaly_cgroup_procs_total{path="%s",subsystem="cpu"} 1
+gitaly_cgroup_procs_total{path="%s",subsystem="cpuset"} 1
+gitaly_cgroup_procs_total{path="%s",subsystem="memory"} 1
+`,
+ },
+ {
+ name: "metrics enabled: false",
+ metricsEnabled: false,
+ pid: 2,
+ },
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+ mock := newMockV2(t)
+
+ config := defaultCgroupsV2Config()
+ config.Repositories.Count = 1
+ config.Repositories.MemoryBytes = 1048576
+ config.Repositories.CPUShares = 16
+ config.Mountpoint = mock.root
+ config.MetricsEnabled = tt.metricsEnabled
+
+ v2Manager1 := mock.newCgroupManager(config, tt.pid)
+
+ mock.setupMockCgroupFiles(t, v2Manager1)
+ require.NoError(t, v2Manager1.Setup())
+
+ ctx := testhelper.Context(t)
+
+ cmd := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, cmd.Start())
+ _, err := v2Manager1.AddCommand(cmd)
+ require.NoError(t, err)
+
+ gitCmd1 := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, gitCmd1.Start())
+ _, err = v2Manager1.AddCommand(gitCmd1)
+ require.NoError(t, err)
+
+ gitCmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, gitCmd2.Start())
+ _, err = v2Manager1.AddCommand(gitCmd2)
+ require.NoError(t, err)
+ defer func() {
+ require.NoError(t, gitCmd2.Wait())
+ }()
+
+ require.NoError(t, cmd.Wait())
+ require.NoError(t, gitCmd1.Wait())
+
+ repoCgroupPath := filepath.Join(v2Manager1.currentProcessCgroup(), "repos-0")
+
+ expected := strings.NewReader(strings.ReplaceAll(tt.expect, "%s", repoCgroupPath))
+
+ assert.NoError(t, testutil.CollectAndCompare(v2Manager1, expected))
+ })
+ }
+}
+
+func TestPruneOldCgroupsV2(t *testing.T) {
+ t.Parallel()
+
+ testCases := []struct {
+ desc string
+ cfg cgroups.Config
+ expectedPruned bool
+ // setup returns a pid
+ setup func(*testing.T, cgroups.Config, *mockCgroupV2) int
+ }{
+ {
+ desc: "process belongs to another user",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+ pid := 1
+
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ mock.setupMockCgroupFiles(t, cgroupManager)
+ require.NoError(t, cgroupManager.Setup())
+
+ return pid
+ },
+ expectedPruned: true,
+ },
+ {
+ desc: "no hierarchy root",
+ cfg: cgroups.Config{
+ HierarchyRoot: "",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+ pid := 1
+
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ mock.setupMockCgroupFiles(t, cgroupManager)
+ require.NoError(t, cgroupManager.Setup())
+ return 1
+ },
+ expectedPruned: false,
+ },
+ {
+ desc: "pid of finished process",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+ cmd := exec.Command("ls")
+ require.NoError(t, cmd.Run())
+ pid := cmd.Process.Pid
+
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ mock.setupMockCgroupFiles(t, cgroupManager)
+ require.NoError(t, cgroupManager.Setup())
+
+ memoryFile := filepath.Join(
+ cfg.Mountpoint,
+ cfg.HierarchyRoot,
+ "memory.limit_in_bytes",
+ )
+ require.NoError(t, os.WriteFile(memoryFile, []byte{}, fs.ModeAppend))
+
+ return pid
+ },
+ expectedPruned: true,
+ },
+ {
+ desc: "pid of running process",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+ pid := os.Getpid()
+
+ cgroupManager := mock.newCgroupManager(cfg, pid)
+ mock.setupMockCgroupFiles(t, cgroupManager)
+ require.NoError(t, cgroupManager.Setup())
+
+ return pid
+ },
+ expectedPruned: false,
+ },
+ {
+ desc: "gitaly-0 directory is deleted",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroupV2) int {
+ cgroupManager := mock.newCgroupManager(cfg, 0)
+ mock.setupMockCgroupFiles(t, cgroupManager)
+ require.NoError(t, cgroupManager.Setup())
+
+ return 0
+ },
+ expectedPruned: true,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ mock := newMockV2(t)
+ tc.cfg.Mountpoint = mock.root
+
+ root := filepath.Join(
+ tc.cfg.Mountpoint,
+ tc.cfg.HierarchyRoot,
+ )
+ require.NoError(t, os.MkdirAll(root, perm.PublicDir))
+
+ pid := tc.setup(t, tc.cfg, mock)
+
+ logger, _ := test.NewNullLogger()
+ mock.pruneOldCgroups(tc.cfg, logger)
+
+ // create cgroups directories with a different pid
+ oldGitalyProcessDir := filepath.Join(
+ root,
+ fmt.Sprintf("gitaly-%d", pid),
+ )
+
+ if tc.expectedPruned {
+ require.NoDirExists(t, oldGitalyProcessDir)
+ } else {
+ require.DirExists(t, oldGitalyProcessDir)
+ }
+ })
+ }
+}
+
+func calculateWantCPUWeight(wantCPUWeight int) int {
+ if wantCPUWeight == 0 {
+ return 0
+ }
+ return 1 + ((wantCPUWeight-2)*9999)/262142
+}
+
+func requireCgroupWithString(t *testing.T, cgroupFile string, want string) {
+ t.Helper()
+
+ if want == "" {
+ return
+ }
+ require.Equal(t,
+ string(readCgroupFile(t, cgroupFile)),
+ want,
+ )
+}
+
+func requireCgroupWithInt(t *testing.T, cgroupFile string, want int) {
+ t.Helper()
+
+ if want <= 0 {
+ return
+ }
+
+ require.Equal(t,
+ string(readCgroupFile(t, cgroupFile)),
+ strconv.Itoa(want),
+ )
+}