Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZheNing Hu <adlternative@gmail.com>2023-06-15 12:52:32 +0300
committerZheNing Hu <adlternative@gmail.com>2023-07-09 11:48:48 +0300
commitd4299d66b722acbec358944ee9f70d2c13d8f67a (patch)
treee8560db15531df6646f21e258c51a2ac67e2041a
parent297a7d99ca7a6b59486c7d1f2adf4faa3e2103e3 (diff)
cgroup: refactor cgroup interfaces
We're preparing to add V2 implementation for cgroup. Since cgroup V1 and V2 have similar implementation logic, we've extracted the common logic into the cgroupHandler interface, and implemented CGroupManager that satisfies the original Manager interface, which internally uses the cgroupHandler interface. Currently, only V1 implementation is available. To be compatible with non-Linux environments, we've added a Noop CgroupManger implementation in manager.go by specifying a build tag for non-Linux operating systems. We've also ensured that the existing multiple test files can only run on Linux by specifying the linux operating system in the build tags. Signed-off-by: ZheNing Hu <adlternative@gmail.com>
-rw-r--r--internal/cgroups/cgroups.go20
-rw-r--r--internal/cgroups/cgroups_linux_test.go178
-rw-r--r--internal/cgroups/manager.go11
-rw-r--r--internal/cgroups/manager_linux.go141
-rw-r--r--internal/cgroups/mock_linux_test.go18
-rw-r--r--internal/cgroups/v1.go12
-rw-r--r--internal/cgroups/v1_linux.go172
-rw-r--r--internal/cgroups/v1_linux_test.go329
8 files changed, 476 insertions, 405 deletions
diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go
index 5d44ba70d..20f78b6f0 100644
--- a/internal/cgroups/cgroups.go
+++ b/internal/cgroups/cgroups.go
@@ -2,11 +2,9 @@ package cgroups
import (
"os/exec"
- "path/filepath"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
- "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
)
@@ -44,7 +42,7 @@ type Manager interface {
// NewManager returns the appropriate Cgroups manager
func NewManager(cfg cgroups.Config, pid int) Manager {
if cfg.Repositories.Count > 0 {
- return newV1Manager(cfg, pid)
+ return newCgroupManager(cfg, pid)
}
return &NoopManager{}
@@ -56,19 +54,5 @@ func PruneOldCgroups(cfg cgroups.Config, logger log.FieldLogger) {
return
}
- if err := config.PruneOldGitalyProcessDirectories(
- logger,
- filepath.Join(cfg.Mountpoint, "memory",
- cfg.HierarchyRoot),
- ); err != nil {
- logger.WithError(err).Error("failed to clean up memory cgroups")
- }
-
- if err := config.PruneOldGitalyProcessDirectories(
- logger,
- filepath.Join(cfg.Mountpoint, "cpu",
- cfg.HierarchyRoot),
- ); err != nil {
- logger.WithError(err).Error("failed to clean up cpu cgroups")
- }
+ pruneOldCgroupsV1(cfg, logger)
}
diff --git a/internal/cgroups/cgroups_linux_test.go b/internal/cgroups/cgroups_linux_test.go
index e52eecb5e..8ed551d2d 100644
--- a/internal/cgroups/cgroups_linux_test.go
+++ b/internal/cgroups/cgroups_linux_test.go
@@ -1,17 +1,12 @@
+//go:build linux
+
package cgroups
import (
- "fmt"
- "io/fs"
- "os"
- "os/exec"
- "path/filepath"
"testing"
- "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
- "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
"gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
)
@@ -20,174 +15,5 @@ func TestMain(m *testing.M) {
}
func TestNewManager(t *testing.T) {
- cfg := cgroups.Config{Repositories: cgroups.Repositories{Count: 10}}
-
- require.IsType(t, &CGroupV1Manager{}, &CGroupV1Manager{cfg: cfg})
require.IsType(t, &NoopManager{}, NewManager(cgroups.Config{}, 1))
}
-
-func TestPruneOldCgroups(t *testing.T) {
- t.Parallel()
-
- testCases := []struct {
- desc string
- cfg cgroups.Config
- expectedPruned bool
- // setup returns a pid
- setup func(*testing.T, cgroups.Config) int
- }{
- {
- desc: "process belongs to another user",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- pid := 1
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- return pid
- },
- expectedPruned: true,
- },
- {
- desc: "no hierarchy root",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- pid := 1
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- return 1
- },
- expectedPruned: false,
- },
- {
- desc: "pid of finished process",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- cmd := exec.Command("ls")
- require.NoError(t, cmd.Run())
- pid := cmd.Process.Pid
-
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- memoryRoot := filepath.Join(
- cfg.Mountpoint,
- "memory",
- cfg.HierarchyRoot,
- "memory.limit_in_bytes",
- )
- require.NoError(t, os.WriteFile(memoryRoot, []byte{}, fs.ModeAppend))
-
- return pid
- },
- expectedPruned: true,
- },
- {
- desc: "pid of running process",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- pid := os.Getpid()
-
- cgroupManager := NewManager(cfg, pid)
- require.NoError(t, cgroupManager.Setup())
-
- return pid
- },
- expectedPruned: false,
- },
- {
- desc: "gitaly-0 directory is deleted",
- cfg: cgroups.Config{
- Mountpoint: testhelper.TempDir(t),
- HierarchyRoot: "gitaly",
- Repositories: cgroups.Repositories{
- Count: 10,
- MemoryBytes: 10 * 1024 * 1024,
- CPUShares: 1024,
- },
- },
- setup: func(t *testing.T, cfg cgroups.Config) int {
- cgroupManager := NewManager(cfg, 0)
- require.NoError(t, cgroupManager.Setup())
-
- return 0
- },
- expectedPruned: true,
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.desc, func(t *testing.T) {
- memoryRoot := filepath.Join(
- tc.cfg.Mountpoint,
- "memory",
- tc.cfg.HierarchyRoot,
- )
- cpuRoot := filepath.Join(
- tc.cfg.Mountpoint,
- "cpu",
- tc.cfg.HierarchyRoot,
- )
-
- require.NoError(t, os.MkdirAll(cpuRoot, perm.PublicDir))
- require.NoError(t, os.MkdirAll(memoryRoot, perm.PublicDir))
-
- pid := tc.setup(t, tc.cfg)
-
- logger, hook := test.NewNullLogger()
- PruneOldCgroups(tc.cfg, logger)
-
- // create cgroups directories with a different pid
- oldGitalyProcessMemoryDir := filepath.Join(
- memoryRoot,
- fmt.Sprintf("gitaly-%d", pid),
- )
- oldGitalyProcesssCPUDir := filepath.Join(
- cpuRoot,
- fmt.Sprintf("gitaly-%d", pid),
- )
-
- if tc.expectedPruned {
- require.NoDirExists(t, oldGitalyProcessMemoryDir)
- require.NoDirExists(t, oldGitalyProcesssCPUDir)
- } else {
- require.DirExists(t, oldGitalyProcessMemoryDir)
- require.DirExists(t, oldGitalyProcesssCPUDir)
- require.Len(t, hook.Entries, 0)
- }
- })
- }
-}
diff --git a/internal/cgroups/manager.go b/internal/cgroups/manager.go
new file mode 100644
index 000000000..10592f62a
--- /dev/null
+++ b/internal/cgroups/manager.go
@@ -0,0 +1,11 @@
+//go:build !linux
+
+package cgroups
+
+import (
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+)
+
+func newCgroupManager(cfg cgroupscfg.Config, pid int) Manager {
+ return &NoopManager{}
+}
diff --git a/internal/cgroups/manager_linux.go b/internal/cgroups/manager_linux.go
new file mode 100644
index 000000000..d85d90340
--- /dev/null
+++ b/internal/cgroups/manager_linux.go
@@ -0,0 +1,141 @@
+//go:build linux
+
+package cgroups
+
+import (
+ "fmt"
+ "hash/crc32"
+ "os/exec"
+ "strings"
+
+ "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/prometheus/client_golang/prometheus"
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
+)
+
+// cfs_period_us hardcoded to be 100ms.
+const cfsPeriodUs uint64 = 100000
+
+type cgroupHandler interface {
+ setupParent(reposResources *specs.LinuxResources) error
+ setupRepository(reposResources *specs.LinuxResources) error
+ addToCgroup(pid int, cgroupPath string) error
+ collect(ch chan<- prometheus.Metric)
+ cleanup() error
+ currentProcessCgroup() string
+ repoPath(groupID int) string
+}
+
+// CGroupManager is a manager class that implements specific methods related to cgroups
+type CGroupManager struct {
+ cfg cgroupscfg.Config
+ pid int
+
+ handler cgroupHandler
+}
+
+func newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
+ return &CGroupManager{
+ cfg: cfg,
+ pid: pid,
+ handler: newV1Handler(cfg, pid),
+ }
+}
+
+// Setup parent cgroups and repository sub cgroups
+func (cgm *CGroupManager) Setup() error {
+ if err := cgm.handler.setupParent(cgm.configParentResources()); err != nil {
+ return err
+ }
+ if err := cgm.handler.setupRepository(cgm.configRepositoryResources()); err != nil {
+ return err
+ }
+ return nil
+}
+
+// AddCommand adds a Cmd to a cgroup
+func (cgm *CGroupManager) AddCommand(cmd *exec.Cmd, opts ...AddCommandOption) (string, error) {
+ var cfg addCommandCfg
+ for _, opt := range opts {
+ opt(&cfg)
+ }
+
+ key := cfg.cgroupKey
+ if key == "" {
+ key = strings.Join(cmd.Args, "/")
+ }
+
+ checksum := crc32.ChecksumIEEE(
+ []byte(key),
+ )
+
+ if cmd.Process == nil {
+ return "", fmt.Errorf("cannot add command that has not yet been started")
+ }
+
+ groupID := uint(checksum) % cgm.cfg.Repositories.Count
+ cgroupPath := cgm.handler.repoPath(int(groupID))
+
+ return cgroupPath, cgm.handler.addToCgroup(cmd.Process.Pid, cgroupPath)
+}
+
+// Cleanup cleans up cgroups created in Setup.
+func (cgm *CGroupManager) Cleanup() error {
+ return cgm.handler.cleanup()
+}
+
+// Describe is used to generate description information for each CGroupManager prometheus metric
+func (cgm *CGroupManager) Describe(ch chan<- *prometheus.Desc) {
+ prometheus.DescribeByCollect(cgm, ch)
+}
+
+// Collect is used to collect the current values of all CGroupManager prometheus metrics
+func (cgm *CGroupManager) Collect(ch chan<- prometheus.Metric) {
+ cgm.handler.collect(ch)
+}
+
+func (cgm *CGroupManager) currentProcessCgroup() string {
+ return cgm.handler.currentProcessCgroup()
+}
+
+func (cgm *CGroupManager) configParentResources() *specs.LinuxResources {
+ cfsPeriodUs := cfsPeriodUs
+ var parentResources specs.LinuxResources
+ // Leave them `nil` so it takes kernel default unless cfg value above `0`.
+ parentResources.CPU = &specs.LinuxCPU{}
+
+ if cgm.cfg.CPUShares > 0 {
+ parentResources.CPU.Shares = &cgm.cfg.CPUShares
+ }
+
+ if cgm.cfg.CPUQuotaUs > 0 {
+ parentResources.CPU.Quota = &cgm.cfg.CPUQuotaUs
+ parentResources.CPU.Period = &cfsPeriodUs
+ }
+
+ if cgm.cfg.MemoryBytes > 0 {
+ parentResources.Memory = &specs.LinuxMemory{Limit: &cgm.cfg.MemoryBytes}
+ }
+ return &parentResources
+}
+
+func (cgm *CGroupManager) configRepositoryResources() *specs.LinuxResources {
+ cfsPeriodUs := cfsPeriodUs
+ var reposResources specs.LinuxResources
+ // Leave them `nil` so it takes kernel default unless cfg value above `0`.
+ reposResources.CPU = &specs.LinuxCPU{}
+
+ if cgm.cfg.Repositories.CPUShares > 0 {
+ reposResources.CPU.Shares = &cgm.cfg.Repositories.CPUShares
+ }
+
+ if cgm.cfg.Repositories.CPUQuotaUs > 0 {
+ reposResources.CPU.Quota = &cgm.cfg.Repositories.CPUQuotaUs
+ reposResources.CPU.Period = &cfsPeriodUs
+ }
+
+ if cgm.cfg.Repositories.MemoryBytes > 0 {
+ reposResources.Memory = &specs.LinuxMemory{Limit: &cgm.cfg.Repositories.MemoryBytes}
+ }
+ return &reposResources
+}
diff --git a/internal/cgroups/mock_linux_test.go b/internal/cgroups/mock_linux_test.go
index 2cf735149..ca94b450d 100644
--- a/internal/cgroups/mock_linux_test.go
+++ b/internal/cgroups/mock_linux_test.go
@@ -1,3 +1,5 @@
+//go:build linux
+
/*
Adapted from https://github.com/containerd/cgroups/blob/f1d9380fd3c028194db9582825512fdf3f39ab2a/mock_test.go
@@ -26,7 +28,9 @@ import (
"testing"
"github.com/containerd/cgroups/v3/cgroup1"
+ "github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
+ cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
"gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
"gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
)
@@ -54,13 +58,9 @@ func newMock(t *testing.T) *mockCgroup {
}
}
-func (m *mockCgroup) hierarchy() ([]cgroup1.Subsystem, error) {
- return m.subsystems, nil
-}
-
func (m *mockCgroup) setupMockCgroupFiles(
t *testing.T,
- manager *CGroupV1Manager,
+ manager *CGroupManager,
memFailCount int,
) {
for _, s := range m.subsystems {
@@ -117,3 +117,11 @@ throttled_time 1000000`
}
}
}
+
+func (m *mockCgroup) newCgroupManager(cfg cgroupscfg.Config, pid int) *CGroupManager {
+ return newCgroupManager(cfg, pid)
+}
+
+func (m *mockCgroup) pruneOldCgroups(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+ PruneOldCgroups(cfg, logger)
+}
diff --git a/internal/cgroups/v1.go b/internal/cgroups/v1.go
deleted file mode 100644
index 8935bcdc5..000000000
--- a/internal/cgroups/v1.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build !linux
-
-package cgroups
-
-import (
- "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
-)
-
-// For systems other than Linux, we return a noop manager if cgroups was enabled.
-func newV1Manager(cfg cgroups.Config, pid int) *NoopManager {
- return &NoopManager{}
-}
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go
index 09bf23619..e9b81103a 100644
--- a/internal/cgroups/v1_linux.go
+++ b/internal/cgroups/v1_linux.go
@@ -1,9 +1,9 @@
+//go:build linux
+
package cgroups
import (
"fmt"
- "hash/crc32"
- "os/exec"
"path/filepath"
"strings"
"time"
@@ -11,16 +11,13 @@ import (
"github.com/containerd/cgroups/v3/cgroup1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/client_golang/prometheus"
+ "github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
cgroupscfg "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
"gitlab.com/gitlab-org/gitaly/v16/internal/log"
)
-// cfs_period_us hardcoded to be 100ms.
-const cfsPeriodUs uint64 = 100000
-
-// CGroupV1Manager is the manager for cgroups v1
-type CGroupV1Manager struct {
+type cgroupV1Handler struct {
cfg cgroupscfg.Config
hierarchy func() ([]cgroup1.Subsystem, error)
memoryReclaimAttemptsTotal *prometheus.GaugeVec
@@ -32,8 +29,8 @@ type CGroupV1Manager struct {
pid int
}
-func newV1Manager(cfg cgroupscfg.Config, pid int) *CGroupV1Manager {
- return &CGroupV1Manager{
+func newV1Handler(cfg cgroupscfg.Config, pid int) *cgroupV1Handler {
+ return &cgroupV1Handler{
cfg: cfg,
pid: pid,
hierarchy: func() ([]cgroup1.Subsystem, error) {
@@ -78,100 +75,34 @@ func newV1Manager(cfg cgroupscfg.Config, pid int) *CGroupV1Manager {
}
}
-//nolint:revive // This is unintentionally missing documentation.
-func (cg *CGroupV1Manager) Setup() error {
- cfsPeriodUs := cfsPeriodUs
-
- var parentResources specs.LinuxResources
- // Leave them `nil` so it takes kernel default unless cfg value above `0`.
- parentResources.CPU = &specs.LinuxCPU{}
-
- if cg.cfg.CPUShares > 0 {
- parentResources.CPU.Shares = &cg.cfg.CPUShares
- }
-
- if cg.cfg.CPUQuotaUs > 0 {
- parentResources.CPU.Quota = &cg.cfg.CPUQuotaUs
- parentResources.CPU.Period = &cfsPeriodUs
- }
-
- if cg.cfg.MemoryBytes > 0 {
- parentResources.Memory = &specs.LinuxMemory{Limit: &cg.cfg.MemoryBytes}
- }
-
+func (cvh *cgroupV1Handler) setupParent(parentResources *specs.LinuxResources) error {
if _, err := cgroup1.New(
- cgroup1.StaticPath(cg.currentProcessCgroup()),
- &parentResources,
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.StaticPath(cvh.currentProcessCgroup()),
+ parentResources,
+ cgroup1.WithHiearchy(cvh.hierarchy),
); err != nil {
return fmt.Errorf("failed creating parent cgroup: %w", err)
}
+ return nil
+}
- var reposResources specs.LinuxResources
- // Leave them `nil` so it takes kernel default unless cfg value above `0`.
- reposResources.CPU = &specs.LinuxCPU{}
-
- if cg.cfg.Repositories.CPUShares > 0 {
- reposResources.CPU.Shares = &cg.cfg.Repositories.CPUShares
- }
-
- if cg.cfg.Repositories.CPUQuotaUs > 0 {
- reposResources.CPU.Quota = &cg.cfg.Repositories.CPUQuotaUs
- reposResources.CPU.Period = &cfsPeriodUs
- }
-
- if cg.cfg.Repositories.MemoryBytes > 0 {
- reposResources.Memory = &specs.LinuxMemory{Limit: &cg.cfg.Repositories.MemoryBytes}
- }
-
- for i := 0; i < int(cg.cfg.Repositories.Count); i++ {
+func (cvh *cgroupV1Handler) setupRepository(reposResources *specs.LinuxResources) error {
+ for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
if _, err := cgroup1.New(
- cgroup1.StaticPath(cg.repoPath(i)),
- &reposResources,
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.StaticPath(cvh.repoPath(i)),
+ reposResources,
+ cgroup1.WithHiearchy(cvh.hierarchy),
); err != nil {
return fmt.Errorf("failed creating repository cgroup: %w", err)
}
}
-
return nil
}
-// AddCommand adds the given command to one of the CGroup's buckets. The bucket used for the command
-// is determined by hashing the repository storage and path. No error is returned if the command has already
-// exited.
-func (cg *CGroupV1Manager) AddCommand(
- cmd *exec.Cmd,
- opts ...AddCommandOption,
-) (string, error) {
- var cfg addCommandCfg
- for _, opt := range opts {
- opt(&cfg)
- }
-
- key := cfg.cgroupKey
- if key == "" {
- key = strings.Join(cmd.Args, "/")
- }
-
- checksum := crc32.ChecksumIEEE(
- []byte(key),
- )
-
- if cmd.Process == nil {
- return "", fmt.Errorf("cannot add command that has not yet been started")
- }
-
- groupID := uint(checksum) % cg.cfg.Repositories.Count
- cgroupPath := cg.repoPath(int(groupID))
-
- return cgroupPath, cg.addToCgroup(cmd.Process.Pid, cgroupPath)
-}
-
-func (cg *CGroupV1Manager) addToCgroup(pid int, cgroupPath string) error {
+func (cvh *cgroupV1Handler) addToCgroup(pid int, cgroupPath string) error {
control, err := cgroup1.Load(
cgroup1.StaticPath(cgroupPath),
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.WithHiearchy(cvh.hierarchy),
)
if err != nil {
return fmt.Errorf("failed loading %s cgroup: %w", cgroupPath, err)
@@ -189,18 +120,17 @@ func (cg *CGroupV1Manager) addToCgroup(pid int, cgroupPath string) error {
return nil
}
-// Collect collects metrics from the cgroups controller
-func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
- if !cg.cfg.MetricsEnabled {
+func (cvh *cgroupV1Handler) collect(ch chan<- prometheus.Metric) {
+ if !cvh.cfg.MetricsEnabled {
return
}
- for i := 0; i < int(cg.cfg.Repositories.Count); i++ {
- repoPath := cg.repoPath(i)
+ for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
+ repoPath := cvh.repoPath(i)
logger := log.Default().WithField("cgroup_path", repoPath)
control, err := cgroup1.Load(
cgroup1.StaticPath(repoPath),
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.WithHiearchy(cvh.hierarchy),
)
if err != nil {
logger.WithError(err).Warn("unable to load cgroup controller")
@@ -210,41 +140,41 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
if metrics, err := control.Stat(); err != nil {
logger.WithError(err).Warn("unable to get cgroup stats")
} else {
- memoryMetric := cg.memoryReclaimAttemptsTotal.WithLabelValues(repoPath)
+ memoryMetric := cvh.memoryReclaimAttemptsTotal.WithLabelValues(repoPath)
memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt))
ch <- memoryMetric
- cpuUserMetric := cg.cpuUsage.WithLabelValues(repoPath, "user")
+ cpuUserMetric := cvh.cpuUsage.WithLabelValues(repoPath, "user")
cpuUserMetric.Set(float64(metrics.CPU.Usage.User))
ch <- cpuUserMetric
ch <- prometheus.MustNewConstMetric(
- cg.cpuCFSPeriods,
+ cvh.cpuCFSPeriods,
prometheus.CounterValue,
float64(metrics.CPU.Throttling.Periods),
repoPath,
)
ch <- prometheus.MustNewConstMetric(
- cg.cpuCFSThrottledPeriods,
+ cvh.cpuCFSThrottledPeriods,
prometheus.CounterValue,
float64(metrics.CPU.Throttling.ThrottledPeriods),
repoPath,
)
ch <- prometheus.MustNewConstMetric(
- cg.cpuCFSThrottledTime,
+ cvh.cpuCFSThrottledTime,
prometheus.CounterValue,
float64(metrics.CPU.Throttling.ThrottledTime)/float64(time.Second),
repoPath,
)
- cpuKernelMetric := cg.cpuUsage.WithLabelValues(repoPath, "kernel")
+ cpuKernelMetric := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
cpuKernelMetric.Set(float64(metrics.CPU.Usage.Kernel))
ch <- cpuKernelMetric
}
- if subsystems, err := cg.hierarchy(); err != nil {
+ if subsystems, err := cvh.hierarchy(); err != nil {
logger.WithError(err).Warn("unable to get cgroup hierarchy")
} else {
for _, subsystem := range subsystems {
@@ -256,7 +186,7 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
continue
}
- procsMetric := cg.procs.WithLabelValues(repoPath, string(subsystem.Name()))
+ procsMetric := cvh.procs.WithLabelValues(repoPath, string(subsystem.Name()))
procsMetric.Set(float64(len(processes)))
ch <- procsMetric
}
@@ -264,18 +194,12 @@ func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) {
}
}
-// Describe describes the cgroup metrics that Collect provides
-func (cg *CGroupV1Manager) Describe(ch chan<- *prometheus.Desc) {
- prometheus.DescribeByCollect(cg, ch)
-}
-
-//nolint:revive // This is unintentionally missing documentation.
-func (cg *CGroupV1Manager) Cleanup() error {
- processCgroupPath := cg.currentProcessCgroup()
+func (cvh *cgroupV1Handler) cleanup() error {
+ processCgroupPath := cvh.currentProcessCgroup()
control, err := cgroup1.Load(
cgroup1.StaticPath(processCgroupPath),
- cgroup1.WithHiearchy(cg.hierarchy),
+ cgroup1.WithHiearchy(cvh.hierarchy),
)
if err != nil {
return fmt.Errorf("failed loading cgroup %s: %w", processCgroupPath, err)
@@ -288,12 +212,12 @@ func (cg *CGroupV1Manager) Cleanup() error {
return nil
}
-func (cg *CGroupV1Manager) repoPath(groupID int) string {
- return filepath.Join(cg.currentProcessCgroup(), fmt.Sprintf("repos-%d", groupID))
+func (cvh *cgroupV1Handler) repoPath(groupID int) string {
+ return filepath.Join(cvh.currentProcessCgroup(), fmt.Sprintf("repos-%d", groupID))
}
-func (cg *CGroupV1Manager) currentProcessCgroup() string {
- return config.GetGitalyProcessTempDir(cg.cfg.HierarchyRoot, cg.pid)
+func (cvh *cgroupV1Handler) currentProcessCgroup() string {
+ return config.GetGitalyProcessTempDir(cvh.cfg.HierarchyRoot, cvh.pid)
}
func defaultSubsystems(root string) ([]cgroup1.Subsystem, error) {
@@ -304,3 +228,21 @@ func defaultSubsystems(root string) ([]cgroup1.Subsystem, error) {
return subsystems, nil
}
+
+func pruneOldCgroupsV1(cfg cgroupscfg.Config, logger logrus.FieldLogger) {
+ if err := config.PruneOldGitalyProcessDirectories(
+ logger,
+ filepath.Join(cfg.Mountpoint, "memory",
+ cfg.HierarchyRoot),
+ ); err != nil {
+ logger.WithError(err).Error("failed to clean up memory cgroups")
+ }
+
+ if err := config.PruneOldGitalyProcessDirectories(
+ logger,
+ filepath.Join(cfg.Mountpoint, "cpu",
+ cfg.HierarchyRoot),
+ ); err != nil {
+ logger.WithError(err).Error("failed to clean up cpu cgroups")
+ }
+}
diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go
index a364d7965..524b4f91f 100644
--- a/internal/cgroups/v1_linux_test.go
+++ b/internal/cgroups/v1_linux_test.go
@@ -1,9 +1,11 @@
+//go:build linux
+
package cgroups
import (
- "bytes"
"fmt"
"hash/crc32"
+ "io/fs"
"os"
"os/exec"
"path/filepath"
@@ -12,6 +14,7 @@ import (
"testing"
"github.com/prometheus/client_golang/prometheus/testutil"
+ "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config/cgroups"
@@ -31,6 +34,13 @@ func defaultCgroupsConfig() cgroups.Config {
}
}
+func TestNewManagerV1(t *testing.T) {
+ cfg := cgroups.Config{Repositories: cgroups.Repositories{Count: 10}}
+
+ manager := newCgroupManager(cfg, 1)
+ require.IsType(t, &cgroupV1Handler{}, manager.handler)
+}
+
func TestSetup_ParentCgroups(t *testing.T) {
tests := []struct {
name string
@@ -84,12 +94,9 @@ func TestSetup_ParentCgroups(t *testing.T) {
mock := newMock(t)
pid := 1
tt.cfg.HierarchyRoot = "gitaly"
+ tt.cfg.Mountpoint = mock.root
- v1Manager := &CGroupV1Manager{
- cfg: tt.cfg,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager := mock.newCgroupManager(tt.cfg, pid)
require.NoError(t, v1Manager.Setup())
memoryLimitPath := filepath.Join(
@@ -167,12 +174,10 @@ func TestSetup_RepoCgroups(t *testing.T) {
cfg := defaultCgroupsConfig()
cfg.Repositories = tt.cfg
cfg.Repositories.Count = 3
+ cfg.HierarchyRoot = "gitaly"
+ cfg.Mountpoint = mock.root
- v1Manager := &CGroupV1Manager{
- cfg: cfg,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager := mock.newCgroupManager(cfg, pid)
require.NoError(t, v1Manager.Setup())
@@ -208,24 +213,18 @@ func TestAddCommand(t *testing.T) {
config.Repositories.Count = 10
config.Repositories.MemoryBytes = 1024
config.Repositories.CPUShares = 16
+ config.HierarchyRoot = "gitaly"
+ config.Mountpoint = mock.root
pid := 1
- v1Manager1 := &CGroupV1Manager{
- cfg: config,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager1 := mock.newCgroupManager(config, pid)
require.NoError(t, v1Manager1.Setup())
ctx := testhelper.Context(t)
cmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
require.NoError(t, cmd2.Run())
- v1Manager2 := &CGroupV1Manager{
- cfg: config,
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ v1Manager2 := mock.newCgroupManager(config, pid)
t.Run("without overridden key", func(t *testing.T) {
_, err := v1Manager2.AddCommand(cmd2)
@@ -270,11 +269,11 @@ func TestCleanup(t *testing.T) {
mock := newMock(t)
pid := 1
- v1Manager := &CGroupV1Manager{
- cfg: defaultCgroupsConfig(),
- hierarchy: mock.hierarchy,
- pid: pid,
- }
+ cfg := defaultCgroupsConfig()
+ cfg.Mountpoint = mock.root
+
+ v1Manager := mock.newCgroupManager(cfg, pid)
+
require.NoError(t, v1Manager.Setup())
require.NoError(t, v1Manager.Cleanup())
@@ -288,48 +287,17 @@ func TestCleanup(t *testing.T) {
}
func TestMetrics(t *testing.T) {
- t.Parallel()
-
- mock := newMock(t)
-
- config := defaultCgroupsConfig()
- config.Repositories.Count = 1
- config.Repositories.MemoryBytes = 1048576
- config.Repositories.CPUShares = 16
-
- v1Manager1 := newV1Manager(config, 1)
- v1Manager1.hierarchy = mock.hierarchy
-
- mock.setupMockCgroupFiles(t, v1Manager1, 2)
-
- require.NoError(t, v1Manager1.Setup())
-
- ctx := testhelper.Context(t)
-
- cmd := exec.CommandContext(ctx, "ls", "-hal", ".")
- require.NoError(t, cmd.Start())
- _, err := v1Manager1.AddCommand(cmd)
- require.NoError(t, err)
-
- gitCmd1 := exec.CommandContext(ctx, "ls", "-hal", ".")
- require.NoError(t, gitCmd1.Start())
- _, err = v1Manager1.AddCommand(gitCmd1)
- require.NoError(t, err)
-
- gitCmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
- require.NoError(t, gitCmd2.Start())
- _, err = v1Manager1.AddCommand(gitCmd2)
- require.NoError(t, err)
- defer func() {
- require.NoError(t, gitCmd2.Wait())
- }()
-
- require.NoError(t, cmd.Wait())
- require.NoError(t, gitCmd1.Wait())
-
- repoCgroupPath := filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0")
-
- expected := strings.NewReader(strings.ReplaceAll(`# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup
+ tests := []struct {
+ name string
+ metricsEnabled bool
+ pid int
+ expect string
+ }{
+ {
+ name: "metrics enabled: true",
+ metricsEnabled: true,
+ pid: 1,
+ expect: `# HELP gitaly_cgroup_cpu_usage_total CPU Usage of Cgroup
# TYPE gitaly_cgroup_cpu_usage_total gauge
gitaly_cgroup_cpu_usage_total{path="%s",type="kernel"} 0
gitaly_cgroup_cpu_usage_total{path="%s",type="user"} 0
@@ -349,20 +317,223 @@ gitaly_cgroup_cpu_cfs_throttled_periods_total{path="%s"} 20
# HELP gitaly_cgroup_cpu_cfs_throttled_seconds_total Total time duration the Cgroup has been throttled
# TYPE gitaly_cgroup_cpu_cfs_throttled_seconds_total counter
gitaly_cgroup_cpu_cfs_throttled_seconds_total{path="%s"} 0.001
-`, "%s", repoCgroupPath))
+`,
+ },
+ {
+ name: "metrics enabled: false",
+ metricsEnabled: false,
+ pid: 2,
+ },
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+ mock := newMock(t)
+
+ config := defaultCgroupsConfig()
+ config.Repositories.Count = 1
+ config.Repositories.MemoryBytes = 1048576
+ config.Repositories.CPUShares = 16
+ config.Mountpoint = mock.root
+ config.MetricsEnabled = tt.metricsEnabled
+
+ v1Manager1 := mock.newCgroupManager(config, tt.pid)
+
+ mock.setupMockCgroupFiles(t, v1Manager1, 2)
+ require.NoError(t, v1Manager1.Setup())
+
+ ctx := testhelper.Context(t)
+
+ cmd := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, cmd.Start())
+ _, err := v1Manager1.AddCommand(cmd)
+ require.NoError(t, err)
+
+ gitCmd1 := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, gitCmd1.Start())
+ _, err = v1Manager1.AddCommand(gitCmd1)
+ require.NoError(t, err)
+
+ gitCmd2 := exec.CommandContext(ctx, "ls", "-hal", ".")
+ require.NoError(t, gitCmd2.Start())
+ _, err = v1Manager1.AddCommand(gitCmd2)
+ require.NoError(t, err)
+ defer func() {
+ require.NoError(t, gitCmd2.Wait())
+ }()
+
+ require.NoError(t, cmd.Wait())
+ require.NoError(t, gitCmd1.Wait())
- for _, metricsEnabled := range []bool{true, false} {
- t.Run(fmt.Sprintf("metrics enabled: %v", metricsEnabled), func(t *testing.T) {
- v1Manager1.cfg.MetricsEnabled = metricsEnabled
+ repoCgroupPath := filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0")
+
+ expected := strings.NewReader(strings.ReplaceAll(tt.expect, "%s", repoCgroupPath))
+ assert.NoError(t, testutil.CollectAndCompare(v1Manager1, expected))
+ })
+ }
+}
+
+func TestPruneOldCgroups(t *testing.T) {
+ t.Parallel()
+
+ testCases := []struct {
+ desc string
+ cfg cgroups.Config
+ expectedPruned bool
+ // setup returns a pid
+ setup func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int
+ }{
+ {
+ desc: "process belongs to another user",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ pid := 1
+ cgroupManager := newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+
+ return pid
+ },
+ expectedPruned: true,
+ },
+ {
+ desc: "no hierarchy root",
+ cfg: cgroups.Config{
+ HierarchyRoot: "",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ pid := 1
+ cgroupManager := newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+ return 1
+ },
+ expectedPruned: false,
+ },
+ {
+ desc: "pid of finished process",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ cmd := exec.Command("ls")
+ require.NoError(t, cmd.Run())
+ pid := cmd.Process.Pid
+
+ cgroupManager := newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+
+ memoryRoot := filepath.Join(
+ cfg.Mountpoint,
+ "memory",
+ cfg.HierarchyRoot,
+ "memory.limit_in_bytes",
+ )
+ require.NoError(t, os.WriteFile(memoryRoot, []byte{}, fs.ModeAppend))
+
+ return pid
+ },
+ expectedPruned: true,
+ },
+ {
+ desc: "pid of running process",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ pid := os.Getpid()
+
+ cgroupManager := newCgroupManager(cfg, pid)
+ require.NoError(t, cgroupManager.Setup())
+
+ return pid
+ },
+ expectedPruned: false,
+ },
+ {
+ desc: "gitaly-0 directory is deleted",
+ cfg: cgroups.Config{
+ HierarchyRoot: "gitaly",
+ Repositories: cgroups.Repositories{
+ Count: 10,
+ MemoryBytes: 10 * 1024 * 1024,
+ CPUShares: 1024,
+ },
+ },
+ setup: func(t *testing.T, cfg cgroups.Config, mock *mockCgroup) int {
+ cgroupManager := newCgroupManager(cfg, 0)
+ require.NoError(t, cgroupManager.Setup())
+
+ return 0
+ },
+ expectedPruned: true,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ mock := newMock(t)
+ tc.cfg.Mountpoint = mock.root
+
+ memoryRoot := filepath.Join(
+ tc.cfg.Mountpoint,
+ "memory",
+ tc.cfg.HierarchyRoot,
+ )
+ cpuRoot := filepath.Join(
+ tc.cfg.Mountpoint,
+ "cpu",
+ tc.cfg.HierarchyRoot,
+ )
+
+ require.NoError(t, os.MkdirAll(cpuRoot, perm.PublicDir))
+ require.NoError(t, os.MkdirAll(memoryRoot, perm.PublicDir))
+
+ pid := tc.setup(t, tc.cfg, mock)
+
+ logger, hook := test.NewNullLogger()
+
+ mock.pruneOldCgroups(tc.cfg, logger)
+
+ // create cgroups directories with a different pid
+ oldGitalyProcessMemoryDir := filepath.Join(
+ memoryRoot,
+ fmt.Sprintf("gitaly-%d", pid),
+ )
+ oldGitalyProcesssCPUDir := filepath.Join(
+ cpuRoot,
+ fmt.Sprintf("gitaly-%d", pid),
+ )
- if metricsEnabled {
- assert.NoError(t, testutil.CollectAndCompare(
- v1Manager1,
- expected))
+ if tc.expectedPruned {
+ require.NoDirExists(t, oldGitalyProcessMemoryDir)
+ require.NoDirExists(t, oldGitalyProcesssCPUDir)
} else {
- assert.NoError(t, testutil.CollectAndCompare(
- v1Manager1,
- bytes.NewBufferString("")))
+ require.DirExists(t, oldGitalyProcessMemoryDir)
+ require.DirExists(t, oldGitalyProcesssCPUDir)
+ require.Len(t, hook.Entries, 0)
}
})
}