diff options
author | Will Chandler <wchandler@gitlab.com> | 2023-11-16 00:35:01 +0300 |
---|---|---|
committer | Will Chandler <wchandler@gitlab.com> | 2023-11-21 06:15:49 +0300 |
commit | 6e349dcc07d5283a37ff9d1b8bf0332386ea8c14 (patch) | |
tree | ca9a67b830ee11377504712f285a14e340739227 | |
parent | 5f57b0c636f00cdff8049375bdca64ebd7b2710f (diff) |
gitaly: Don't cleanup cgroups on exit
To mitigate the impact of removing `tableflip` we need both startup and
shutdown of Gitaly processes to be as fast as possible. With 105f6dd816
(cgroups: Create repository cgroups on-demand, 2023-10-26) we improved
startup times by creating cgroups individually when needed, rather than
as a blocking task during startup.
We currently block shutdown while removing our existing
cgroups. This is as slow as cgroup creation, taking up to 20 seconds to
remove 1000 cgroups on hosts with large number cgroups.
`Cleanup` is equivalent to an eager `PruneOldCgroups` scoped to the
current processes's cgroups. However, there is no urgent need to remove
cgroups immediately, so long as we ensure they don't build up
excessively. We can rely on the eventual cleanup from `PruneOldCgroups`
run by the next Gitaly process, which allows us to avoid delaying
shutdown.
Stop using `Cleanup` and remove it from the `cgroup.Manager` interface.
-rw-r--r-- | internal/cgroups/cgroups.go | 4 | ||||
-rw-r--r-- | internal/cgroups/handler_linux_test.go | 24 | ||||
-rw-r--r-- | internal/cgroups/manager_linux.go | 6 | ||||
-rw-r--r-- | internal/cgroups/noop.go | 5 | ||||
-rw-r--r-- | internal/cgroups/v1_linux.go | 15 | ||||
-rw-r--r-- | internal/cgroups/v2_linux.go | 15 | ||||
-rw-r--r-- | internal/cli/gitaly/serve.go | 6 | ||||
-rw-r--r-- | internal/limiter/watchers/testhelper_test.go | 1 |
8 files changed, 0 insertions, 76 deletions
diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go index f8bca0609..3a69710e8 100644 --- a/internal/cgroups/cgroups.go +++ b/internal/cgroups/cgroups.go @@ -86,10 +86,6 @@ type Manager interface { // to start the command directly in the correct cgroup. On success, the function returns an io.Closer // that must be closed after the command has been started to close the cgroup's file descriptor. CloneIntoCgroup(*exec.Cmd, ...AddCommandOption) (string, io.Closer, error) - // Cleanup cleans up cgroups created in Setup. - // It is expected to be called once at Gitaly shutdown from any - // instance of the Manager. - Cleanup() error // Stats returns cgroup accounting statistics collected by reading // cgroupfs files. Those statistics are generic for both Cgroup V1 // and Cgroup V2. diff --git a/internal/cgroups/handler_linux_test.go b/internal/cgroups/handler_linux_test.go index ac87939e6..cb2a701af 100644 --- a/internal/cgroups/handler_linux_test.go +++ b/internal/cgroups/handler_linux_test.go @@ -346,30 +346,6 @@ func TestAddCommand(t *testing.T) { } } -func TestCleanup(t *testing.T) { - for _, version := range []int{1, 2} { - t.Run("cgroups-v"+strconv.Itoa(version), func(t *testing.T) { - mock := newMock(t, version) - - pid := 1 - cfg := defaultCgroupsConfig() - cfg.Mountpoint = mock.rootPath() - - manager := mock.newCgroupManager(cfg, testhelper.SharedLogger(t), pid) - mock.setupMockCgroupFiles(t, manager, []uint{0, 1, 2}) - - require.NoError(t, manager.Setup()) - require.NoError(t, manager.Cleanup()) - - for i := uint(0); i < 3; i++ { - for _, path := range mock.repoPaths(pid, i) { - require.NoDirExists(t, path) - } - } - }) - } -} - func TestMetrics(t *testing.T) { tests := []struct { name string diff --git a/internal/cgroups/manager_linux.go b/internal/cgroups/manager_linux.go index f55a7c58f..0ee67acc2 100644 --- a/internal/cgroups/manager_linux.go +++ b/internal/cgroups/manager_linux.go @@ -30,7 +30,6 @@ type cgroupHandler interface { createCgroup(repoResources *specs.LinuxResources, cgroupPath string) error addToCgroup(pid int, cgroupPath string) error collect(repoPath string, ch chan<- prometheus.Metric) - cleanup() error currentProcessCgroup() string repoPath(groupID int) string stats() (Stats, error) @@ -209,11 +208,6 @@ func (cgm *CGroupManager) cgroupPathForCommand(cmd *exec.Cmd, opts []AddCommandO return cgm.handler.repoPath(int(groupID)) } -// Cleanup cleans up cgroups created in Setup. -func (cgm *CGroupManager) Cleanup() error { - return cgm.handler.cleanup() -} - // Describe is used to generate description information for each CGroupManager prometheus metric func (cgm *CGroupManager) Describe(ch chan<- *prometheus.Desc) { prometheus.DescribeByCollect(cgm, ch) diff --git a/internal/cgroups/noop.go b/internal/cgroups/noop.go index 535c7df4b..ed4b936c0 100644 --- a/internal/cgroups/noop.go +++ b/internal/cgroups/noop.go @@ -40,11 +40,6 @@ func (cg *NoopManager) CloneIntoCgroup(*exec.Cmd, ...AddCommandOption) (string, return "", io.NopCloser(nil), nil } -//nolint:revive // This is unintentionally missing documentation. -func (cg *NoopManager) Cleanup() error { - return nil -} - // Describe does nothing func (cg *NoopManager) Describe(ch chan<- *prometheus.Desc) {} diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go index e56829c26..22cec8198 100644 --- a/internal/cgroups/v1_linux.go +++ b/internal/cgroups/v1_linux.go @@ -151,21 +151,6 @@ func (cvh *cgroupV1Handler) collect(repoPath string, ch chan<- prometheus.Metric } } -func (cvh *cgroupV1Handler) cleanup() error { - processCgroupPath := cvh.currentProcessCgroup() - - control, err := cvh.loadCgroup(processCgroupPath) - if err != nil { - return err - } - - if err := control.Delete(); err != nil { - return fmt.Errorf("failed cleaning up cgroup %s: %w", processCgroupPath, err) - } - - return nil -} - func (cvh *cgroupV1Handler) repoPath(groupID int) string { return filepath.Join(cvh.currentProcessCgroup(), fmt.Sprintf("repos-%d", groupID)) } diff --git a/internal/cgroups/v2_linux.go b/internal/cgroups/v2_linux.go index 479b2c5ce..8ef717d36 100644 --- a/internal/cgroups/v2_linux.go +++ b/internal/cgroups/v2_linux.go @@ -147,21 +147,6 @@ func (cvh *cgroupV2Handler) collect(repoPath string, ch chan<- prometheus.Metric } } -func (cvh *cgroupV2Handler) cleanup() error { - processCgroupPath := cvh.currentProcessCgroup() - - control, err := cvh.loadCgroup(processCgroupPath) - if err != nil { - return err - } - - if err := control.Delete(); err != nil { - return fmt.Errorf("failed cleaning up cgroup %s: %w", processCgroupPath, err) - } - - return nil -} - func (cvh *cgroupV2Handler) repoPath(groupID int) string { return filepath.Join(cvh.currentProcessCgroup(), fmt.Sprintf("repos-%d", groupID)) } diff --git a/internal/cli/gitaly/serve.go b/internal/cli/gitaly/serve.go index 8d6739067..2dce430d0 100644 --- a/internal/cli/gitaly/serve.go +++ b/internal/cli/gitaly/serve.go @@ -183,12 +183,6 @@ func run(cfg config.Cfg, logger log.Logger) error { logger.WithField("duration_ms", time.Since(began).Milliseconds()).Info("finished initializing cgroups") defer func() { - if err := cgroupMgr.Cleanup(); err != nil { - logger.WithError(err).Warn("error cleaning up cgroups") - } - }() - - defer func() { if err := os.RemoveAll(cfg.RuntimeDir); err != nil { logger.Warn("could not clean up runtime dir") } diff --git a/internal/limiter/watchers/testhelper_test.go b/internal/limiter/watchers/testhelper_test.go index 81fcef53c..bb4613acc 100644 --- a/internal/limiter/watchers/testhelper_test.go +++ b/internal/limiter/watchers/testhelper_test.go @@ -27,7 +27,6 @@ func (m *testCgroupManager) Stats() (cgroups.Stats, error) { return m.statsList[m.statsIndex-1], m.statsErr } func (m *testCgroupManager) Setup() error { return nil } -func (m *testCgroupManager) Cleanup() error { return nil } func (m *testCgroupManager) Describe(ch chan<- *prometheus.Desc) {} func (m *testCgroupManager) Collect(ch chan<- prometheus.Metric) {} func (m *testCgroupManager) AddCommand(*exec.Cmd, ...cgroups.AddCommandOption) (string, error) { |