Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Chandler <wchandler@gitlab.com>2023-10-26 23:48:56 +0300
committerWill Chandler <wchandler@gitlab.com>2023-11-08 06:53:56 +0300
commit02a39ec23e6f768d3a57d6c4ca119faf9ccb757d (patch)
tree413f136bf0932c91d69728f3099e63442908e91b
parente53929774ee1d3d8f910f706564307526997f2d6 (diff)
cgroups: Update collect to check if cgroup exists
We will shortly start creating cgroups on-demand, rather than up-front as part of Gitaly's startup process. Hoist the loop over repository cgroups into the manager's `Collect` method, passing the path into the version-specific handlers. This allows us to avoid leaking the `cgroupLock` abstraction outside of the manager.
-rw-r--r--internal/cgroups/manager_linux.go17
-rw-r--r--internal/cgroups/v1_linux.go117
-rw-r--r--internal/cgroups/v1_linux_test.go4
-rw-r--r--internal/cgroups/v2_linux.go103
-rw-r--r--internal/cgroups/v2_linux_test.go4
5 files changed, 126 insertions, 119 deletions
diff --git a/internal/cgroups/manager_linux.go b/internal/cgroups/manager_linux.go
index a97a94530..7dc5cae42 100644
--- a/internal/cgroups/manager_linux.go
+++ b/internal/cgroups/manager_linux.go
@@ -30,7 +30,7 @@ type cgroupHandler interface {
setupRepository(status *cgroupStatus, reposResources *specs.LinuxResources) error
createCgroup(repoResources *specs.LinuxResources, cgroupPath string) error
addToCgroup(pid int, cgroupPath string) error
- collect(ch chan<- prometheus.Metric)
+ collect(repoPath string, ch chan<- prometheus.Metric)
cleanup() error
currentProcessCgroup() string
repoPath(groupID int) string
@@ -216,7 +216,20 @@ func (cgm *CGroupManager) Describe(ch chan<- *prometheus.Desc) {
// Collect is used to collect the current values of all CGroupManager prometheus metrics
func (cgm *CGroupManager) Collect(ch chan<- prometheus.Metric) {
- cgm.handler.collect(ch)
+ if !cgm.cfg.MetricsEnabled {
+ return
+ }
+
+ for i := 0; i < int(cgm.cfg.Repositories.Count); i++ {
+ repoPath := cgm.handler.repoPath(i)
+
+ cgLock := cgm.status.getLock(repoPath)
+ if !cgLock.isCreated() {
+ continue
+ }
+
+ cgm.handler.collect(repoPath, ch)
+ }
}
// Stats returns cgroup accounting statistics collected by reading
diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go
index 2b2e2b274..d82668fc4 100644
--- a/internal/cgroups/v1_linux.go
+++ b/internal/cgroups/v1_linux.go
@@ -102,73 +102,66 @@ func (cvh *cgroupV1Handler) loadCgroup(cgroupPath string) (cgroup1.Cgroup, error
return control, nil
}
-func (cvh *cgroupV1Handler) collect(ch chan<- prometheus.Metric) {
- if !cvh.cfg.MetricsEnabled {
+func (cvh *cgroupV1Handler) collect(repoPath string, ch chan<- prometheus.Metric) {
+ logger := cvh.logger.WithField("cgroup_path", repoPath)
+ control, err := cvh.loadCgroup(repoPath)
+ if err != nil {
+ logger.WithError(err).Warn("unable to load cgroup controller")
return
}
- for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
- repoPath := cvh.repoPath(i)
- logger := cvh.logger.WithField("cgroup_path", repoPath)
- control, err := cvh.loadCgroup(repoPath)
- if err != nil {
- logger.WithError(err).Warn("unable to load cgroup controller")
- return
- }
-
- if metrics, err := control.Stat(); err != nil {
- logger.WithError(err).Warn("unable to get cgroup stats")
- } else {
- memoryMetric := cvh.memoryReclaimAttemptsTotal.WithLabelValues(repoPath)
- memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt))
- ch <- memoryMetric
-
- cpuUserMetric := cvh.cpuUsage.WithLabelValues(repoPath, "user")
- cpuUserMetric.Set(float64(metrics.CPU.Usage.User))
- ch <- cpuUserMetric
-
- ch <- prometheus.MustNewConstMetric(
- cvh.cpuCFSPeriods,
- prometheus.CounterValue,
- float64(metrics.CPU.Throttling.Periods),
- repoPath,
- )
-
- ch <- prometheus.MustNewConstMetric(
- cvh.cpuCFSThrottledPeriods,
- prometheus.CounterValue,
- float64(metrics.CPU.Throttling.ThrottledPeriods),
- repoPath,
- )
-
- ch <- prometheus.MustNewConstMetric(
- cvh.cpuCFSThrottledTime,
- prometheus.CounterValue,
- float64(metrics.CPU.Throttling.ThrottledTime)/float64(time.Second),
- repoPath,
- )
-
- cpuKernelMetric := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
- cpuKernelMetric.Set(float64(metrics.CPU.Usage.Kernel))
- ch <- cpuKernelMetric
- }
+ if metrics, err := control.Stat(); err != nil {
+ logger.WithError(err).Warn("unable to get cgroup stats")
+ } else {
+ memoryMetric := cvh.memoryReclaimAttemptsTotal.WithLabelValues(repoPath)
+ memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt))
+ ch <- memoryMetric
+
+ cpuUserMetric := cvh.cpuUsage.WithLabelValues(repoPath, "user")
+ cpuUserMetric.Set(float64(metrics.CPU.Usage.User))
+ ch <- cpuUserMetric
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSPeriods,
+ prometheus.CounterValue,
+ float64(metrics.CPU.Throttling.Periods),
+ repoPath,
+ )
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSThrottledPeriods,
+ prometheus.CounterValue,
+ float64(metrics.CPU.Throttling.ThrottledPeriods),
+ repoPath,
+ )
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSThrottledTime,
+ prometheus.CounterValue,
+ float64(metrics.CPU.Throttling.ThrottledTime)/float64(time.Second),
+ repoPath,
+ )
+
+ cpuKernelMetric := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
+ cpuKernelMetric.Set(float64(metrics.CPU.Usage.Kernel))
+ ch <- cpuKernelMetric
+ }
- if subsystems, err := cvh.hierarchy(); err != nil {
- logger.WithError(err).Warn("unable to get cgroup hierarchy")
- } else {
- for _, subsystem := range subsystems {
- processes, err := control.Processes(subsystem.Name(), true)
- if err != nil {
- logger.WithField("subsystem", subsystem.Name()).
- WithError(err).
- Warn("unable to get process list")
- continue
- }
-
- procsMetric := cvh.procs.WithLabelValues(repoPath, string(subsystem.Name()))
- procsMetric.Set(float64(len(processes)))
- ch <- procsMetric
+ if subsystems, err := cvh.hierarchy(); err != nil {
+ logger.WithError(err).Warn("unable to get cgroup hierarchy")
+ } else {
+ for _, subsystem := range subsystems {
+ processes, err := control.Processes(subsystem.Name(), true)
+ if err != nil {
+ logger.WithField("subsystem", subsystem.Name()).
+ WithError(err).
+ Warn("unable to get process list")
+ continue
}
+
+ procsMetric := cvh.procs.WithLabelValues(repoPath, string(subsystem.Name()))
+ procsMetric.Set(float64(len(processes)))
+ ch <- procsMetric
}
}
}
diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go
index dcb4358b8..15361000b 100644
--- a/internal/cgroups/v1_linux_test.go
+++ b/internal/cgroups/v1_linux_test.go
@@ -346,6 +346,10 @@ gitaly_cgroup_cpu_cfs_throttled_seconds_total{path="%s"} 0.001
v1Manager1 := mock.newCgroupManager(config, testhelper.SharedLogger(t), tt.pid)
+ groupID := calcGroupID(cmdArgs, config.Repositories.Count)
+ cgLock := v1Manager1.status.getLock(v1Manager1.handler.repoPath(int(groupID)))
+ cgLock.created.Store(true)
+
mock.setupMockCgroupFiles(t, v1Manager1, mockCgroupFile{"memory.failcnt", "2"})
require.NoError(t, v1Manager1.Setup())
diff --git a/internal/cgroups/v2_linux.go b/internal/cgroups/v2_linux.go
index 407edc90e..ccb1642dc 100644
--- a/internal/cgroups/v2_linux.go
+++ b/internal/cgroups/v2_linux.go
@@ -103,68 +103,61 @@ func (cvh *cgroupV2Handler) loadCgroup(cgroupPath string) (*cgroup2.Manager, err
return control, nil
}
-func (cvh *cgroupV2Handler) collect(ch chan<- prometheus.Metric) {
- if !cvh.cfg.MetricsEnabled {
+func (cvh *cgroupV2Handler) collect(repoPath string, ch chan<- prometheus.Metric) {
+ logger := cvh.logger.WithField("cgroup_path", repoPath)
+ control, err := cvh.loadCgroup(repoPath)
+ if err != nil {
+ logger.WithError(err).Warn("unable to load cgroup controller")
return
}
- for i := 0; i < int(cvh.cfg.Repositories.Count); i++ {
- repoPath := cvh.repoPath(i)
- logger := cvh.logger.WithField("cgroup_path", repoPath)
- control, err := cvh.loadCgroup(repoPath)
+ if metrics, err := control.Stat(); err != nil {
+ logger.WithError(err).Warn("unable to get cgroup stats")
+ } else {
+ cpuUserMetric := cvh.cpuUsage.WithLabelValues(repoPath, "user")
+ cpuUserMetric.Set(float64(metrics.CPU.UserUsec))
+ ch <- cpuUserMetric
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSPeriods,
+ prometheus.CounterValue,
+ float64(metrics.CPU.NrPeriods),
+ repoPath,
+ )
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSThrottledPeriods,
+ prometheus.CounterValue,
+ float64(metrics.CPU.NrThrottled),
+ repoPath,
+ )
+
+ ch <- prometheus.MustNewConstMetric(
+ cvh.cpuCFSThrottledTime,
+ prometheus.CounterValue,
+ float64(metrics.CPU.ThrottledUsec)/float64(time.Second),
+ repoPath,
+ )
+
+ cpuKernelMetric := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
+ cpuKernelMetric.Set(float64(metrics.CPU.SystemUsec))
+ ch <- cpuKernelMetric
+ }
+
+ if subsystems, err := control.Controllers(); err != nil {
+ logger.WithError(err).Warn("unable to get cgroup hierarchy")
+ } else {
+ processes, err := control.Procs(true)
if err != nil {
- logger.WithError(err).Warn("unable to load cgroup controller")
+ logger.WithError(err).
+ Warn("unable to get process list")
return
}
- if metrics, err := control.Stat(); err != nil {
- logger.WithError(err).Warn("unable to get cgroup stats")
- } else {
- cpuUserMetric := cvh.cpuUsage.WithLabelValues(repoPath, "user")
- cpuUserMetric.Set(float64(metrics.CPU.UserUsec))
- ch <- cpuUserMetric
-
- ch <- prometheus.MustNewConstMetric(
- cvh.cpuCFSPeriods,
- prometheus.CounterValue,
- float64(metrics.CPU.NrPeriods),
- repoPath,
- )
-
- ch <- prometheus.MustNewConstMetric(
- cvh.cpuCFSThrottledPeriods,
- prometheus.CounterValue,
- float64(metrics.CPU.NrThrottled),
- repoPath,
- )
-
- ch <- prometheus.MustNewConstMetric(
- cvh.cpuCFSThrottledTime,
- prometheus.CounterValue,
- float64(metrics.CPU.ThrottledUsec)/float64(time.Second),
- repoPath,
- )
-
- cpuKernelMetric := cvh.cpuUsage.WithLabelValues(repoPath, "kernel")
- cpuKernelMetric.Set(float64(metrics.CPU.SystemUsec))
- ch <- cpuKernelMetric
- }
-
- if subsystems, err := control.Controllers(); err != nil {
- logger.WithError(err).Warn("unable to get cgroup hierarchy")
- } else {
- processes, err := control.Procs(true)
- if err != nil {
- logger.WithError(err).
- Warn("unable to get process list")
- continue
- }
-
- for _, subsystem := range subsystems {
- procsMetric := cvh.procs.WithLabelValues(repoPath, subsystem)
- procsMetric.Set(float64(len(processes)))
- ch <- procsMetric
- }
+ for _, subsystem := range subsystems {
+ procsMetric := cvh.procs.WithLabelValues(repoPath, subsystem)
+ procsMetric.Set(float64(len(processes)))
+ ch <- procsMetric
}
}
}
diff --git a/internal/cgroups/v2_linux_test.go b/internal/cgroups/v2_linux_test.go
index a0042083d..08a62428f 100644
--- a/internal/cgroups/v2_linux_test.go
+++ b/internal/cgroups/v2_linux_test.go
@@ -325,6 +325,10 @@ gitaly_cgroup_procs_total{path="%s",subsystem="memory"} 1
v2Manager1 := mock.newCgroupManager(config, testhelper.SharedLogger(t), tt.pid)
+ groupID := calcGroupID(cmdArgs, config.Repositories.Count)
+ cgLock := v2Manager1.status.getLock(v2Manager1.handler.repoPath(int(groupID)))
+ cgLock.created.Store(true)
+
mock.setupMockCgroupFiles(t, v2Manager1)
require.NoError(t, v2Manager1.Setup())