gitlab.com/gitlab-org/gitaly.git

author     Quang-Minh Nguyen <qmnguyen@gitlab.com>  2023-11-09 07:53:00 +0300
committer  Quang-Minh Nguyen <qmnguyen@gitlab.com>  2023-11-10 12:29:08 +0300
commit     0767596516b0d8457094023b40e28ac883bfc8d4 (patch)
tree       adc20c9c5a08cd4f834ef3819f59a0d39547b376
parent     c65b631d971809d9e0294356d7892860d4800cf3 (diff)
limiter: Make CpuThrottledThreshold configurable
In the current implementation, the adaptive limiting kicks in when the resource level exceeds hard-coded thresholds:

* 90% of the parent cgroup's memory.
* The cgroup's CPU is throttled for 50% of the observation time.

Although the current CPU throttled threshold is reasonable, it is not a good fit for all cases. A more powerful machine can tolerate a higher throttling rate, while a less powerful machine may want to lower the limit sooner. This commit adds the ability to customize the CPU throttled threshold.
-rw-r--r--  internal/cli/gitaly/serve.go                          |  2
-rw-r--r--  internal/gitaly/config/config.go                      | 18
-rw-r--r--  internal/gitaly/config/config_test.go                 | 20
-rw-r--r--  internal/limiter/watchers/cgroup_cpu_watcher.go       | 23
-rw-r--r--  internal/limiter/watchers/cgroup_cpu_watcher_test.go  | 48
5 files changed, 99 insertions(+), 12 deletions(-)
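
The new knob is exposed through Gitaly's TOML configuration under an [adaptive_limiting] section, per the struct tags added below. A minimal decoding sketch; the TOML library used here (BurntSushi/toml) is an illustrative assumption, not necessarily what Gitaly itself uses:

package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

// AdaptiveLimiting mirrors the struct this commit adds to
// internal/gitaly/config/config.go.
type AdaptiveLimiting struct {
	// 0 (key absent) means "use the built-in default of 0.5", see the
	// constructor change in cgroup_cpu_watcher.go below.
	CPUThrottledThreshold float64 `toml:"cpu_throttled_threshold"`
}

type Cfg struct {
	AdaptiveLimiting AdaptiveLimiting `toml:"adaptive_limiting"`
}

func main() {
	// The new section as it would appear in a Gitaly config file.
	raw := `
[adaptive_limiting]
cpu_throttled_threshold = 0.9
`
	var cfg Cfg
	if _, err := toml.Decode(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.AdaptiveLimiting.CPUThrottledThreshold) // prints 0.9
}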
diff --git a/internal/cli/gitaly/serve.go b/internal/cli/gitaly/serve.go
index edca64585..223880507 100644
--- a/internal/cli/gitaly/serve.go
+++ b/internal/cli/gitaly/serve.go
@@ -353,7 +353,7 @@ func run(cfg config.Cfg, logger log.Logger) error {
logger,
adaptiveLimits,
[]limiter.ResourceWatcher{
- watchers.NewCgroupCPUWatcher(cgroupMgr),
+ watchers.NewCgroupCPUWatcher(cgroupMgr, cfg.AdaptiveLimiting.CPUThrottledThreshold),
watchers.NewCgroupMemoryWatcher(cgroupMgr),
},
)
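
The two watchers registered above implement limiter.ResourceWatcher. The interface definition is not part of this diff, but from the Name and Poll signatures visible below, a minimal custom watcher would look roughly like this; staticWatcher is a hypothetical example type, and the module import path is assumed from Gitaly's v16 era:

package watchers

import (
	"context"

	"gitlab.com/gitlab-org/gitaly/v16/internal/limiter"
)

// staticWatcher is a hypothetical watcher that always reports the same
// verdict; it mirrors the Name/Poll shape CgroupCPUWatcher uses in this diff.
type staticWatcher struct{ backoff bool }

func (w *staticWatcher) Name() string { return "Static" }

func (w *staticWatcher) Poll(ctx context.Context) (*limiter.BackoffEvent, error) {
	return &limiter.BackoffEvent{
		WatcherName:   w.Name(),
		ShouldBackoff: w.backoff,
		Reason:        "statically configured backoff",
	}, nil
}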
diff --git a/internal/gitaly/config/config.go b/internal/gitaly/config/config.go
index 735fa2d65..869871f11 100644
--- a/internal/gitaly/config/config.go
+++ b/internal/gitaly/config/config.go
@@ -123,6 +123,7 @@ type Cfg struct {
Backup BackupConfig `toml:"backup,omitempty" json:"backup"`
Timeout TimeoutConfig `toml:"timeout,omitempty" json:"timeout"`
Transactions Transactions `toml:"transactions,omitempty" json:"transactions,omitempty"`
+ AdaptiveLimiting AdaptiveLimiting `toml:"adaptive_limiting,omitempty" json:"adaptive_limiting,omitempty"`
}
// Transactions configures transaction related options.
@@ -487,6 +488,23 @@ func (c Concurrency) Validate() error {
return errs.AsError()
}
+// AdaptiveLimiting defines a set of global config for the adaptive limiter. This config customizes how the resource
+// watchers and calculator work. Specific limits for each RPC or pack-objects operation should be configured
+// individually using the Concurrency and PackObjectsLimiting structs respectively.
+type AdaptiveLimiting struct {
+ // CPUThrottledThreshold defines the CPU throttling ratio threshold for a backoff event. The resource watcher
+ // compares the recorded total throttled time between two polls. If the throttled time exceeds this threshold of
+ // the observation window, it returns a backoff event. By default, the threshold is 0.5 (50%).
+ CPUThrottledThreshold float64 `toml:"cpu_throttled_threshold" json:"cpu_throttled_threshold"`
+}
+
+// Validate runs validation on all fields and composes all found errors.
+func (c AdaptiveLimiting) Validate() error {
+ return cfgerror.New().
+ Append(cfgerror.Comparable(c.CPUThrottledThreshold).GreaterOrEqual(0), "cpu_throttled_threshold").
+ AsError()
+}
+
// RateLimiting allows endpoints to be limited to a maximum request rate per
// second. The rate limiter uses a concept of a "token bucket". In order to serve a
// request, a token is retrieved from the token bucket. The size of the token
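
To make the threshold semantics from the CPUThrottledThreshold doc comment concrete: with the default of 0.5 and polls 15 seconds apart, a backoff event fires once more than 7.5 of those seconds were spent throttled. A standalone sketch of the comparison, mirroring the check in cgroup_cpu_watcher.go further down:

// shouldBackoff mirrors the comparison in CgroupCPUWatcher.Poll: back off when
// the throttled share of the observation window exceeds the threshold.
func shouldBackoff(throttledSeconds, windowSeconds, threshold float64) bool {
	return windowSeconds > 0 && throttledSeconds/windowSeconds > threshold
}

// shouldBackoff(8, 15, 0.5)  == true   (≈53% throttled, above the 50% default)
// shouldBackoff(7, 15, 0.5)  == false  (≈47% throttled)
// shouldBackoff(15, 15, 0.9) == true   (100% throttled, as in the new test case)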
diff --git a/internal/gitaly/config/config_test.go b/internal/gitaly/config/config_test.go
index 41c12c800..273c7b8d7 100644
--- a/internal/gitaly/config/config_test.go
+++ b/internal/gitaly/config/config_test.go
@@ -1989,6 +1989,26 @@ func TestConcurrency_Validate(t *testing.T) {
)
}
+func TestAdaptiveLimiting_Validate(t *testing.T) {
+ t.Parallel()
+
+ require.NoError(t, AdaptiveLimiting{CPUThrottledThreshold: 0}.Validate())
+ require.NoError(t, AdaptiveLimiting{CPUThrottledThreshold: 0.1}.Validate())
+ require.NoError(t, AdaptiveLimiting{CPUThrottledThreshold: 0.9}.Validate())
+ require.NoError(t, AdaptiveLimiting{CPUThrottledThreshold: 2.0}.Validate())
+
+ require.Equal(
+ t,
+ cfgerror.ValidationErrors{
+ cfgerror.NewValidationError(
+ fmt.Errorf("%w: -0.1 is not greater than or equal to 0", cfgerror.ErrNotInRange),
+ "cpu_throttled_threshold",
+ ),
+ },
+ AdaptiveLimiting{CPUThrottledThreshold: -0.1}.Validate(),
+ )
+}
+
func TestStorage_Validate(t *testing.T) {
t.Parallel()
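
Note that validation only rejects negative values: as the 2.0 case above shows, thresholds above 1.0 pass. Reusing the hypothetical shouldBackoff helper from the earlier sketch, and assuming the throttled share cannot exceed 100% of the window, such a threshold can never trigger CPU-based backoff:

// shouldBackoff(15, 15, 2.0) == false (1.0 ≤ 2.0), so a threshold above 1.0
// effectively disables the CPU watcher's backoff signal.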
diff --git a/internal/limiter/watchers/cgroup_cpu_watcher.go b/internal/limiter/watchers/cgroup_cpu_watcher.go
index 5ef5ba7d0..30f5c8454 100644
--- a/internal/limiter/watchers/cgroup_cpu_watcher.go
+++ b/internal/limiter/watchers/cgroup_cpu_watcher.go
@@ -10,8 +10,8 @@ import (
)
const (
- cgroupCPUWatcherName = "CgroupCpu"
- cpuThrottledThreshold = 0.5
+ cgroupCPUWatcherName = "CgroupCpu"
+ defaultCPUThrottledThreshold = 0.5
)
// CgroupCPUWatcher implements ResourceWatcher interface for watching CPU throttling of cgroup. Cgroup doesn't have an
@@ -19,9 +19,10 @@ const (
// between two polls. If the throttled time exceeds 50% of the observation window, it returns a backoff event. The
// watcher uses `throttled_time` (CgroupV1) or `throttled_usec` (CgroupV2) stats from the cgroup manager.
type CgroupCPUWatcher struct {
- manager cgroups.Manager
- lastPoll time.Time
- lastParentStats cgroups.CgroupStats
+ manager cgroups.Manager
+ cpuThrottledThreshold float64
+ lastPoll time.Time
+ lastParentStats cgroups.CgroupStats
// currentTime is the function that returns the current time. If it's not set, time.Now() is used
// instead. It's used for tests only.
@@ -29,9 +30,13 @@ type CgroupCPUWatcher struct {
}
// NewCgroupCPUWatcher is the initializer of CgroupCPUWatcher
-func NewCgroupCPUWatcher(manager cgroups.Manager) *CgroupCPUWatcher {
+func NewCgroupCPUWatcher(manager cgroups.Manager, cpuThrottledThreshold float64) *CgroupCPUWatcher {
+ if cpuThrottledThreshold == 0 {
+ cpuThrottledThreshold = defaultCPUThrottledThreshold
+ }
return &CgroupCPUWatcher{
- manager: manager,
+ manager: manager,
+ cpuThrottledThreshold: cpuThrottledThreshold,
}
}
@@ -81,7 +86,7 @@ func (c *CgroupCPUWatcher) Poll(ctx context.Context) (*limiter.BackoffEvent, err
timeDiff := currentPoll.Sub(c.lastPoll).Abs().Seconds()
// If the total throttled duration since the last poll exceeds 50%.
- if timeDiff > 0 && throttledDuration/timeDiff > cpuThrottledThreshold {
+ if timeDiff > 0 && throttledDuration/timeDiff > c.cpuThrottledThreshold {
return &limiter.BackoffEvent{
WatcherName: c.Name(),
ShouldBackoff: true,
@@ -89,7 +94,7 @@ func (c *CgroupCPUWatcher) Poll(ctx context.Context) (*limiter.BackoffEvent, err
Stats: map[string]any{
"time_diff": timeDiff,
"throttled_duration": throttledDuration,
- "throttled_threshold": cpuThrottledThreshold,
+ "throttled_threshold": c.cpuThrottledThreshold,
},
}, nil
}
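
One consequence of the constructor change above: a threshold of 0, which is the Go zero value when cpu_throttled_threshold is absent from the config, falls back to the 0.5 default rather than disabling backoff. A usage sketch, where cgroupMgr stands in for the cgroup manager built in serve.go:

// Key absent from config: the zero value selects defaultCPUThrottledThreshold.
w := watchers.NewCgroupCPUWatcher(cgroupMgr, 0) // effective threshold: 0.5

// An explicitly configured value is used verbatim.
w = watchers.NewCgroupCPUWatcher(cgroupMgr, 0.9) // effective threshold: 0.9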
diff --git a/internal/limiter/watchers/cgroup_cpu_watcher_test.go b/internal/limiter/watchers/cgroup_cpu_watcher_test.go
index f7207eded..8b007b399 100644
--- a/internal/limiter/watchers/cgroup_cpu_watcher_test.go
+++ b/internal/limiter/watchers/cgroup_cpu_watcher_test.go
@@ -14,7 +14,7 @@ import (
func TestCgroupCPUWatcher_Name(t *testing.T) {
t.Parallel()
- manager := NewCgroupCPUWatcher(&testCgroupManager{})
+ manager := NewCgroupCPUWatcher(&testCgroupManager{}, 0.5)
require.Equal(t, cgroupCPUWatcherName, manager.Name())
}
@@ -27,6 +27,7 @@ func TestCgroupCPUWatcher_Poll(t *testing.T) {
desc string
manager *testCgroupManager
pollTimes []recentTimeFunc
+ cpuThreshold float64
expectedEvents []*limiter.BackoffEvent
expectedErrs []error
}{
@@ -395,9 +396,52 @@ func TestCgroupCPUWatcher_Poll(t *testing.T) {
},
},
},
+ {
+ desc: "customized CPU threshold",
+ manager: &testCgroupManager{
+ ready: true,
+ statsList: []cgroups.Stats{
+ testCPUStat(1, 100),
+ testCPUStat(2, 108), // 8 seconds - okay
+ testCPUStat(3, 123), // 15 seconds - 15 over 15, exceeding 90%
+ testCPUStat(4, 136), // 13 seconds - fine
+ },
+ },
+ cpuThreshold: 0.9,
+ pollTimes: []recentTimeFunc{
+ mockRecentTime(t, "2023-01-01T11:00:00Z"),
+ mockRecentTime(t, "2023-01-01T11:00:15Z"),
+ mockRecentTime(t, "2023-01-01T11:00:30Z"),
+ mockRecentTime(t, "2023-01-01T11:00:45Z"),
+ },
+ expectedEvents: []*limiter.BackoffEvent{
+ {
+ WatcherName: cgroupCPUWatcherName,
+ ShouldBackoff: false,
+ },
+ {
+ WatcherName: cgroupCPUWatcherName,
+ ShouldBackoff: false,
+ },
+ {
+ WatcherName: cgroupCPUWatcherName,
+ ShouldBackoff: true,
+ Reason: "cgroup CPU throttled too much",
+ Stats: map[string]any{
+ "time_diff": 15.0,
+ "throttled_duration": 15.0,
+ "throttled_threshold": 0.9,
+ },
+ },
+ {
+ WatcherName: cgroupCPUWatcherName,
+ ShouldBackoff: false,
+ },
+ },
+ },
} {
t.Run(tc.desc, func(t *testing.T) {
- watcher := NewCgroupCPUWatcher(tc.manager)
+ watcher := NewCgroupCPUWatcher(tc.manager, tc.cpuThreshold)
if tc.pollTimes != nil {
require.Equal(t, len(tc.expectedEvents), len(tc.pollTimes), "poll times set up incorrectly")
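
Sanity-checking the arithmetic in the new "customized CPU threshold" case above: polls arrive 15 seconds apart, so only the third poll crosses the 0.9 threshold.

// poll 2:  8s throttled / 15s window ≈ 0.53 ≤ 0.9 → no backoff
// poll 3: 15s throttled / 15s window = 1.00 > 0.9 → backoff
// poll 4: 13s throttled / 15s window ≈ 0.87 ≤ 0.9 → no backoff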