Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2021-03-29 12:43:55 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2021-03-29 12:46:53 +0300
commit74882d5778003bcdf8ef30a1ddfd3902170747fa (patch)
tree9558900782c94d6565abd624f40f0e31c829c19a
parent6cc6d4b4ac491dddbeff72b0b2525c188d0cd23b (diff)
maintenance: Introduce rate limiting for `OptimizeRepo`
With the current implementation of `OptimizeRepo`, we're trying to do as much work as possible in the timeframe dictated by the maintenance schedule. This has proven to be problematic even in times of reduced load on deployments because we were essentially DoS'ing ourselves with so many requests that it caused alerts to trigger. On staging systems, we see more than 200 calls per second for the `OptimizeRepository` RPC. Even if those jobs don't need to do any heavy repacking, it does cause heavy read-load on Gitaly nodes just to determine that nothing needs to be done. As a first iteration towards betterment, this commit introduces rate limiting to the maintenance job. Instead of going as quickly as we can, we limit requests per second to 1. Of course this means that we're now able to optimize fewer repositories than we previously did. But this is still much better than driving a DoS against ourselves and waking up SREs on weekends. Depending on the typical load we'll see with this rate limiting, we may be able to enable the maintenance task 24/7.
-rw-r--r--internal/gitaly/maintenance/optimize.go45
-rw-r--r--internal/gitaly/maintenance/optimize_test.go19
-rw-r--r--internal/gitaly/server/server_factory.go2
3 files changed, 52 insertions, 14 deletions
diff --git a/internal/gitaly/maintenance/optimize.go b/internal/gitaly/maintenance/optimize.go
index 53f4f323a..854d68a04 100644
--- a/internal/gitaly/maintenance/optimize.go
+++ b/internal/gitaly/maintenance/optimize.go
@@ -11,6 +11,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/config"
+ "gitlab.com/gitlab-org/gitaly/internal/helper"
"gitlab.com/gitlab-org/gitaly/internal/storage"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
"google.golang.org/grpc"
@@ -68,12 +69,15 @@ func optimizeRepoAtPath(ctx context.Context, l logrus.FieldLogger, s config.Stor
return nil
}
-func walkReposShuffled(ctx context.Context, walker *randomWalker, l logrus.FieldLogger, s config.Storage, o Optimizer) error {
+func walkReposShuffled(
+ ctx context.Context,
+ walker *randomWalker,
+ l logrus.FieldLogger,
+ s config.Storage,
+ o Optimizer,
+ ticker helper.Ticker,
+) error {
for {
- if err := ctx.Err(); err != nil {
- return err
- }
-
fi, path, err := walker.next()
switch {
case errors.Is(err, errIterOver):
@@ -89,21 +93,33 @@ func walkReposShuffled(ctx context.Context, walker *randomWalker, l logrus.Field
}
walker.skipDir()
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-ticker.C():
+ }
+
+ // Reset the ticker before doing the optimization such that we essentially limit
+ // ourselves to doing optimizations once per tick, not once per tick plus the time
+ // it takes to do the optimization. It's best effort given that traversing the
+ // directory hierarchy takes some time, too, but it should be good enough for now.
+ ticker.Reset()
+
if err := optimizeRepoAtPath(ctx, l, s, path, o); err != nil {
return err
}
}
}
-// OptimizeReposRandomly returns a function to walk through each storage and
-// attempt to optimize any repos encountered.
+// OptimizeReposRandomly returns a function to walk through each storage and attempts to optimize
+// any repos encountered. The ticker is used to rate-limit optimizations.
//
-// Only storage paths that map to an enabled storage name will be walked.
-// Any storage paths shared by multiple storages will only be walked once.
+// Only storage paths that map to an enabled storage name will be walked. Any storage paths shared
+// by multiple storages will only be walked once.
//
-// Any errors during the optimization will be logged. Any other errors will be
-// returned and cause the walk to end prematurely.
-func OptimizeReposRandomly(storages []config.Storage, optimizer Optimizer, rand *rand.Rand) StoragesJob {
+// Any errors during the optimization will be logged. Any other errors will be returned and cause
+// the walk to end prematurely.
+func OptimizeReposRandomly(storages []config.Storage, optimizer Optimizer, ticker helper.Ticker, rand *rand.Rand) StoragesJob {
return func(ctx context.Context, l logrus.FieldLogger, enabledStorageNames []string) error {
enabledNames := map[string]struct{}{}
for _, sName := range enabledStorageNames {
@@ -112,6 +128,9 @@ func OptimizeReposRandomly(storages []config.Storage, optimizer Optimizer, rand
visitedPaths := map[string]bool{}
+ ticker.Reset()
+ defer ticker.Stop()
+
for _, storage := range shuffledStoragesCopy(rand, storages) {
if _, ok := enabledNames[storage.Name]; !ok {
continue // storage not enabled
@@ -126,7 +145,7 @@ func OptimizeReposRandomly(storages []config.Storage, optimizer Optimizer, rand
walker := newRandomWalker(storage.Path, rand)
- if err := walkReposShuffled(ctx, walker, l, storage, optimizer); err != nil {
+ if err := walkReposShuffled(ctx, walker, l, storage, optimizer, ticker); err != nil {
l.WithError(err).
WithField("storage_path", storage.Path).
Errorf("maintenance: unable to completely walk storage")
diff --git a/internal/gitaly/maintenance/optimize_test.go b/internal/gitaly/maintenance/optimize_test.go
index 1cfd91bc0..90d1c9f9a 100644
--- a/internal/gitaly/maintenance/optimize_test.go
+++ b/internal/gitaly/maintenance/optimize_test.go
@@ -12,6 +12,7 @@ import (
"gitlab.com/gitlab-org/gitaly/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/service/repository"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/transaction"
+ "gitlab.com/gitlab-org/gitaly/internal/helper"
"gitlab.com/gitlab-org/gitaly/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
@@ -78,14 +79,30 @@ func TestOptimizeReposRandomly(t *testing.T) {
},
} {
t.Run(tc.desc, func(t *testing.T) {
+ tickerDone := false
+ tickerCount := 0
+
+ ticker := helper.NewManualTicker()
+ ticker.ResetFunc = func() {
+ tickerCount++
+ ticker.Tick()
+ }
+ ticker.StopFunc = func() {
+ tickerDone = true
+ }
+
mo := &mockOptimizer{
t: t,
cfg: cfg,
}
- walker := OptimizeReposRandomly(cfg.Storages, mo, rand.New(rand.NewSource(1)))
+ walker := OptimizeReposRandomly(cfg.Storages, mo, ticker, rand.New(rand.NewSource(1)))
require.NoError(t, walker(ctx, testhelper.DiscardTestEntry(t), tc.storages))
require.ElementsMatch(t, tc.expected, mo.actual)
+ require.True(t, tickerDone)
+ // We expect one more tick than optimized repositories because of the
+ // initial tick up front to re-start the timer.
+ require.Equal(t, len(tc.expected)+1, tickerCount)
})
}
}
diff --git a/internal/gitaly/server/server_factory.go b/internal/gitaly/server/server_factory.go
index 60a5bdae9..4de443d69 100644
--- a/internal/gitaly/server/server_factory.go
+++ b/internal/gitaly/server/server_factory.go
@@ -12,6 +12,7 @@ import (
"gitlab.com/gitlab-org/gitaly/client"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/maintenance"
+ "gitlab.com/gitlab-org/gitaly/internal/helper"
gitalylog "gitlab.com/gitlab-org/gitaly/internal/log"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
"google.golang.org/grpc"
@@ -56,6 +57,7 @@ func (s *GitalyServerFactory) StartWorkers(ctx context.Context, l logrus.FieldLo
maintenance.OptimizeReposRandomly(
cfg.Storages,
gitalypb.NewRepositoryServiceClient(cc),
+ helper.NewTimerTicker(1*time.Second),
rand.New(rand.NewSource(time.Now().UnixNano())),
),
)