Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavlo Strokov <pstrokov@gitlab.com>2021-12-16 17:12:58 +0300
committerPavlo Strokov <pstrokov@gitlab.com>2021-12-16 17:29:03 +0300
commit7412a98422c47269e18c937672a25e6047577bb1 (patch)
tree8da155d78661c2c466a68bbfb5e3aa146d8c080e
parent1762b3a49fef1f12c2dc6ac153807b2c8c25b1fe (diff)
maintenance: Override deadline for RPC call
Gitaly service runs a maintenance job each day for a certain period of time. The log shows that OptimizeRepository RPC fails with DeadlineExceeded error. The root cause is a usage of the context that is limited by the duration of the maintenance. When job is out of maintenance window the context is cancelled and RPC is cancelled as well. Instead, we should handle it gracefully. That is why before RPC call we first suppress context cancellation and them apply a new default timeout for it. That protects execution from cancellation of the parent context as well as cancels long running RPC calls. If we don't cancel long running RPC the other repositories will suffer from not being optimized for a long time. Closes: https://gitlab.com/gitlab-org/gitaly/-/issues/3963 Changelog: fixed
-rw-r--r--internal/gitaly/maintenance/optimize.go8
-rw-r--r--internal/gitaly/maintenance/optimize_test.go39
2 files changed, 47 insertions, 0 deletions
diff --git a/internal/gitaly/maintenance/optimize.go b/internal/gitaly/maintenance/optimize.go
index f4da15c84..d7231fb24 100644
--- a/internal/gitaly/maintenance/optimize.go
+++ b/internal/gitaly/maintenance/optimize.go
@@ -56,6 +56,14 @@ func optimizeRepoAtPath(ctx context.Context, l logrus.FieldLogger, s config.Stor
Repository: repo,
}
+ // In order to prevent RPC cancellation because of the parent context cancellation
+ // (job execution duration passed) we suppress cancellation of the parent and add a
+ // new time limit to make sure it won't block forever.
+ // It also helps us to be protected over repositories that are taking too long to complete
+ // a request, so no any other repository can be optimized.
+ ctx, cancel := context.WithTimeout(helper.SuppressCancellation(ctx), 5*time.Minute)
+ defer cancel()
+
start := time.Now()
if _, err := o.OptimizeRepository(ctx, optimizeReq); err != nil {
l.WithFields(map[string]interface{}{
diff --git a/internal/gitaly/maintenance/optimize_test.go b/internal/gitaly/maintenance/optimize_test.go
index 2ca182f13..8c03229d4 100644
--- a/internal/gitaly/maintenance/optimize_test.go
+++ b/internal/gitaly/maintenance/optimize_test.go
@@ -5,6 +5,7 @@ import (
"math/rand"
"path/filepath"
"testing"
+ "time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -121,3 +122,41 @@ func TestOptimizeReposRandomly(t *testing.T) {
})
}
}
+
+type mockOptimizerCancel struct {
+ t *testing.T
+ startedAt time.Time
+}
+
+func (m mockOptimizerCancel) OptimizeRepository(ctx context.Context, _ *gitalypb.OptimizeRepositoryRequest, _ ...grpc.CallOption) (*gitalypb.OptimizeRepositoryResponse, error) {
+ timeline, ok := ctx.Deadline()
+ if assert.True(m.t, ok) {
+ assert.True(m.t, timeline.After(m.startedAt), m.startedAt)
+ future := m.startedAt.Add(10 * time.Minute)
+ assert.True(m.t, timeline.Before(future), future)
+ }
+ return &gitalypb.OptimizeRepositoryResponse{}, nil
+}
+
+func TestOptimizeReposRandomly_cancellationOverride(t *testing.T) {
+ cfgBuilder := testcfg.NewGitalyCfgBuilder()
+ cfg := cfgBuilder.Build(t)
+
+ gittest.InitRepo(t, cfg, cfg.Storages[0])
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ // The timeout should be overwritten by the default 5 min timeout.
+ //nolint:forbidigo // We're explicitly testing deadline override.
+ ctx, cancel = context.WithTimeout(ctx, 72*time.Hour)
+ defer cancel()
+
+ ticker := helper.NewManualTicker()
+ ticker.Tick()
+
+ mo := &mockOptimizerCancel{t: t, startedAt: time.Now()}
+ walker := OptimizeReposRandomly(cfg.Storages, mo, ticker, rand.New(rand.NewSource(1)))
+
+ require.NoError(t, walker(ctx, testhelper.NewDiscardingLogEntry(t), []string{cfg.Storages[0].Name}))
+}