Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQuang-Minh Nguyen <qmnguyen@gitlab.com>2023-05-09 13:29:09 +0300
committerQuang-Minh Nguyen <qmnguyen@gitlab.com>2023-05-12 05:48:28 +0300
commit8007eaff386b51963e4bbf29d61bb35aef673e03 (patch)
tree38dd11d0d457bd05330c47f4eb26e78ea5ca8fc1 /internal/command
parentf3c8cde33d58e30dfdc11ee88295d6ba3dd80c88 (diff)
Return ResourceExhausted code when facing spawn token timeout
The official gRPC documentation (https://grpc.github.io/grpc/core/md_doc_statuscodes.html) clearly distinguishes between different status codes, in particular: - RESOURCE_EXHAUSTED: Some resource has been exhausted, perhaps a per-user quota, or perhaps the entire file system is out of space. - INTERNAL: Internal errors. This means that some invariants expected by the underlying system have been broken. This error code is reserved for serious errors. The spawn token system, developed by our team, serves as a means of managing underlying fork/exec operations. When a process is unable to create due to spawn token shortage, it can be viewed as a resource issue, aligning with the definition of the ResourceExhausted error code. This type of error is common in comparable scenarios, making it a predictable outcome. It would be appropriate to reserve the Internal error code for unexpected occurrences instead. One more thing. In https://gitlab.com/groups/gitlab-org/-/epics/7891, I'm currently implementing a pushback feature for clients who encounter specific error codes. By converting these errors to ResourceExhausted, clients will be forced to perform transparent retries in an exponential and automatic manner when we add the support in client-side. At present, we temporarily disable this ability by setting the retry-after time to 0. This will ultimately have a positive impact on the system.
Diffstat (limited to 'internal/command')
-rw-r--r--internal/command/command_test.go17
-rw-r--r--internal/command/spawntoken.go15
-rw-r--r--internal/command/spawntoken_test.go20
3 files changed, 45 insertions, 7 deletions
diff --git a/internal/command/command_test.go b/internal/command/command_test.go
index 8eda53261..dd23c85d6 100644
--- a/internal/command/command_test.go
+++ b/internal/command/command_test.go
@@ -21,7 +21,11 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v16/internal/cgroups"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
"gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/protobuf/types/known/durationpb"
)
func TestNew_environment(t *testing.T) {
@@ -196,7 +200,18 @@ func TestNew_spawnTimeout(t *testing.T) {
// And after some time we expect that spawning of the command fails due to the configured
// timeout.
- require.Equal(t, fmt.Errorf("process spawn timed out after 200ms"), <-errCh)
+ err := <-errCh
+ var structErr structerr.Error
+ require.ErrorAs(t, err, &structErr)
+ details := structErr.Details()
+ require.Len(t, details, 1)
+
+ limitErr, ok := details[0].(*gitalypb.LimitError)
+ require.True(t, ok)
+
+ testhelper.RequireGrpcCode(t, err, codes.ResourceExhausted)
+ require.Equal(t, "process spawn timed out after 200ms", limitErr.ErrorMessage)
+ require.Equal(t, durationpb.New(0), limitErr.RetryAfter)
}
func TestCommand_Wait_contextCancellationKillsCommand(t *testing.T) {
diff --git a/internal/command/spawntoken.go b/internal/command/spawntoken.go
index bdc4d664c..2b0065bf5 100644
--- a/internal/command/spawntoken.go
+++ b/internal/command/spawntoken.go
@@ -8,7 +8,10 @@ import (
"github.com/kelseyhightower/envconfig"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
"gitlab.com/gitlab-org/gitaly/v16/internal/tracing"
+ "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
+ "google.golang.org/protobuf/types/known/durationpb"
)
var (
@@ -60,22 +63,26 @@ func getSpawnToken(ctx context.Context) (putToken func(), err error) {
select {
case spawnTokens <- struct{}{}:
- logTime(ctx, start, "")
+ recordTime(ctx, start, "")
return func() {
<-spawnTokens
}, nil
case <-time.After(spawnConfig.Timeout):
- logTime(ctx, start, "spawn token timeout")
+ recordTime(ctx, start, "spawn token timeout")
spawnTimeoutCount.Inc()
- return nil, fmt.Errorf("process spawn timed out after %v", spawnConfig.Timeout)
+ msg := fmt.Sprintf("process spawn timed out after %v", spawnConfig.Timeout)
+ return nil, structerr.NewResourceExhausted(msg).WithDetail(&gitalypb.LimitError{
+ ErrorMessage: msg,
+ RetryAfter: durationpb.New(0),
+ })
case <-ctx.Done():
return nil, ctx.Err()
}
}
-func logTime(ctx context.Context, start time.Time, msg string) {
+func recordTime(ctx context.Context, start time.Time, msg string) {
delta := time.Since(start)
if stats := StatsFromContext(ctx); stats != nil {
diff --git a/internal/command/spawntoken_test.go b/internal/command/spawntoken_test.go
index 3f10d5bb3..62f534b7f 100644
--- a/internal/command/spawntoken_test.go
+++ b/internal/command/spawntoken_test.go
@@ -2,9 +2,14 @@ package command
import (
"testing"
+ "time"
"github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
"gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/protobuf/types/known/durationpb"
)
func TestGetSpawnToken_CommandStats(t *testing.T) {
@@ -27,7 +32,7 @@ func TestGetSpawnToken_CommandStats_timeout(t *testing.T) {
priorTimeout := spawnConfig.Timeout
priorSpawnTokens := spawnTokens
- spawnConfig.Timeout = 0
+ spawnConfig.Timeout = 1 * time.Millisecond
spawnTokens = make(chan struct{}, 1)
spawnTokens <- struct{}{}
defer func() {
@@ -39,7 +44,18 @@ func TestGetSpawnToken_CommandStats_timeout(t *testing.T) {
ctx = InitContextStats(ctx)
_, err := getSpawnToken(ctx)
- require.ErrorContains(t, err, "process spawn timed out after")
+
+ var structErr structerr.Error
+ require.ErrorAs(t, err, &structErr)
+ details := structErr.Details()
+ require.Len(t, details, 1)
+
+ limitErr, ok := details[0].(*gitalypb.LimitError)
+ require.True(t, ok)
+
+ testhelper.RequireGrpcCode(t, err, codes.ResourceExhausted)
+ require.Equal(t, "process spawn timed out after 1ms", limitErr.ErrorMessage)
+ require.Equal(t, durationpb.New(0), limitErr.RetryAfter)
stats := StatsFromContext(ctx)
require.NotNil(t, stats)