diff options
author | Jacob Vosmaer (GitLab) <jacob@gitlab.com> | 2017-12-15 20:08:51 +0300 |
---|---|---|
committer | Jacob Vosmaer (GitLab) <jacob@gitlab.com> | 2017-12-15 20:08:51 +0300 |
commit | c4c65e51a4f466ec5398f79dd32e2530c4e9b917 (patch) | |
tree | 215d6bff90d32b0990454c91a992db6ff5996aa0 | |
parent | 3975c425123f1e7994e7f2ee9164145b91bfcf1c (diff) | |
parent | e0b3b9b6719aefe719568a7a3b0ef4bf2e90260d (diff) |
Merge branch 'limit-spawn-concurrency' into 'master'
Limit the number of concurrent process spawns
See merge request gitlab-org/gitaly!492
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | internal/command/command.go | 26 | ||||
-rw-r--r-- | internal/command/command_test.go | 47 | ||||
-rw-r--r-- | internal/command/spawnconfig.go | 36 |
4 files changed, 109 insertions, 2 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 057fb63bd..eb084e760 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ UNRELEASED +- Limit the number of concurrent process spawns + https://gitlab.com/gitlab-org/gitaly/merge_requests/492 - Update vendored gitlab_git to 858edadf781c0cc54b15832239c19fca378518ad https://gitlab.com/gitlab-org/gitaly/merge_requests/493 - Eagerly close logrus writer pipes diff --git a/internal/command/command.go b/internal/command/command.go index a80adca02..ae3da3d2d 100644 --- a/internal/command/command.go +++ b/internal/command/command.go @@ -12,10 +12,9 @@ import ( "syscall" "time" - "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus" - "gitlab.com/gitlab-org/gitaly/internal/config" + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus" log "github.com/sirupsen/logrus" ) @@ -97,6 +96,7 @@ func WaitAllDone() { wg.Wait() } +type spawnTimeoutError error type contextWithoutDonePanic string // New creates a Command from an exec.Cmd. On success, the Command @@ -107,6 +107,28 @@ func New(ctx context.Context, cmd *exec.Cmd, stdin io.Reader, stdout, stderr io. panic(contextWithoutDonePanic("command spawned with context without Done() channel")) } + // Go has a global lock (syscall.ForkLock) for spawning new processes. + // This select statement is a safety valve to prevent lots of Gitaly + // requests from piling up behind the ForkLock if forking for some reason + // slows down. This has happened in real life, see + // https://gitlab.com/gitlab-org/gitaly/issues/823. + select { + case spawnTokens <- struct{}{}: + defer func() { + // This function is deferred so that it runs even if 'newCommand' panics. + <-spawnTokens + }() + + return newCommand(ctx, cmd, stdin, stdout, stderr, env...) + case <-time.After(spawnConfig.Timeout): + return nil, spawnTimeoutError(fmt.Errorf("process spawn timed out after %v", spawnConfig.Timeout)) + case <-ctx.Done(): + return nil, ctx.Err() + } +} + +// Don't call 'newCommand', use 'New'. +func newCommand(ctx context.Context, cmd *exec.Cmd, stdin io.Reader, stdout, stderr io.Writer, env ...string) (*Command, error) { logPid := -1 defer func() { grpc_logrus.Extract(ctx).WithFields(log.Fields{ diff --git a/internal/command/command_test.go b/internal/command/command_test.go index 8b25bc073..2c7495a7b 100644 --- a/internal/command/command_test.go +++ b/internal/command/command_test.go @@ -7,6 +7,7 @@ import ( "os/exec" "strings" "testing" + "time" "github.com/stretchr/testify/require" ) @@ -58,3 +59,49 @@ func TestRejectEmptyContextDone(t *testing.T) { New(context.Background(), exec.Command("true"), nil, nil, nil) } + +func TestNewCommandTimeout(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + defer func(ch chan struct{}, t time.Duration) { + spawnTokens = ch + spawnConfig.Timeout = t + }(spawnTokens, spawnConfig.Timeout) + + // This unbuffered channel will behave like a full/blocked buffered channel. + spawnTokens = make(chan struct{}) + // Speed up the test by lowering the timeout + spawnTimeout := 200 * time.Millisecond + spawnConfig.Timeout = spawnTimeout + + testDeadline := time.After(1 * time.Second) + tick := time.After(spawnTimeout / 2) + + errCh := make(chan error) + go func() { + _, err := New(ctx, exec.Command("true"), nil, nil, nil) + errCh <- err + }() + + var err error + timePassed := false + +wait: + for { + select { + case err = <-errCh: + break wait + case <-tick: + timePassed = true + case <-testDeadline: + t.Fatal("test timed out") + } + } + + require.True(t, timePassed, "time must have passed") + require.Error(t, err) + + _, ok := err.(spawnTimeoutError) + require.True(t, ok, "type of error should be spawnTimeoutError") +} diff --git a/internal/command/spawnconfig.go b/internal/command/spawnconfig.go new file mode 100644 index 000000000..8d7136e7e --- /dev/null +++ b/internal/command/spawnconfig.go @@ -0,0 +1,36 @@ +package command + +import ( + "time" + + "github.com/kelseyhightower/envconfig" +) + +var ( + spawnTokens chan struct{} + spawnConfig SpawnConfig +) + +// SpawnConfig holds configuration for command spawning timeouts and parallelism. +type SpawnConfig struct { + // This default value (10 seconds) is very high. Spawning should take + // milliseconds or less. If we hit 10 seconds, something is wrong, and + // failing the request will create breathing room. Can be modified at + // runtime with the GITALY_COMMAND_SPAWN_TIMEOUT environment variable. + Timeout time.Duration `split_words:"true" default:"10s"` + + // MaxSpawnParallel limits the number of goroutines that can spawn a + // process at the same time. These parallel spawns will contend for a + // single lock (syscall.ForkLock) in exec.Cmd.Start(). Can be modified at + // runtime with the GITALY_COMMAND_SPAWN_MAX_PARALLEL variable. + // + // Note that this does not limit the total number of child processes that + // can be attached to Gitaly at the same time. It only limits the rate at + // which we can create new child processes. + MaxParallel int `split_words:"true" default:"100"` +} + +func init() { + envconfig.MustProcess("gitaly_command_spawn", &spawnConfig) + spawnTokens = make(chan struct{}, spawnConfig.MaxParallel) +} |