diff options
author | Pavlo Strokov <pstrokov@gitlab.com> | 2020-05-27 10:14:04 +0300 |
---|---|---|
committer | Pavlo Strokov <pstrokov@gitlab.com> | 2020-05-27 10:14:04 +0300 |
commit | a1fe1986c75312d557b73f13fe35587462c4ca5f (patch) | |
tree | a4b9efeeec72b029d1e895871282da95fa51eacb | |
parent | e3d5d49d77b1a1b5b2cda1f910711599c9454500 (diff) | |
parent | a3fea6d0096678d162446f307651e9f7e30fe1c1 (diff) |
Merge branch 'ps-praefect-graceful-stop' into 'master'
Praefect graceful stop
See merge request gitlab-org/gitaly!2210
-rw-r--r-- | changelogs/unreleased/ps-praefect-graceful-stop.yml | 5 | ||||
-rw-r--r-- | cmd/gitaly/main.go | 2 | ||||
-rw-r--r-- | cmd/praefect/main.go | 2 | ||||
-rw-r--r-- | config.praefect.toml.example | 3 | ||||
-rw-r--r-- | internal/bootstrap/bootstrap.go | 11 | ||||
-rw-r--r-- | internal/bootstrap/bootstrap_test.go | 28 | ||||
-rw-r--r-- | internal/config/config.go | 4 | ||||
-rw-r--r-- | internal/config/ruby.go | 31 | ||||
-rw-r--r-- | internal/praefect/config/config.go | 34 | ||||
-rw-r--r-- | internal/praefect/config/config_test.go | 30 | ||||
-rw-r--r-- | internal/praefect/config/testdata/config.empty.toml | 0 | ||||
-rw-r--r-- | internal/praefect/config/testdata/config.toml | 1 |
12 files changed, 94 insertions, 57 deletions
diff --git a/changelogs/unreleased/ps-praefect-graceful-stop.yml b/changelogs/unreleased/ps-praefect-graceful-stop.yml new file mode 100644 index 000000000..a7aa5f2bb --- /dev/null +++ b/changelogs/unreleased/ps-praefect-graceful-stop.yml @@ -0,0 +1,5 @@ +--- +title: Praefect graceful stop +merge_request: 2210 +author: +type: fixed diff --git a/cmd/gitaly/main.go b/cmd/gitaly/main.go index 39360c2b4..05f8b599a 100644 --- a/cmd/gitaly/main.go +++ b/cmd/gitaly/main.go @@ -148,5 +148,5 @@ func run(b *bootstrap.Bootstrap) error { return fmt.Errorf("initialize gitaly-ruby: %v", err) } - return b.Wait() + return b.Wait(config.Config.GracefulRestartTimeout) } diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go index 3c8ec20a5..ba4f4572b 100644 --- a/cmd/praefect/main.go +++ b/cmd/praefect/main.go @@ -314,7 +314,7 @@ func run(cfgs []starter.Config, conf config.Config) error { repl.ProcessBacklog(ctx, praefect.ExpBackoffFunc(1*time.Second, 5*time.Second)) - return b.Wait() + return b.Wait(conf.GracefulStopTimeout.Duration()) } func getStarterConfigs(socketPath, listenAddr string) ([]starter.Config, error) { diff --git a/config.praefect.toml.example b/config.praefect.toml.example index cec16030a..8b768a93d 100644 --- a/config.praefect.toml.example +++ b/config.praefect.toml.example @@ -4,6 +4,9 @@ listen_addr = "127.0.0.1:2305" # # Praefect can listen on a socket when placed on the same machine as all clients # socket_path = "/home/git/gitlab/tmp/sockets/private/praefect.socket" +# # Optional: grace period before a praefect process is forcibly terminated (duration) +# # Defaults to "1m" +# graceful_stop_timeout = "30s" # # Optional: export metrics via Prometheus # prometheus_listen_addr = "127.0.01:10101" # # You can optionally configure Praefect to output JSON-formatted log messages to stdout diff --git a/internal/bootstrap/bootstrap.go b/internal/bootstrap/bootstrap.go index 839906003..2c8c35d9a 100644 --- a/internal/bootstrap/bootstrap.go +++ b/internal/bootstrap/bootstrap.go @@ -10,7 +10,6 @@ import ( "github.com/cloudflare/tableflip" log "github.com/sirupsen/logrus" - "gitlab.com/gitlab-org/gitaly/internal/config" "golang.org/x/sys/unix" ) @@ -147,7 +146,7 @@ func (b *Bootstrap) Start() error { // Wait will signal process readiness to the parent and than wait for an exit condition // SIGTERM, SIGINT and a runtime error will trigger an immediate shutdown // in case of an upgrade there will be a grace period to complete the ongoing requests -func (b *Bootstrap) Wait() error { +func (b *Bootstrap) Wait(gracefulTimeout time.Duration) error { signals := []os.Signal{syscall.SIGTERM, syscall.SIGINT} immediateShutdown := make(chan os.Signal, len(signals)) signal.Notify(immediateShutdown, signals...) @@ -163,7 +162,7 @@ func (b *Bootstrap) Wait() error { // the new process signaled its readiness and we started a graceful stop // however no further upgrades can be started until this process is running // we set a grace period and then we force a termination. - waitError := b.waitGracePeriod(immediateShutdown) + waitError := b.waitGracePeriod(gracefulTimeout, immediateShutdown) err = fmt.Errorf("graceful upgrade: %v", waitError) case s := <-immediateShutdown: @@ -174,8 +173,8 @@ func (b *Bootstrap) Wait() error { return err } -func (b *Bootstrap) waitGracePeriod(kill <-chan os.Signal) error { - log.WithField("graceful_restart_timeout", config.Config.GracefulRestartTimeout).Warn("starting grace period") +func (b *Bootstrap) waitGracePeriod(gracefulTimeout time.Duration, kill <-chan os.Signal) error { + log.WithField("graceful_timeout", gracefulTimeout).Warn("starting grace period") allServersDone := make(chan struct{}) go func() { @@ -186,7 +185,7 @@ func (b *Bootstrap) waitGracePeriod(kill <-chan os.Signal) error { }() select { - case <-time.After(config.Config.GracefulRestartTimeout): + case <-time.After(gracefulTimeout): return fmt.Errorf("grace period expired") case <-kill: return fmt.Errorf("force shutdown") diff --git a/internal/bootstrap/bootstrap_test.go b/internal/bootstrap/bootstrap_test.go index 16e85fe65..23c49209b 100644 --- a/internal/bootstrap/bootstrap_test.go +++ b/internal/bootstrap/bootstrap_test.go @@ -14,11 +14,8 @@ import ( "time" "github.com/stretchr/testify/require" - "gitlab.com/gitlab-org/gitaly/internal/config" ) -var testConfigGracefulRestartTimeout = 2 * time.Second - type mockUpgrader struct { exit chan struct{} hasParent bool @@ -107,7 +104,7 @@ func TestImmediateTerminationOnSocketError(t *testing.T) { b, server := makeBootstrap(t) waitCh := make(chan error) - go func() { waitCh <- b.Wait() }() + go func() { waitCh <- b.Wait(2 * time.Second) }() require.NoError(t, server.listeners["tcp"].Close(), "Closing first listener") @@ -124,7 +121,7 @@ func TestImmediateTerminationOnSignal(t *testing.T) { done := server.slowRequest(3 * time.Minute) waitCh := make(chan error) - go func() { waitCh <- b.Wait() }() + go func() { waitCh <- b.Wait(2 * time.Second) }() // make sure we are inside b.Wait() or we'll kill the test suite time.Sleep(100 * time.Millisecond) @@ -148,7 +145,7 @@ func TestImmediateTerminationOnSignal(t *testing.T) { func TestGracefulTerminationStuck(t *testing.T) { b, server := makeBootstrap(t) - err := testGracefulUpdate(t, server, b, testConfigGracefulRestartTimeout+(1*time.Second), nil) + err := testGracefulUpdate(t, server, b, 3*time.Second, 2*time.Second, nil) require.Contains(t, err.Error(), "grace period expired") } @@ -160,7 +157,7 @@ func TestGracefulTerminationWithSignals(t *testing.T) { t.Run(sig.String(), func(t *testing.T) { b, server := makeBootstrap(t) - err := testGracefulUpdate(t, server, b, 1*time.Second, func() { + err := testGracefulUpdate(t, server, b, 1*time.Second, 2*time.Second, func() { require.NoError(t, self.Signal(sig)) }) require.Contains(t, err.Error(), "force shutdown") @@ -178,14 +175,14 @@ func TestGracefulTerminationServerErrors(t *testing.T) { require.NoError(t, server.listeners["unix"].Close()) // we start a new TCP request that if faster than the grace period - req := server.slowRequest(config.Config.GracefulRestartTimeout / 2) + req := server.slowRequest(time.Second) done <- <-req close(done) server.server.Shutdown(context.Background()) } - err := testGracefulUpdate(t, server, b, testConfigGracefulRestartTimeout+(1*time.Second), nil) + err := testGracefulUpdate(t, server, b, 3*time.Second, 2*time.Second, nil) require.Contains(t, err.Error(), "grace period expired") require.NoError(t, <-done) @@ -197,7 +194,7 @@ func TestGracefulTermination(t *testing.T) { // Using server.Close we bypass the graceful shutdown faking a completed shutdown b.StopAction = func() { server.server.Close() } - err := testGracefulUpdate(t, server, b, 1*time.Second, nil) + err := testGracefulUpdate(t, server, b, 1*time.Second, 2*time.Second, nil) require.Contains(t, err.Error(), "completed") } @@ -219,17 +216,12 @@ func TestPortReuse(t *testing.T) { require.NoError(t, l.Close()) } -func testGracefulUpdate(t *testing.T, server *testServer, b *Bootstrap, waitTimeout time.Duration, duringGracePeriodCallback func()) error { - defer func(oldVal time.Duration) { - config.Config.GracefulRestartTimeout = oldVal - }(config.Config.GracefulRestartTimeout) - config.Config.GracefulRestartTimeout = testConfigGracefulRestartTimeout - +func testGracefulUpdate(t *testing.T, server *testServer, b *Bootstrap, waitTimeout, gracefulWait time.Duration, duringGracePeriodCallback func()) error { waitCh := make(chan error) - go func() { waitCh <- b.Wait() }() + go func() { waitCh <- b.Wait(gracefulWait) }() // Start a slow request to keep the old server from shutting down immediately. - req := server.slowRequest(2 * config.Config.GracefulRestartTimeout) + req := server.slowRequest(2 * gracefulWait) // make sure slow request is being handled time.Sleep(100 * time.Millisecond) diff --git a/internal/config/config.go b/internal/config/config.go index 98dbef3cc..306ec3cbf 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -48,7 +48,7 @@ type Cfg struct { Hooks Hooks `toml:"hooks"` Concurrency []Concurrency `toml:"concurrency"` GracefulRestartTimeout time.Duration - GracefulRestartTimeoutToml duration `toml:"graceful_restart_timeout"` + GracefulRestartTimeoutToml Duration `toml:"graceful_restart_timeout"` InternalSocketDir string `toml:"internal_socket_dir"` } @@ -169,7 +169,7 @@ func Validate() error { } func (c *Cfg) setDefaults() { - c.GracefulRestartTimeout = c.GracefulRestartTimeoutToml.Duration + c.GracefulRestartTimeout = c.GracefulRestartTimeoutToml.Duration() if c.GracefulRestartTimeout == 0 { c.GracefulRestartTimeout = 1 * time.Minute } diff --git a/internal/config/ruby.go b/internal/config/ruby.go index 6cf4e2e52..e5e43c170 100644 --- a/internal/config/ruby.go +++ b/internal/config/ruby.go @@ -11,28 +11,39 @@ type Ruby struct { Dir string `toml:"dir"` MaxRSS int `toml:"max_rss"` GracefulRestartTimeout time.Duration - GracefulRestartTimeoutToml duration `toml:"graceful_restart_timeout"` + GracefulRestartTimeoutToml Duration `toml:"graceful_restart_timeout"` RestartDelay time.Duration - RestartDelayToml duration `toml:"restart_delay"` + RestartDelayToml Duration `toml:"restart_delay"` NumWorkers int `toml:"num_workers"` LinguistLanguagesPath string `toml:"linguist_languages_path"` RuggedGitConfigSearchPath string `toml:"rugged_git_config_search_path"` } -// This type is a trick to let our TOML library parse durations from strings. -type duration struct { - time.Duration +// Duration is a trick to let our TOML library parse durations from strings. +type Duration time.Duration + +func (d *Duration) Duration() time.Duration { + if d != nil { + return time.Duration(*d) + } + return 0 } -func (d *duration) UnmarshalText(text []byte) error { - var err error - d.Duration, err = time.ParseDuration(string(text)) +func (d *Duration) UnmarshalText(text []byte) error { + td, err := time.ParseDuration(string(text)) + if err == nil { + *d = Duration(td) + } return err } +func (d Duration) MarshalText() ([]byte, error) { + return []byte(time.Duration(d).String()), nil +} + // ConfigureRuby validates the gitaly-ruby configuration and sets default values. func ConfigureRuby() error { - Config.Ruby.GracefulRestartTimeout = Config.Ruby.GracefulRestartTimeoutToml.Duration + Config.Ruby.GracefulRestartTimeout = Config.Ruby.GracefulRestartTimeoutToml.Duration() if Config.Ruby.GracefulRestartTimeout == 0 { Config.Ruby.GracefulRestartTimeout = 10 * time.Minute } @@ -41,7 +52,7 @@ func ConfigureRuby() error { Config.Ruby.MaxRSS = 200 * 1024 * 1024 } - Config.Ruby.RestartDelay = Config.Ruby.RestartDelayToml.Duration + Config.Ruby.RestartDelay = Config.Ruby.RestartDelayToml.Duration() if Config.Ruby.RestartDelay == 0 { Config.Ruby.RestartDelay = 5 * time.Minute } diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go index adfd183dc..98d914ba9 100644 --- a/internal/praefect/config/config.go +++ b/internal/praefect/config/config.go @@ -3,10 +3,11 @@ package config import ( "errors" "fmt" - "os" "strings" + "time" "github.com/BurntSushi/toml" + "gitlab.com/gitlab-org/gitaly/internal/config" "gitlab.com/gitlab-org/gitaly/internal/config/auth" "gitlab.com/gitlab-org/gitaly/internal/config/log" "gitlab.com/gitlab-org/gitaly/internal/config/prometheus" @@ -34,8 +35,9 @@ type Config struct { DB `toml:"database"` Failover Failover `toml:"failover"` // Keep for legacy reasons: remove after Omnibus has switched - FailoverEnabled bool `toml:"failover_enabled"` - MemoryQueueEnabled bool `toml:"memory_queue_enabled"` + FailoverEnabled bool `toml:"failover_enabled"` + MemoryQueueEnabled bool `toml:"memory_queue_enabled"` + GracefulStopTimeout config.Duration `toml:"graceful_stop_timeout"` } // VirtualStorage represents a set of nodes for a storage @@ -46,22 +48,20 @@ type VirtualStorage struct { // FromFile loads the config for the passed file path func FromFile(filePath string) (Config, error) { - config := &Config{} - cfgFile, err := os.Open(filePath) - if err != nil { - return *config, err + conf := &Config{} + if _, err := toml.DecodeFile(filePath, conf); err != nil { + return Config{}, err } - defer cfgFile.Close() - _, err = toml.DecodeReader(cfgFile, config) + conf.setDefaults() // TODO: Remove this after failover_enabled has moved under a separate failover section. This is for // backwards compatibility only - if config.FailoverEnabled { - config.Failover.Enabled = true + if conf.FailoverEnabled { + conf.Failover.Enabled = true } - return *config, err + return *conf, nil } var ( @@ -79,7 +79,7 @@ var ( ) // Validate establishes if the config is valid -func (c Config) Validate() error { +func (c *Config) Validate() error { if c.ListenAddr == "" && c.SocketPath == "" { return errNoListener } @@ -144,10 +144,16 @@ func (c Config) Validate() error { } // NeedsSQL returns true if the driver for SQL needs to be initialized -func (c Config) NeedsSQL() bool { +func (c *Config) NeedsSQL() bool { return !c.MemoryQueueEnabled || (c.Failover.Enabled && c.Failover.ElectionStrategy == "sql") } +func (c *Config) setDefaults() { + if c.GracefulStopTimeout.Duration() == 0 { + c.GracefulStopTimeout = config.Duration(time.Minute) + } +} + // DB holds Postgres client configuration data. type DB struct { Host string `toml:"host"` diff --git a/internal/praefect/config/config_test.go b/internal/praefect/config/config_test.go index c85894b08..9fff7b1d3 100644 --- a/internal/praefect/config/config_test.go +++ b/internal/praefect/config/config_test.go @@ -1,10 +1,14 @@ package config import ( + "errors" + "os" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/config" "gitlab.com/gitlab-org/gitaly/internal/config/log" gitaly_prometheus "gitlab.com/gitlab-org/gitaly/internal/config/prometheus" "gitlab.com/gitlab-org/gitaly/internal/config/sentry" @@ -209,10 +213,13 @@ func TestConfigValidation(t *testing.T) { func TestConfigParsing(t *testing.T) { testCases := []struct { - filePath string - expected Config + desc string + filePath string + expected Config + expectedErr error }{ { + desc: "check all configuration values", filePath: "testdata/config.toml", expected: Config{ Logging: log.Config{ @@ -257,16 +264,29 @@ func TestConfigParsing(t *testing.T) { SSLKey: "/path/to/key", SSLRootCert: "/path/to/root-cert", }, - MemoryQueueEnabled: true, + MemoryQueueEnabled: true, + GracefulStopTimeout: config.Duration(30 * time.Second), }, }, + { + desc: "empty config yields default values", + filePath: "testdata/config.empty.toml", + expected: Config{ + GracefulStopTimeout: config.Duration(time.Minute), + }, + }, + { + desc: "config file does not exist", + filePath: "testdata/config.invalid-path.toml", + expectedErr: os.ErrNotExist, + }, } for _, tc := range testCases { - t.Run(tc.filePath, func(t *testing.T) { + t.Run(tc.desc, func(t *testing.T) { cfg, err := FromFile(tc.filePath) - require.NoError(t, err) require.Equal(t, tc.expected, cfg) + require.True(t, errors.Is(err, tc.expectedErr), "actual error: %v", err) }) } } diff --git a/internal/praefect/config/testdata/config.empty.toml b/internal/praefect/config/testdata/config.empty.toml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/internal/praefect/config/testdata/config.empty.toml diff --git a/internal/praefect/config/testdata/config.toml b/internal/praefect/config/testdata/config.toml index 0ed72932e..0628d9034 100644 --- a/internal/praefect/config/testdata/config.toml +++ b/internal/praefect/config/testdata/config.toml @@ -2,6 +2,7 @@ listen_addr = "" socket_path = "" prometheus_listen_addr = "" memory_queue_enabled = true +graceful_stop_timeout = "30s" [logging] format = "json" |