diff options
author | Pavlo Strokov <pstrokov@gitlab.com> | 2021-11-02 17:11:40 +0300 |
---|---|---|
committer | Toon Claes <toon@gitlab.com> | 2021-11-12 17:34:09 +0300 |
commit | 9e1a05c6e8c33e39a60fee334ca6a384b6c25a29 (patch) | |
tree | 9b980d1063fd99dd5f8c0c4eea613bc9a228e0a2 /cmd | |
parent | 7a421817d51abd1fa830353b706e585d77492608 (diff) |
praefect: Praefect should not hang if db is not reachable
If the database is not accessible the at the time of Praefect
start it hangs forever and doesn't report much information
about the reason. The attempt to fix it by using PingContext
with timeout set on the context failed because of the issue
in the lib/pq that is not yet fixed. The problem is solved
by a separate goroutine that issues the PingContext and main
goroutine watching the timeout. If timeout occurs it returns
an error up to the caller. Another goroutine won't be over
and hangs, but we don't care much as it is a critical error
and the service will be terminated.
Closes: https://gitlab.com/gitlab-org/gitaly/-/issues/3678
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/praefect/main.go | 17 | ||||
-rw-r--r-- | cmd/praefect/subcmd.go | 6 | ||||
-rw-r--r-- | cmd/praefect/subcmd_list_untracked_repositories.go | 4 | ||||
-rw-r--r-- | cmd/praefect/subcmd_remove_repository.go | 7 | ||||
-rw-r--r-- | cmd/praefect/subcmd_remove_repository_test.go | 2 | ||||
-rw-r--r-- | cmd/praefect/subcmd_track_repository.go | 10 | ||||
-rw-r--r-- | cmd/praefect/subcmd_track_repository_test.go | 4 |
7 files changed, 32 insertions, 18 deletions
diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go index 262b2f90d..ed8333b1a 100644 --- a/cmd/praefect/main.go +++ b/cmd/praefect/main.go @@ -208,20 +208,21 @@ func run(cfgs []starter.Config, conf config.Config, b bootstrap.Listener, promre return err } - var db *sql.DB + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + var db *sql.DB if conf.NeedsSQL() { - dbConn, closedb, err := initDatabase(logger, conf) + logger.Infof("establishing database connection to %s:%d ...", conf.DB.Host, conf.DB.Port) + dbConn, closedb, err := initDatabase(ctx, logger, conf) if err != nil { return err } defer closedb() db = dbConn + logger.Info("database connection established") } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - var queue datastore.ReplicationEventQueue var rs datastore.RepositoryStore var csg datastore.ConsistentStoragesGetter @@ -517,8 +518,10 @@ func getStarterConfigs(conf config.Config) ([]starter.Config, error) { return cfgs, nil } -func initDatabase(logger *logrus.Entry, conf config.Config) (*sql.DB, func(), error) { - db, err := glsql.OpenDB(conf.DB) +func initDatabase(ctx context.Context, logger *logrus.Entry, conf config.Config) (*sql.DB, func(), error) { + openDBCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + db, err := glsql.OpenDB(openDBCtx, conf.DB) if err != nil { logger.WithError(err).Error("SQL connection open failed") return nil, nil, err diff --git a/cmd/praefect/subcmd.go b/cmd/praefect/subcmd.go index 21279939c..4edf9573d 100644 --- a/cmd/praefect/subcmd.go +++ b/cmd/praefect/subcmd.go @@ -89,7 +89,11 @@ func getNodeAddress(cfg config.Config) (string, error) { } func openDB(conf config.DB) (*sql.DB, func(), error) { - db, err := glsql.OpenDB(conf) + ctx := context.Background() + + openDBCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + db, err := glsql.OpenDB(openDBCtx, conf) if err != nil { return nil, nil, fmt.Errorf("sql open: %v", err) } diff --git a/cmd/praefect/subcmd_list_untracked_repositories.go b/cmd/praefect/subcmd_list_untracked_repositories.go index e74f02315..0abf9c381 100644 --- a/cmd/praefect/subcmd_list_untracked_repositories.go +++ b/cmd/praefect/subcmd_list_untracked_repositories.go @@ -72,7 +72,9 @@ func (cmd listUntrackedRepositories) Exec(flags *flag.FlagSet, cfg config.Config logger.Debug("connected to gitaly nodes") logger.Debug("connecting to praefect database...") - db, err := glsql.OpenDB(cfg.DB) + openDBCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + db, err := glsql.OpenDB(openDBCtx, cfg.DB) if err != nil { return fmt.Errorf("connect to database: %w", err) } diff --git a/cmd/praefect/subcmd_remove_repository.go b/cmd/praefect/subcmd_remove_repository.go index b9e0bfb07..ded83ad2e 100644 --- a/cmd/praefect/subcmd_remove_repository.go +++ b/cmd/praefect/subcmd_remove_repository.go @@ -62,14 +62,17 @@ func (cmd removeRepository) Exec(flags *flag.FlagSet, cfg config.Config) error { case cmd.relativePath == "": return requiredParameterError(paramRelativePath) } + ctx := context.Background() - db, err := glsql.OpenDB(cfg.DB) + openDBCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + db, err := glsql.OpenDB(openDBCtx, cfg.DB) if err != nil { return fmt.Errorf("connect to database: %w", err) } defer func() { _ = db.Close() }() - ctx := correlation.ContextWithCorrelation(context.Background(), correlation.SafeRandomID()) + ctx = correlation.ContextWithCorrelation(ctx, correlation.SafeRandomID()) logger := cmd.logger.WithField("correlation_id", correlation.ExtractFromContext(ctx)) return cmd.exec(ctx, logger, db, cfg) diff --git a/cmd/praefect/subcmd_remove_repository_test.go b/cmd/praefect/subcmd_remove_repository_test.go index 98ea5a8bd..5f9771234 100644 --- a/cmd/praefect/subcmd_remove_repository_test.go +++ b/cmd/praefect/subcmd_remove_repository_test.go @@ -61,7 +61,7 @@ func TestRemoveRepository_Exec_invalidArgs(t *testing.T) { cfg := config.Config{DB: config.DB{Host: "stub", SSLMode: "disable"}} err := cmd.Exec(flag.NewFlagSet("", flag.PanicOnError), cfg) require.Error(t, err) - require.Contains(t, err.Error(), "connect to database: dial tcp: lookup stub") + require.Contains(t, err.Error(), "connect to database: send ping: dial tcp: lookup stub") }) t.Run("praefect address is not set in config ", func(t *testing.T) { diff --git a/cmd/praefect/subcmd_track_repository.go b/cmd/praefect/subcmd_track_repository.go index 335b353a1..2adf04531 100644 --- a/cmd/praefect/subcmd_track_repository.go +++ b/cmd/praefect/subcmd_track_repository.go @@ -65,15 +65,17 @@ func (cmd trackRepository) Exec(flags *flag.FlagSet, cfg config.Config) error { } } - db, err := glsql.OpenDB(cfg.DB) + ctx := correlation.ContextWithCorrelation(context.Background(), correlation.SafeRandomID()) + logger := cmd.logger.WithField("correlation_id", correlation.ExtractFromContext(ctx)) + + openDBCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + db, err := glsql.OpenDB(openDBCtx, cfg.DB) if err != nil { return fmt.Errorf("connect to database: %w", err) } defer func() { _ = db.Close() }() - ctx := correlation.ContextWithCorrelation(context.Background(), correlation.SafeRandomID()) - logger := cmd.logger.WithField("correlation_id", correlation.ExtractFromContext(ctx)) - return cmd.exec(ctx, logger, db, cfg) } diff --git a/cmd/praefect/subcmd_track_repository_test.go b/cmd/praefect/subcmd_track_repository_test.go index 609f798dd..c6195ad99 100644 --- a/cmd/praefect/subcmd_track_repository_test.go +++ b/cmd/praefect/subcmd_track_repository_test.go @@ -62,11 +62,11 @@ func TestAddRepository_Exec_invalidArgs(t *testing.T) { }) t.Run("db connection error", func(t *testing.T) { - cmd := trackRepository{virtualStorage: "stub", relativePath: "stub", authoritativeStorage: "storage-0"} + cmd := trackRepository{virtualStorage: "stub", relativePath: "stub", authoritativeStorage: "storage-0", logger: testhelper.NewTestLogger(t)} cfg := config.Config{DB: config.DB{Host: "stub", SSLMode: "disable"}} err := cmd.Exec(flag.NewFlagSet("", flag.PanicOnError), cfg) require.Error(t, err) - require.Contains(t, err.Error(), "connect to database: dial tcp: lookup stub") + require.Contains(t, err.Error(), "connect to database: send ping: dial tcp: lookup stub") }) } |