Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'internal/praefect/nodes/health_manager.go')
-rw-r--r-- internal/praefect/nodes/health_manager.go 21
1 files changed, 16 insertions, 5 deletions
diff --git a/internal/praefect/nodes/health_manager.go b/internal/praefect/nodes/health_manager.go
index 32f174533..b8e34698f 100644
--- a/internal/praefect/nodes/health_manager.go
+++ b/internal/praefect/nodes/health_manager.go
@@ -44,9 +44,14 @@ type HealthManager struct {
praefectName string
// healthCheckTimeout is the duration after which a health check attempt times out.
healthCheckTimeout time.Duration
-
- firstUpdate bool
- updated chan struct{}
+ // databaseTimeout applies the timeout for the database update. It returns a context with
+ // the timeout applied and a cancellation function. This should be shorter than the failover
+ // timeout, otherwise it is possible that the updated health checks are immediately considered
+ // outdated after the update has finished. This can be difficult to debug as Gitaly nodes are
+ // seemingly responding to the health checks but are considered outdated by Praefect.
+ databaseTimeout func(context.Context) (context.Context, func())
+ firstUpdate bool
+ updated chan struct{}
locallyHealthy atomic.Value
}
@@ -70,8 +75,11 @@ func NewHealthManager(
},
praefectName: praefectName,
healthCheckTimeout: healthcheckTimeout,
- firstUpdate: true,
- updated: make(chan struct{}, 1),
+ databaseTimeout: func(ctx context.Context) (context.Context, func()) {
+ return context.WithTimeout(ctx, 5*time.Second)
+ },
+ firstUpdate: true,
+ updated: make(chan struct{}, 1),
}
hm.locallyHealthy.Store(make(map[string][]string, len(clients)))
@@ -130,6 +138,9 @@ func (hm *HealthManager) updateHealthChecks(ctx context.Context, virtualStorages
hm.locallyHealthy.Store(locallyHealthy)
+ ctx, cancel := hm.databaseTimeout(ctx)
+ defer cancel()
+
if _, err := hm.db.ExecContext(ctx, `
INSERT INTO node_status (praefect_name, shard_name, node_name, last_contact_attempt_at, last_seen_active_at)
SELECT $1, shard_name, node_name, NOW(), CASE WHEN is_healthy THEN NOW() ELSE NULL END