gitlab.com/gitlab-org/gitaly.git
author     Sami Hiltunen <shiltunen@gitlab.com>  2022-11-30 23:51:01 +0300
committer  Sami Hiltunen <shiltunen@gitlab.com>  2022-11-30 23:54:22 +0300
commit     36e2349865ae8c713923f4fe8b7b8973a5104d73 (patch)
tree       36688d9b34a5496f7b135205ef43c2302deedaa0
parent     ab405cca9de3358f498615344c663279eaa9922f (diff)
Support health checking connections locally only
HealthManager is responsible for health checking connections to Gitaly in Praefect. While doing so, it also persists the results in the database so that the other Praefects can take each other's connection status into consideration when promoting primaries and so forth.

As we are about to add a second set of connections behind a feature flag, we want to health check those connections so we don't route traffic to them while they are unhealthy, but we don't want the status of the feature-flagged connections to affect the health consensus used for the elections. This commit makes that possible with the health manager: if the HealthManager is not given a database handle, it simply checks the connections locally without persisting any information in the database.
-rw-r--r--  internal/praefect/nodes/health_manager.go        25
-rw-r--r--  internal/praefect/nodes/health_manager_test.go    25
2 files changed, 41 insertions(+), 9 deletions(-)
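For illustration, below is a minimal, self-contained Go sketch of the pattern this commit introduces: local health state is always updated, while the database write that feeds the cluster-wide consensus happens only when a database handle was provided. The names (demoHealthManager, recordCheck) and the simplified query are hypothetical stand-ins, not Gitaly's actual API; the real types appear in the diff that follows.

package main

import (
	"context"
	"database/sql"
	"fmt"
)

// demoHealthManager is a hypothetical, stripped-down stand-in for Praefect's
// HealthManager: it remembers which storages passed their last health check
// and, only when a database handle is present, persists the result for the
// cluster-wide consensus.
type demoHealthManager struct {
	db      *sql.DB             // nil for feature-flagged connections that must not affect the consensus
	healthy map[string][]string // locally observed healthy storages per virtual storage
}

// recordCheck mirrors the control flow of the patched updateHealthChecks:
// the local view is always updated, the database write is skipped when no
// handle was provided.
func (hm *demoHealthManager) recordCheck(ctx context.Context, virtualStorage, storage string, isHealthy bool) error {
	if hm.healthy == nil {
		hm.healthy = map[string][]string{}
	}
	if isHealthy {
		hm.healthy[virtualStorage] = append(hm.healthy[virtualStorage], storage)
	}

	if hm.db != nil {
		// Persist the check so other Praefects can factor it into primary elections.
		// The table name follows the INSERT in the diff below, but the query is simplified.
		if _, err := hm.db.ExecContext(ctx,
			`INSERT INTO node_status (praefect_name, shard_name, node_name, last_contact_attempt_at) VALUES ($1, $2, $3, NOW())`,
			"praefect-1", virtualStorage, storage,
		); err != nil {
			return fmt.Errorf("update checks: %w", err)
		}
	}

	return nil
}

func main() {
	// No database handle: health is still tracked locally, nothing is persisted.
	hm := &demoHealthManager{db: nil}
	if err := hm.recordCheck(context.Background(), "virtual-storage", "healthy-storage", true); err != nil {
		panic(err)
	}
	fmt.Println(hm.healthy) // map[virtual-storage:[healthy-storage]]
}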
diff --git a/internal/praefect/nodes/health_manager.go b/internal/praefect/nodes/health_manager.go
index 21cdfc9fe..0743bf520 100644
--- a/internal/praefect/nodes/health_manager.go
+++ b/internal/praefect/nodes/health_manager.go
@@ -58,6 +58,9 @@ type HealthManager struct {
 // NewHealthManager returns a new health manager that monitors which nodes in the cluster
 // are healthy.
+//
+// If db is nil, the HealthManager checks the connection health normally but doesn't persist
+// any information about the nodes in the database.
 func NewHealthManager(
 	log logrus.FieldLogger,
 	db glsql.Querier,
@@ -138,10 +141,13 @@ func (hm *HealthManager) updateHealthChecks(ctx context.Context, virtualStorages
 	hm.locallyHealthy.Store(locallyHealthy)
-	ctx, cancel := hm.databaseTimeout(ctx)
-	defer cancel()
+	if hm.db != nil {
+		// Database is nil only when an alternative set of connections is being tested behind a feature flag
+		// and we do not want to affect the consensus in the database, just the routing decisions.
+		ctx, cancel := hm.databaseTimeout(ctx)
+		defer cancel()
-	if _, err := hm.db.ExecContext(ctx, `
+		if _, err := hm.db.ExecContext(ctx, `
 INSERT INTO node_status (praefect_name, shard_name, node_name, last_contact_attempt_at, last_seen_active_at)
 SELECT $1, shard_name, node_name, NOW(), CASE WHEN is_healthy THEN NOW() ELSE NULL END
 FROM (
@@ -155,12 +161,13 @@ ON CONFLICT (praefect_name, shard_name, node_name)
 	last_contact_attempt_at = NOW(),
 	last_seen_active_at = COALESCE(EXCLUDED.last_seen_active_at, node_status.last_seen_active_at)
 `,
-		hm.praefectName,
-		virtualStorages,
-		physicalStorages,
-		healthy,
-	); err != nil {
-		return fmt.Errorf("update checks: %w", err)
+			hm.praefectName,
+			virtualStorages,
+			physicalStorages,
+			healthy,
+		); err != nil {
+			return fmt.Errorf("update checks: %w", err)
+		}
 	}
 	if hm.firstUpdate {
diff --git a/internal/praefect/nodes/health_manager_test.go b/internal/praefect/nodes/health_manager_test.go
index f53297854..15b191cda 100644
--- a/internal/praefect/nodes/health_manager_test.go
+++ b/internal/praefect/nodes/health_manager_test.go
@@ -49,6 +49,31 @@ func getHealthConsensus(t *testing.T, ctx context.Context, db glsql.Querier) map
 	return consensus
 }
+func TestHealthManagerWithoutDatabase(t *testing.T) {
+	t.Parallel()
+
+	hm := NewHealthManager(testhelper.NewDiscardingLogger(t), nil, "ignored", HealthClients{
+		"virtual-storage": {
+			"healthy-storage": mockHealthClient{
+				CheckFunc: func(context.Context, *grpc_health_v1.HealthCheckRequest, ...grpc.CallOption) (*grpc_health_v1.HealthCheckResponse, error) {
+					return &grpc_health_v1.HealthCheckResponse{Status: grpc_health_v1.HealthCheckResponse_SERVING}, nil
+				},
+			},
+			"unhealthy-storage": mockHealthClient{
+				CheckFunc: func(context.Context, *grpc_health_v1.HealthCheckRequest, ...grpc.CallOption) (*grpc_health_v1.HealthCheckResponse, error) {
+					return &grpc_health_v1.HealthCheckResponse{Status: grpc_health_v1.HealthCheckResponse_NOT_SERVING}, nil
+				},
+			},
+		},
+	})
+	hm.handleError = func(err error) error { return err }
+
+	runCtx, cancelRun := context.WithCancel(testhelper.Context(t))
+	require.Equal(t, context.Canceled, hm.Run(runCtx, helper.NewCountTicker(1, cancelRun)))
+	require.Equal(t, map[string][]string{"virtual-storage": {"healthy-storage"}}, hm.HealthyNodes())
+	<-hm.Updated()
+}
+
 func TestHealthManager(t *testing.T) {
 	t.Parallel()
 	ctx := testhelper.Context(t)