diff options
author | Paul Okstad <pokstad@gitlab.com> | 2020-01-17 18:13:11 +0300 |
---|---|---|
committer | Paul Okstad <pokstad@gitlab.com> | 2020-01-17 18:13:11 +0300 |
commit | 8912bf9de7764e3b7590d3f2c0837bba573308d0 (patch) | |
tree | 52db19212b65f47cf04762d3a7468c7f6fb0386b | |
parent | a786260db7789b0a2badc67354c40cbc46dad24f (diff) | |
parent | 472a1b0b3567465754cd27106374a8ee902c1afe (diff) |
Merge branch 'po-praefect-conn-checker' into 'master'
Praefect subcommand for checking node connections
See merge request gitlab-org/gitaly!1700
-rw-r--r-- | changelogs/unreleased/po-praefect-conn-checker.yml | 5 |
-rw-r--r-- | cmd/praefect/main.go | 26 |
-rw-r--r-- | cmd/praefect/main_test.go | 60 |
-rw-r--r-- | cmd/praefect/subcmd_pingnodes.go | 122 |
-rw-r--r-- | cmd/praefect/subcommand.go | 11 |
5 files changed, 224 insertions, 0 deletions
diff --git a/changelogs/unreleased/po-praefect-conn-checker.yml b/changelogs/unreleased/po-praefect-conn-checker.yml new file mode 100644 index 000000000..85ff1acf0 --- /dev/null +++ b/changelogs/unreleased/po-praefect-conn-checker.yml @@ -0,0 +1,5 @@ +--- +title: Praefect subcommand for checking node connections +merge_request: 1700 +author: +type: added diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go index 8536cdbbe..a0868f36d 100644 --- a/cmd/praefect/main.go +++ b/cmd/praefect/main.go @@ -1,3 +1,29 @@ +// Command praefect provides a reverse-proxy server with high-availability +// specific features for Gitaly. +// +// Additionally, praefect has subcommands for common tasks: +// +// SQL Ping +// +// The subcommand "sql-ping" checks if the database configured in the config +// file is reachable: +// +// praefect -config PATH_TO_CONFIG sql-ping +// +// SQL Migrate +// +// The subcommand "sql-migrate" will apply any outstanding SQL migrations. +// +// praefect -config PATH_TO_CONFIG sql-migrate +// +// Dial Nodes +// +// The subcommand "dial-nodes" helps diagnose connection problems to Gitaly or +// Praefect. The subcommand works by sourcing the connection information from +// the config file, and then dialing and health checking the remote nodes. 
+// +// praefect -config PATH_TO_CONFIG dial-nodes +// package main import ( diff --git a/cmd/praefect/main_test.go b/cmd/praefect/main_test.go index 65d16a86c..62ffe7786 100644 --- a/cmd/praefect/main_test.go +++ b/cmd/praefect/main_test.go @@ -4,6 +4,9 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/praefect/config" + "gitlab.com/gitlab-org/gitaly/internal/praefect/models" ) func TestNoConfigFlag(t *testing.T) { @@ -11,3 +14,60 @@ func TestNoConfigFlag(t *testing.T) { assert.Equal(t, err, errNoConfigFile) } + +func TestFlattenNodes(t *testing.T) { + for _, tt := range []struct { + desc string + conf config.Config + expect map[string]*nodePing + }{ + { + desc: "Flatten common address between storages", + conf: config.Config{ + VirtualStorages: []*config.VirtualStorage{ + { + Name: "meow", + Nodes: []*models.Node{ + { + Storage: "foo", + Address: "tcp://example.com", + Token: "abc", + DefaultPrimary: true, + }, + }, + }, + { + Name: "woof", + Nodes: []*models.Node{ + { + Storage: "bar", + Address: "tcp://example.com", + Token: "abc", + DefaultPrimary: true, + }, + }, + }, + }, + }, + expect: map[string]*nodePing{ + "tcp://example.com": &nodePing{ + address: "tcp://example.com", + storages: map[string]struct{}{ + "foo": struct{}{}, + "bar": struct{}{}, + }, + vStorages: map[string]struct{}{ + "meow": struct{}{}, + "woof": struct{}{}, + }, + token: "abc", + }, + }, + }, + } { + t.Run(tt.desc, func(t *testing.T) { + actual := flattenNodes(tt.conf) + require.Equal(t, tt.expect, actual) + }) + } +} diff --git a/cmd/praefect/subcmd_pingnodes.go b/cmd/praefect/subcmd_pingnodes.go new file mode 100644 index 000000000..3aba0c484 --- /dev/null +++ b/cmd/praefect/subcmd_pingnodes.go @@ -0,0 +1,122 @@ +package main + +import ( + "context" + "fmt" + "log" + "sync" + "time" + + "gitlab.com/gitlab-org/gitaly/client" + "gitlab.com/gitlab-org/gitaly/internal/praefect/config" + 
"google.golang.org/grpc" + "google.golang.org/grpc/health/grpc_health_v1" +) + +type nodePing struct { + address string + storages map[string]struct{} // set of storages this node hosts + vStorages map[string]struct{} // set of virtual storages node belongs to + token string // auth token + err error // any error during dial/ping +} + +func flattenNodes(conf config.Config) map[string]*nodePing { + nodeByAddress := map[string]*nodePing{} // key is address + + // flatten nodes between virtual storages + for _, vs := range conf.VirtualStorages { + for _, node := range vs.Nodes { + n, ok := nodeByAddress[node.Address] + if !ok { + n = &nodePing{ + storages: map[string]struct{}{}, + vStorages: map[string]struct{}{}, + } + } + n.address = node.Address + n.storages[node.Storage] = struct{}{} + n.vStorages[vs.Name] = struct{}{} + n.token = node.Token + nodeByAddress[node.Address] = n + } + } + return nodeByAddress +} + +func dialNodes(conf config.Config) int { + nodes := flattenNodes(conf) + + var wg sync.WaitGroup + for _, n := range nodes { + wg.Add(1) + go func(n *nodePing) { + defer wg.Done() + n.checkNode() + }(n) + } + wg.Wait() + + exitCode := 0 + for _, n := range nodes { + if n.err != nil { + exitCode = 1 + } + } + + return exitCode +} + +func (npr *nodePing) dial() (*grpc.ClientConn, error) { + return client.Dial(npr.address, []grpc.DialOption{ + grpc.WithBlock(), + grpc.WithTimeout(30 * time.Second), + }) +} + +func (npr *nodePing) healthCheck(cc *grpc.ClientConn) (grpc_health_v1.HealthCheckResponse_ServingStatus, error) { + hClient := grpc_health_v1.NewHealthClient(cc) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + resp, err := hClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{}) + if err != nil { + return 0, err + } + + return resp.GetStatus(), nil +} + +func (npr *nodePing) log(msg string, args ...interface{}) { + log.Printf("[%s]: %s", npr.address, fmt.Sprintf(msg, args...)) +} + +func (npr *nodePing) 
checkNode() { + npr.log("dialing...") + cc, err := npr.dial() + if err != nil { + npr.log("ERROR: dialing failed: %v", err) + npr.err = err + return + } + defer cc.Close() + npr.log("dialed successfully!") + + npr.log("checking health...") + health, err := npr.healthCheck(cc) + if err != nil { + npr.log("ERROR: unable to request health check: %v", err) + npr.err = err + return + } + + if health != grpc_health_v1.HealthCheckResponse_SERVING { + npr.err = fmt.Errorf( + "health check did not report serving, instead reported: %s", + health.String()) + npr.log("ERROR: %v", npr.err) + return + } + npr.log("SUCCESS: node is healthy!") +} diff --git a/cmd/praefect/subcommand.go b/cmd/praefect/subcommand.go index 48f5b0387..74e9d7989 100644 --- a/cmd/praefect/subcommand.go +++ b/cmd/praefect/subcommand.go @@ -3,6 +3,7 @@ package main import ( "fmt" "os" + "os/signal" "gitlab.com/gitlab-org/gitaly/internal/praefect/config" "gitlab.com/gitlab-org/gitaly/internal/praefect/datastore" @@ -10,11 +11,21 @@ import ( // subCommand returns an exit code, to be fed into os.Exit. func subCommand(conf config.Config, arg0 string, argRest []string) int { + interrupt := make(chan os.Signal) + signal.Notify(interrupt, os.Interrupt) + + go func() { + <-interrupt + os.Exit(130) // indicates program was interrupted + }() + switch arg0 { case "sql-ping": return sqlPing(conf) case "sql-migrate": return sqlMigrate(conf) + case "dial-nodes": + return dialNodes(conf) default: fmt.Printf("%s: unknown subcommand: %q\n", progname, arg0) return 1 |