From 484cdfb1bea53148fef4263343c7e16649f47127 Mon Sep 17 00:00:00 2001 From: Will Chandler Date: Thu, 7 Jul 2022 09:07:49 -0400 Subject: testserver: Capture Praefect logs if dialing fails In 710409d89c8f31a7b711612b1860b8b2771965c4 we removed the 15 second timeout in `waitHealthy()` to avoid spurious failures in slow CI environments. Since this change we have seen occasional instances of multi-minute waits for Praefect, causing the test process to panic on timeout. To better understand what's going wrong, let's increase Praefect's log verbosity to `info` and print stderr (where all logging is written) when the context deadline is reached in `waitHealthy()`. If we're lucky this will have an error, but even which events were logged should give us clues as to where Praefect got stuck. --- internal/testhelper/testserver/gitaly.go | 16 ++++++++-------- internal/testhelper/testserver/praefect.go | 25 +++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/internal/testhelper/testserver/gitaly.go b/internal/testhelper/testserver/gitaly.go index 0313ba34c..54cf7f155 100644 --- a/internal/testhelper/testserver/gitaly.go +++ b/internal/testhelper/testserver/gitaly.go @@ -1,6 +1,7 @@ package testserver import ( + "context" "net" "os" "testing" @@ -82,7 +83,7 @@ func runPraefectProxy(t testing.TB, gitalyCfg config.Cfg, gitalyAddr string) Pra Replication: praefectconfig.DefaultReplicationConfig(), Logging: gitalylog.Config{ Format: "json", - Level: "panic", + Level: "info", }, VirtualStorages: []*praefectconfig.VirtualStorage{ { @@ -119,9 +120,8 @@ func (gs GitalyServer) Address() string { return gs.address } -// waitHealthy waits until the server hosted at address becomes healthy. Times out after a fixed -// amount of time. -func waitHealthy(t testing.TB, addr string, authToken string) { +// waitHealthy waits until the server hosted at address becomes healthy. +func waitHealthy(ctx context.Context, t testing.TB, addr string, authToken string) { grpcOpts := []grpc.DialOption{ grpc.WithBlock(), } @@ -129,8 +129,6 @@ func waitHealthy(t testing.TB, addr string, authToken string) { grpcOpts = append(grpcOpts, grpc.WithPerRPCCredentials(gitalyauth.RPCCredentialsV2(authToken))) } - ctx := testhelper.Context(t) - conn, err := client.DialContext(ctx, addr, grpcOpts) require.NoError(t, err) defer testhelper.MustClose(t, conn) @@ -178,7 +176,8 @@ func runGitaly(t testing.TB, cfg config.Cfg, rubyServer *rubyserver.Server, regi assert.NoError(t, internalServer.Serve(internalListener), "failure to serve internal gRPC") }() - waitHealthy(t, "unix://"+internalListener.Addr().String(), cfg.Auth.Token) + ctx := testhelper.Context(t) + waitHealthy(ctx, t, "unix://"+internalListener.Addr().String(), cfg.Auth.Token) } externalServer, err := serverFactory.CreateExternal(cfg.TLS.CertPath != "" && cfg.TLS.KeyPath != "") @@ -212,7 +211,8 @@ func runGitaly(t testing.TB, cfg config.Cfg, rubyServer *rubyserver.Server, regi assert.NoError(t, externalServer.Serve(listener), "failure to serve external gRPC") }() - waitHealthy(t, addr, cfg.Auth.Token) + ctx := testhelper.Context(t) + waitHealthy(ctx, t, addr, cfg.Auth.Token) return externalServer, addr, gsd.disablePraefect } diff --git a/internal/testhelper/testserver/praefect.go b/internal/testhelper/testserver/praefect.go index 2f433dd4c..98eb7f89f 100644 --- a/internal/testhelper/testserver/praefect.go +++ b/internal/testhelper/testserver/praefect.go @@ -1,11 +1,14 @@ package testserver import ( + "bytes" + "context" "net" "os" "os/exec" "path/filepath" "testing" + "time" "github.com/pelletier/go-toml" "github.com/stretchr/testify/require" @@ -76,7 +79,9 @@ func StartPraefect(t testing.TB, cfg config.Config) PraefectServer { }) cmd := exec.Command(binaryPath, "-config", configFilePath) - cmd.Stderr = os.Stderr + // Logs are written to stderr, we can ignore stdout. + var stderr bytes.Buffer + cmd.Stderr = &stderr cmd.Stdout = os.Stdout require.NoError(t, cmd.Start()) @@ -90,7 +95,23 @@ func StartPraefect(t testing.TB, cfg config.Config) PraefectServer { } t.Cleanup(praefectServer.Shutdown) - waitHealthy(t, praefectServer.Address(), cfg.Auth.Token) + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + + // Ensure this runs even if context ends in waitHealthy. + defer func() { + select { + case <-ctx.Done(): + switch ctx.Err() { + case context.DeadlineExceeded: + // Capture Praefect logs when waitHealthy takes too long. + t.Errorf("Failed to connect to Praefect:\n%v", stderr.String()) + } + default: + } + }() + + waitHealthy(ctx, t, praefectServer.Address(), cfg.Auth.Token) return praefectServer } -- cgit v1.2.3