From f3c8cde33d58e30dfdc11ee88295d6ba3dd80c88 Mon Sep 17 00:00:00 2001 From: Quang-Minh Nguyen Date: Tue, 9 May 2023 17:16:02 +0700 Subject: Move dedicated spawn token log to gRPC logs Spawn token occasionally dumps some logs in two cases: - The queuing time exceeds a certain threshold - Fail to acquire the spawn token Those logs are also dedicated logs outside of major gRPC logs. This makes debugging and investigating a little bit hard. This commit moves this kind of logs to gRPC logs. This change consolidates the logs in the same place. It also removes redundant "spawn token acquired" logs. --- internal/command/spawntoken.go | 14 ++++---------- internal/command/spawntoken_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/internal/command/spawntoken.go b/internal/command/spawntoken.go index b49a32192..bdc4d664c 100644 --- a/internal/command/spawntoken.go +++ b/internal/command/spawntoken.go @@ -5,15 +5,12 @@ import ( "fmt" "time" - "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" "github.com/kelseyhightower/envconfig" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "gitlab.com/gitlab-org/gitaly/v16/internal/tracing" ) -const logDurationThreshold = 5 * time.Millisecond - var ( spawnTokens chan struct{} spawnConfig SpawnConfig @@ -63,7 +60,7 @@ func getSpawnToken(ctx context.Context) (putToken func(), err error) { select { case spawnTokens <- struct{}{}: - logTime(ctx, start, "spawn token acquired") + logTime(ctx, start, "") return func() { <-spawnTokens @@ -83,11 +80,8 @@ func logTime(ctx context.Context, start time.Time, msg string) { if stats := StatsFromContext(ctx); stats != nil { stats.RecordSum("command.spawn_token_wait_ms", int(delta.Milliseconds())) + if len(msg) != 0 { + stats.RecordMetadata("command.spawn_token_error", msg) + } } - - if delta < logDurationThreshold { - return - } - - ctxlogrus.Extract(ctx).WithField("spawn_queue_ms", delta.Seconds()*1000).Info(msg) } diff --git a/internal/command/spawntoken_test.go b/internal/command/spawntoken_test.go index 014802fff..3f10d5bb3 100644 --- a/internal/command/spawntoken_test.go +++ b/internal/command/spawntoken_test.go @@ -8,6 +8,8 @@ import ( ) func TestGetSpawnToken_CommandStats(t *testing.T) { + t.Parallel() + ctx := testhelper.Context(t) ctx = InitContextStats(ctx) @@ -19,3 +21,30 @@ func TestGetSpawnToken_CommandStats(t *testing.T) { require.NotNil(t, stats) require.Contains(t, stats.Fields(), "command.spawn_token_wait_ms") } + +// This test modifies a global config, hence should never run in parallel +func TestGetSpawnToken_CommandStats_timeout(t *testing.T) { + priorTimeout := spawnConfig.Timeout + priorSpawnTokens := spawnTokens + + spawnConfig.Timeout = 0 + spawnTokens = make(chan struct{}, 1) + spawnTokens <- struct{}{} + defer func() { + spawnConfig.Timeout = priorTimeout + spawnTokens = priorSpawnTokens + }() + + ctx := testhelper.Context(t) + ctx = InitContextStats(ctx) + + _, err := getSpawnToken(ctx) + require.ErrorContains(t, err, "process spawn timed out after") + + stats := StatsFromContext(ctx) + require.NotNil(t, stats) + fields := stats.Fields() + + require.GreaterOrEqual(t, fields["command.spawn_token_wait_ms"], 0) + require.Equal(t, fields["command.spawn_token_error"], "spawn token timeout") +} -- cgit v1.2.3