diff options
author | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-05-09 13:16:02 +0300 |
---|---|---|
committer | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2023-05-12 05:47:12 +0300 |
commit | f3c8cde33d58e30dfdc11ee88295d6ba3dd80c88 (patch) | |
tree | e880cc7985a03a95a25bf19346618a88e3241836 | |
parent | f50a7fe08903948118f49c10e7811f8a40e2ecca (diff) |
Move dedicated spawn token log to gRPC logs
Spawn token occasionally dumps some logs in two cases:
- The queuing time exceeds a certain threshold
- Fail to acquire the spawn token
Those logs are also dedicated logs outside of major gRPC logs. This
makes debugging and investigating a little bit hard. This commit moves
this kind of logs to gRPC logs. This change consolidates the logs in the
same place. It also removes redundant "spawn token acquired" logs.
-rw-r--r-- | internal/command/spawntoken.go | 14 | ||||
-rw-r--r-- | internal/command/spawntoken_test.go | 29 |
2 files changed, 33 insertions, 10 deletions
diff --git a/internal/command/spawntoken.go b/internal/command/spawntoken.go index b49a32192..bdc4d664c 100644 --- a/internal/command/spawntoken.go +++ b/internal/command/spawntoken.go @@ -5,15 +5,12 @@ import ( "fmt" "time" - "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" "github.com/kelseyhightower/envconfig" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "gitlab.com/gitlab-org/gitaly/v16/internal/tracing" ) -const logDurationThreshold = 5 * time.Millisecond - var ( spawnTokens chan struct{} spawnConfig SpawnConfig @@ -63,7 +60,7 @@ func getSpawnToken(ctx context.Context) (putToken func(), err error) { select { case spawnTokens <- struct{}{}: - logTime(ctx, start, "spawn token acquired") + logTime(ctx, start, "") return func() { <-spawnTokens @@ -83,11 +80,8 @@ func logTime(ctx context.Context, start time.Time, msg string) { if stats := StatsFromContext(ctx); stats != nil { stats.RecordSum("command.spawn_token_wait_ms", int(delta.Milliseconds())) + if len(msg) != 0 { + stats.RecordMetadata("command.spawn_token_error", msg) + } } - - if delta < logDurationThreshold { - return - } - - ctxlogrus.Extract(ctx).WithField("spawn_queue_ms", delta.Seconds()*1000).Info(msg) } diff --git a/internal/command/spawntoken_test.go b/internal/command/spawntoken_test.go index 014802fff..3f10d5bb3 100644 --- a/internal/command/spawntoken_test.go +++ b/internal/command/spawntoken_test.go @@ -8,6 +8,8 @@ import ( ) func TestGetSpawnToken_CommandStats(t *testing.T) { + t.Parallel() + ctx := testhelper.Context(t) ctx = InitContextStats(ctx) @@ -19,3 +21,30 @@ func TestGetSpawnToken_CommandStats(t *testing.T) { require.NotNil(t, stats) require.Contains(t, stats.Fields(), "command.spawn_token_wait_ms") } + +// This test modifies a global config, hence should never run in parallel +func TestGetSpawnToken_CommandStats_timeout(t *testing.T) { + priorTimeout := spawnConfig.Timeout + priorSpawnTokens := spawnTokens + + spawnConfig.Timeout = 0 + spawnTokens = make(chan struct{}, 1) + spawnTokens <- struct{}{} + defer func() { + spawnConfig.Timeout = priorTimeout + spawnTokens = priorSpawnTokens + }() + + ctx := testhelper.Context(t) + ctx = InitContextStats(ctx) + + _, err := getSpawnToken(ctx) + require.ErrorContains(t, err, "process spawn timed out after") + + stats := StatsFromContext(ctx) + require.NotNil(t, stats) + fields := stats.Fields() + + require.GreaterOrEqual(t, fields["command.spawn_token_wait_ms"], 0) + require.Equal(t, fields["command.spawn_token_error"], "spawn token timeout") +} |