diff options
author | Toon Claes <toon@gitlab.com> | 2022-04-08 15:28:40 +0300 |
---|---|---|
committer | Toon Claes <toon@gitlab.com> | 2022-04-08 15:28:40 +0300 |
commit | 3217e4f067bc7c6555485b48d96f1b1151acbecc (patch) | |
tree | 663742a59e6ae0b0aadf6bbdc85c2c6f1effe4b6 | |
parent | 36105c4b2f8aa285f1ff1703797274deda811a33 (diff) | |
parent | 549282123914565b8cb2532b028fc195a3e8fef8 (diff) |
Merge branch 'command-stats-metrics' into 'master'
Expose command stats (rusage) metrics via prometheus
See merge request gitlab-org/gitaly!4464
-rw-r--r-- | internal/command/command.go | 102 | ||||
-rw-r--r-- | internal/git/command_factory.go | 2 | ||||
-rw-r--r-- | internal/git2go/executor.go | 5 | ||||
-rw-r--r-- | internal/gitaly/hook/custom.go | 2 | ||||
-rw-r--r-- | internal/gitaly/linguist/linguist.go | 3 | ||||
-rw-r--r-- | internal/metadata/featureflag/ff_command_stats_metrics.go | 4 | ||||
-rw-r--r-- | internal/testhelper/testhelper.go | 3 |
7 files changed, 119 insertions, 2 deletions
diff --git a/internal/command/command.go b/internal/command/command.go index ba964209a..85178d995 100644 --- a/internal/command/command.go +++ b/internal/command/command.go @@ -7,15 +7,20 @@ import ( "io" "os" "os/exec" + "path" "strings" "sync" "syscall" "time" "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + grpcmwtags "github.com/grpc-ecosystem/go-grpc-middleware/tags" "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/v14/internal/command/commandcounter" + "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" "gitlab.com/gitlab-org/labkit/tracing" ) @@ -33,6 +38,51 @@ func init() { } } +var ( + cpuSecondsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_command_cpu_seconds_total", + Help: "Sum of CPU time spent by shelling out", + }, + []string{"grpc_service", "grpc_method", "cmd", "subcmd", "mode"}, + ) + realSecondsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_command_real_seconds_total", + Help: "Sum of real time spent by shelling out", + }, + []string{"grpc_service", "grpc_method", "cmd", "subcmd"}, + ) + minorPageFaultsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_command_minor_page_faults_total", + Help: "Sum of minor page faults performed while shelling out", + }, + []string{"grpc_service", "grpc_method", "cmd", "subcmd"}, + ) + majorPageFaultsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_command_major_page_faults_total", + Help: "Sum of major page faults performed while shelling out", + }, + []string{"grpc_service", "grpc_method", "cmd", "subcmd"}, + ) + signalsReceivedTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_command_signals_received_total", + Help: "Sum of signals received while shelling out", + }, + []string{"grpc_service", "grpc_method", "cmd", "subcmd"}, + ) + contextSwitchesTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_command_context_switches_total", + Help: "Sum of context switches performed while shelling out", + }, + []string{"grpc_service", "grpc_method", "cmd", "subcmd", "ctxswitchtype"}, + ) +) + // GitEnv contains the ENV variables for git commands var GitEnv = []string{ // Force english locale for consistency on the output messages @@ -102,7 +152,9 @@ type Command struct { span opentracing.Span - cgroupPath string + metricsCmd string + metricsSubCmd string + cgroupPath string } type stdinSentinel struct{} @@ -151,6 +203,16 @@ func (c *Command) SetCgroupPath(path string) { c.cgroupPath = path } +// SetMetricsCmd overrides the "cmd" label used in metrics +func (c *Command) SetMetricsCmd(metricsCmd string) { + c.metricsCmd = metricsCmd +} + +// SetMetricsSubCmd sets the "subcmd" label used in metrics +func (c *Command) SetMetricsSubCmd(metricsSubCmd string) { + c.metricsSubCmd = metricsSubCmd +} + type contextWithoutDonePanic string // New creates a Command from an exec.Cmd. On success, the Command @@ -392,6 +454,24 @@ func (c *Command) logProcessComplete() { } } + if featureflag.CommandStatsMetrics.IsEnabled(ctx) { + service, method := methodFromContext(ctx) + cmdName := path.Base(c.cmd.Path) + if c.metricsCmd != "" { + cmdName = c.metricsCmd + } + cpuSecondsTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd, "system").Add(systemTime.Seconds()) + cpuSecondsTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd, "user").Add(userTime.Seconds()) + realSecondsTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd).Add(realTime.Seconds()) + if ok { + minorPageFaultsTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd).Add(float64(rusage.Minflt)) + majorPageFaultsTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd).Add(float64(rusage.Majflt)) + signalsReceivedTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd).Add(float64(rusage.Nsignals)) + contextSwitchesTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd, "voluntary").Add(float64(rusage.Nvcsw)) + contextSwitchesTotal.WithLabelValues(service, method, cmdName, c.metricsSubCmd, "nonvoluntary").Add(float64(rusage.Nivcsw)) + } + } + c.span.LogKV( "pid", cmd.ProcessState.Pid(), "exit_code", exitCode, @@ -411,6 +491,26 @@ func (c *Command) logProcessComplete() { c.span.Finish() } +func methodFromContext(ctx context.Context) (service string, method string) { + tags := grpcmwtags.Extract(ctx) + ctxValue := tags.Values()["grpc.request.fullMethod"] + if ctxValue == nil { + return "", "" + } + + if s, ok := ctxValue.(string); ok { + // Expect: "/foo.BarService/Qux" + split := strings.Split(s, "/") + if len(split) != 3 { + return "", "" + } + + return split[1], split[2] + } + + return "", "" +} + // Command arguments will be passed to the exec syscall as // null-terminated C strings. That means the arguments themselves may not // contain a null byte. The go stdlib checks for null bytes but it diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go index 056b8b170..3a5c58bac 100644 --- a/internal/git/command_factory.go +++ b/internal/git/command_factory.go @@ -390,6 +390,8 @@ func (cf *ExecCommandFactory) newCommand(ctx context.Context, repo repository.Gi return nil, err } + command.SetMetricsSubCmd(sc.Subcommand()) + if featureflag.RunCommandsInCGroup.IsEnabled(ctx) { if err := cf.cgroupsManager.AddCommand(command); err != nil { return nil, err diff --git a/internal/git2go/executor.go b/internal/git2go/executor.go index 45ed5e8c1..73dc095e8 100644 --- a/internal/git2go/executor.go +++ b/internal/git2go/executor.go @@ -49,7 +49,7 @@ func NewExecutor(cfg config.Cfg, gitCmdFactory git.CommandFactory, locator stora } } -func (b *Executor) run(ctx context.Context, repo repository.GitRepo, stdin io.Reader, args ...string) (*bytes.Buffer, error) { +func (b *Executor) run(ctx context.Context, repo repository.GitRepo, stdin io.Reader, subcmd string, args ...string) (*bytes.Buffer, error) { repoPath, err := b.locator.GetRepoPath(repo) if err != nil { return nil, fmt.Errorf("gitaly-git2go: %w", err) @@ -66,6 +66,7 @@ func (b *Executor) run(ctx context.Context, repo repository.GitRepo, stdin io.Re "-log-format", b.logFormat, "-log-level", b.logLevel, "-correlation-id", correlation.ExtractFromContext(ctx), + subcmd, }, args...) var stdout bytes.Buffer @@ -74,6 +75,8 @@ func (b *Executor) run(ctx context.Context, repo repository.GitRepo, stdin io.Re return nil, err } + cmd.SetMetricsSubCmd(subcmd) + if err := cmd.Wait(); err != nil { return nil, err } diff --git a/internal/gitaly/hook/custom.go b/internal/gitaly/hook/custom.go index 791fbeea4..5020f8530 100644 --- a/internal/gitaly/hook/custom.go +++ b/internal/gitaly/hook/custom.go @@ -73,6 +73,8 @@ func (m *GitLabHookManager) newCustomHooksExecutor(repo *gitalypb.Repository, ho return err } + c.SetMetricsSubCmd(hookName) + if err = c.Wait(); err != nil { // Custom hook errors need to be handled specially when we update // refs via updateref.UpdaterWithHooks: their stdout and stderr must diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go index fdd7ee6e7..9786e6547 100644 --- a/internal/gitaly/linguist/linguist.go +++ b/internal/gitaly/linguist/linguist.go @@ -128,6 +128,9 @@ func (inst *Instance) startGitLinguist(ctx context.Context, repoPath string, com return nil, fmt.Errorf("creating command: %w", err) } + internalCmd.SetMetricsCmd("git-linguist") + internalCmd.SetMetricsSubCmd(linguistCommand) + return internalCmd, nil } diff --git a/internal/metadata/featureflag/ff_command_stats_metrics.go b/internal/metadata/featureflag/ff_command_stats_metrics.go new file mode 100644 index 000000000..a11206c52 --- /dev/null +++ b/internal/metadata/featureflag/ff_command_stats_metrics.go @@ -0,0 +1,4 @@ +package featureflag + +// CommandStatsMetrics tracks additional prometheus metrics for each shelled out command +var CommandStatsMetrics = NewFeatureFlag("command_stats_metrics", false) diff --git a/internal/testhelper/testhelper.go b/internal/testhelper/testhelper.go index 2e780398b..f723aaf19 100644 --- a/internal/testhelper/testhelper.go +++ b/internal/testhelper/testhelper.go @@ -174,6 +174,9 @@ func ContextWithoutCancel(opts ...ContextOpt) context.Context { // ConcurrencyQueueMaxWait is in the codepath of every RPC call since it's in the limithandler // middleware. ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.ConcurrencyQueueMaxWait, true) + // CommandStatsMetrics is checked on every shelled out command, which may happen outside of + // RPC context. + ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.CommandStatsMetrics, true) for _, opt := range opts { ctx = opt(ctx) |