diff options
author | James Fargher <proglottis@gmail.com> | 2021-11-18 00:42:30 +0300 |
---|---|---|
committer | James Fargher <proglottis@gmail.com> | 2021-11-18 00:42:30 +0300 |
commit | d69b465fdff3c04f8e3f395aed34aaa59f23fe76 (patch) | |
tree | 6c40a77fca75f8e31f9cfb8d16b3a5cdf4479f73 | |
parent | 61ce7e9b211bd850c8743c88ef59451843ad36bd (diff) | |
parent | 954b4f5ed0e069d6cd739c113d3d599678306248 (diff) |
Merge branch 'jc-query-metrics' into 'master'
Add /db_metrics metrics endpoint for database metrics
Closes #3286
See merge request gitlab-org/gitaly!4085
-rw-r--r-- | cmd/praefect/main.go | 36 | ||||
-rw-r--r-- | cmd/praefect/main_test.go | 88 | ||||
-rw-r--r-- | cmd/praefect/subcmd_list_untracked_repositories_test.go | 2 | ||||
-rw-r--r-- | cmd/praefect/subcmd_remove_repository_test.go | 2 | ||||
-rw-r--r-- | go.mod | 1 | ||||
-rw-r--r-- | internal/praefect/config/config.go | 14 |
6 files changed, 132 insertions, 11 deletions
diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go index e2210e659..1082914c0 100644 --- a/cmd/praefect/main.go +++ b/cmd/praefect/main.go @@ -63,11 +63,13 @@ import ( "flag" "fmt" "math/rand" + "net/http" "os" "strings" "time" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/v14/internal/backchannel" "gitlab.com/gitlab-org/gitaly/v14/internal/bootstrap" @@ -149,7 +151,9 @@ func main() { logger.Fatalf("unable to create a bootstrap: %v", err) } - if err := run(starterConfigs, conf, b, prometheus.DefaultRegisterer); err != nil { + dbPromRegistry := prometheus.NewRegistry() + + if err := run(starterConfigs, conf, b, prometheus.DefaultRegisterer, dbPromRegistry); err != nil { logger.Fatalf("%v", err) } } @@ -192,7 +196,16 @@ func configure(conf config.Config) { sentry.ConfigureSentry(version.GetVersion(), conf.Sentry) } -func run(cfgs []starter.Config, conf config.Config, b bootstrap.Listener, promreg prometheus.Registerer) error { +func run( + cfgs []starter.Config, + conf config.Config, + b bootstrap.Listener, + promreg prometheus.Registerer, + dbPromRegistry interface { + prometheus.Registerer + prometheus.Gatherer + }, +) error { nodeLatencyHistogram, err := metrics.RegisterNodeLatency(conf.Prometheus, promreg) if err != nil { return err @@ -391,9 +404,18 @@ func run(cfgs []starter.Config, conf config.Config, b bootstrap.Listener, promre ) metricsCollectors = append(metricsCollectors, transactionManager, coordinator, repl) if db != nil { - promreg.MustRegister( - datastore.NewRepositoryStoreCollector(logger, conf.VirtualStorageNames(), db, conf.Prometheus.ScrapeTimeout), - ) + repositoryStoreCollector := datastore.NewRepositoryStoreCollector(logger, conf.VirtualStorageNames(), db, conf.Prometheus.ScrapeTimeout) + + // Eventually, database-related metrics will always be exported via a separate + // endpoint such that it's possible to set a different scraping interval and thus to + // reduce database load. For now though, we register the metrics twice, once for the + // standard and once for the database-specific endpoint. This is done to ensure a + // transitory period where deployments can be moved to the new endpoint without + // causing breakage if they still use the old endpoint. + dbPromRegistry.MustRegister(repositoryStoreCollector) + if !conf.PrometheusExcludeDatabaseFromDefaultMetrics { + promreg.MustRegister(repositoryStoreCollector) + } } promreg.MustRegister(metricsCollectors...) @@ -416,9 +438,13 @@ func run(cfgs []starter.Config, conf config.Config, b bootstrap.Listener, promre return err } + serveMux := http.NewServeMux() + serveMux.Handle("/db_metrics", promhttp.HandlerFor(dbPromRegistry, promhttp.HandlerOpts{})) + go func() { if err := monitoring.Start( monitoring.WithListener(l), + monitoring.WithServeMux(serveMux), monitoring.WithBuildInformation(praefect.GetVersion(), praefect.GetBuildTime())); err != nil { logger.WithError(err).Errorf("Unable to start prometheus listener: %v", conf.PrometheusListenAddr) } diff --git a/cmd/praefect/main_test.go b/cmd/praefect/main_test.go index cdbea8f2a..25a615876 100644 --- a/cmd/praefect/main_test.go +++ b/cmd/praefect/main_test.go @@ -2,12 +2,18 @@ package main import ( "errors" + "fmt" "testing" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v14/internal/bootstrap" "gitlab.com/gitlab-org/gitaly/v14/internal/bootstrap/starter" "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/config" + "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore" + "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/glsql" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" ) @@ -148,3 +154,85 @@ func TestGetStarterConfigs(t *testing.T) { }) } } + +func newMockRegisterer() *mockRegisterer { + return &mockRegisterer{} +} + +type mockRegisterer struct { + repositoryCollectorRegistered bool +} + +func (m *mockRegisterer) Register(c prometheus.Collector) error { + if _, ok := c.(*datastore.RepositoryStoreCollector); ok { + m.repositoryCollectorRegistered = true + } + + return nil +} + +func (m *mockRegisterer) MustRegister(collectors ...prometheus.Collector) { + for _, c := range collectors { + if _, ok := c.(*datastore.RepositoryStoreCollector); ok { + m.repositoryCollectorRegistered = true + } + } +} + +func (m *mockRegisterer) Unregister(c prometheus.Collector) bool { + return false +} + +func (m *mockRegisterer) Gather() ([]*dto.MetricFamily, error) { + return nil, nil +} + +func TestExcludeDatabaseMetricsFromDefaultMetrics(t *testing.T) { + t.Parallel() + + db := glsql.NewDB(t) + dbConf := glsql.GetDBConfig(t, db.Name) + + conf := config.Config{ + SocketPath: testhelper.GetTemporaryGitalySocketFileName(t), + VirtualStorages: []*config.VirtualStorage{ + { + Name: "praefect", + Nodes: []*config.Node{ + {Storage: "storage-0", Address: "tcp://someaddress"}, + }, + }, + }, + DB: dbConf, + Failover: config.Failover{ + Enabled: true, + ElectionStrategy: config.ElectionStrategyPerRepository, + }, + MemoryQueueEnabled: true, + } + + starterConfigs, err := getStarterConfigs(conf) + require.NoError(t, err) + + excludeDatabaseMetricsSettings := []bool{true, false} + for _, excludeDatabaseMetrics := range excludeDatabaseMetricsSettings { + t.Run(fmt.Sprintf("exclude database metrics %v", excludeDatabaseMetrics), func(t *testing.T) { + conf.PrometheusExcludeDatabaseFromDefaultMetrics = excludeDatabaseMetrics + + stopped := make(chan struct{}) + bootstrapper := bootstrap.NewNoop() + + metricRegisterer, dbMetricsRegisterer := newMockRegisterer(), newMockRegisterer() + go func() { + defer close(stopped) + assert.NoError(t, run(starterConfigs, conf, bootstrapper, metricRegisterer, dbMetricsRegisterer)) + }() + + bootstrapper.Terminate() + <-stopped + + assert.True(t, dbMetricsRegisterer.repositoryCollectorRegistered) + assert.Equal(t, excludeDatabaseMetrics, !metricRegisterer.repositoryCollectorRegistered) + }) + } +} diff --git a/cmd/praefect/subcmd_list_untracked_repositories_test.go b/cmd/praefect/subcmd_list_untracked_repositories_test.go index 4edc79666..965ad3a8f 100644 --- a/cmd/praefect/subcmd_list_untracked_repositories_test.go +++ b/cmd/praefect/subcmd_list_untracked_repositories_test.go @@ -89,7 +89,7 @@ func TestListUntrackedRepositories_Exec(t *testing.T) { bootstrapper := bootstrap.NewNoop() go func() { defer close(stopped) - assert.NoError(t, run(starterConfigs, conf, bootstrapper, prometheus.NewRegistry())) + assert.NoError(t, run(starterConfigs, conf, bootstrapper, prometheus.NewRegistry(), prometheus.NewRegistry())) }() cc, err := client.Dial("unix://"+conf.SocketPath, nil) diff --git a/cmd/praefect/subcmd_remove_repository_test.go b/cmd/praefect/subcmd_remove_repository_test.go index 5f9771234..7df587b0b 100644 --- a/cmd/praefect/subcmd_remove_repository_test.go +++ b/cmd/praefect/subcmd_remove_repository_test.go @@ -108,7 +108,7 @@ func TestRemoveRepository_Exec(t *testing.T) { bootstrapper := bootstrap.NewNoop() go func() { defer close(stopped) - assert.NoError(t, run(starterConfigs, conf, bootstrapper, prometheus.NewRegistry())) + assert.NoError(t, run(starterConfigs, conf, bootstrapper, prometheus.NewRegistry(), prometheus.NewRegistry())) }() cc, err := client.Dial("unix://"+conf.SocketPath, nil) @@ -30,6 +30,7 @@ require ( github.com/opentracing/opentracing-go v1.2.0 github.com/pelletier/go-toml v1.8.1 github.com/prometheus/client_golang v1.10.0 + github.com/prometheus/client_model v0.2.0 github.com/rubenv/sql-migrate v0.0.0-20191213152630-06338513c237 github.com/sirupsen/logrus v1.8.1 github.com/stretchr/testify v1.7.0 diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go index 016539617..835b6a424 100644 --- a/internal/praefect/config/config.go +++ b/internal/praefect/config/config.go @@ -124,10 +124,16 @@ type Config struct { Sentry sentry.Config `toml:"sentry"` PrometheusListenAddr string `toml:"prometheus_listen_addr"` Prometheus prometheus.Config `toml:"prometheus"` - Auth auth.Config `toml:"auth"` - TLS config.TLS `toml:"tls"` - DB `toml:"database"` - Failover Failover `toml:"failover"` + // PrometheusExcludeDatabaseFromDefaultMetrics excludes database-related metrics from the + // default metrics. If set to `false`, then database metrics will be available both via + // `/metrics` and `/db_metrics`. Otherwise, they will only be accessible via `/db_metrics`. + // Defaults to `false`. This is used as a transitory configuration key: eventually, database + // metrics will always be removed from the standard metrics endpoint. + PrometheusExcludeDatabaseFromDefaultMetrics bool `toml:"prometheus_exclude_database_from_default_metrics"` + Auth auth.Config `toml:"auth"` + TLS config.TLS `toml:"tls"` + DB `toml:"database"` + Failover Failover `toml:"failover"` // Keep for legacy reasons: remove after Omnibus has switched FailoverEnabled bool `toml:"failover_enabled"` MemoryQueueEnabled bool `toml:"memory_queue_enabled"` |