Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSami Hiltunen <shiltunen@gitlab.com>2021-11-18 17:57:43 +0300
committerSami Hiltunen <shiltunen@gitlab.com>2021-11-18 17:57:43 +0300
commit650cb6e64c077ac89f8e8c4f175f602b504ef143 (patch)
tree680e39c2cf9fa2bdef7059909a759537e8674913
parentb37d420ab5a2c070d7a3b727c4407008be11f912 (diff)
parent7ae773348b34a129794cdabf6a9cf96fab37f554 (diff)
Merge branch 'pks-praefect-datastore-collector-metrics-endpoint-v14.3' into '14-3-stable'
praefect: Backport separate endpoint for datastore collector (v14.3) See merge request gitlab-org/gitaly!4095
-rw-r--r--cmd/gitaly/main.go4
-rw-r--r--cmd/praefect/main.go59
-rw-r--r--cmd/praefect/subcmd_remove_repository_test.go9
-rw-r--r--internal/bootstrap/bootstrap.go74
-rw-r--r--internal/bootstrap/bootstrap_test.go67
-rw-r--r--internal/praefect/config/config.go14
-rw-r--r--internal/praefect/datastore/collector.go40
-rw-r--r--internal/praefect/datastore/collector_test.go34
-rw-r--r--internal/praefect/metrics/prometheus.go12
9 files changed, 230 insertions, 83 deletions
diff --git a/cmd/gitaly/main.go b/cmd/gitaly/main.go
index 3b901c45c..db7c73a06 100644
--- a/cmd/gitaly/main.go
+++ b/cmd/gitaly/main.go
@@ -199,8 +199,6 @@ func run(cfg config.Cfg) error {
return fmt.Errorf("linguist instance creation: %w", err)
}
- b.StopAction = gitalyServerFactory.GracefulStop
-
rubySrv := rubyserver.New(cfg)
if err := rubySrv.Start(); err != nil {
return fmt.Errorf("initialize gitaly-ruby: %v", err)
@@ -303,5 +301,5 @@ func run(cfg config.Cfg) error {
}
}()
- return b.Wait(cfg.GracefulRestartTimeout.Duration())
+ return b.Wait(cfg.GracefulRestartTimeout.Duration(), gitalyServerFactory.GracefulStop)
}
diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go
index ed908551c..a88dc0763 100644
--- a/cmd/praefect/main.go
+++ b/cmd/praefect/main.go
@@ -63,11 +63,13 @@ import (
"flag"
"fmt"
"math/rand"
+ "net/http"
"os"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/v14/internal/backchannel"
"gitlab.com/gitlab-org/gitaly/v14/internal/bootstrap"
@@ -142,7 +144,14 @@ func main() {
logger.Fatalf("%s", err)
}
- if err := run(starterConfigs, conf); err != nil {
+ b, err := bootstrap.New()
+ if err != nil {
+ logger.Fatalf("unable to create a bootstrap: %v", err)
+ }
+
+ dbPromRegistry := prometheus.NewRegistry()
+
+ if err := run(starterConfigs, conf, b, prometheus.DefaultRegisterer, dbPromRegistry); err != nil {
logger.Fatalf("%v", err)
}
}
@@ -185,18 +194,27 @@ func configure(conf config.Config) {
sentry.ConfigureSentry(version.GetVersion(), conf.Sentry)
}
-func run(cfgs []starter.Config, conf config.Config) error {
- nodeLatencyHistogram, err := metrics.RegisterNodeLatency(conf.Prometheus)
+func run(
+ cfgs []starter.Config,
+ conf config.Config,
+ b bootstrap.Listener,
+ promreg prometheus.Registerer,
+ dbPromRegistry interface {
+ prometheus.Registerer
+ prometheus.Gatherer
+ },
+) error {
+ nodeLatencyHistogram, err := metrics.RegisterNodeLatency(conf.Prometheus, promreg)
if err != nil {
return err
}
- delayMetric, err := metrics.RegisterReplicationDelay(conf.Prometheus)
+ delayMetric, err := metrics.RegisterReplicationDelay(conf.Prometheus, promreg)
if err != nil {
return err
}
- latencyMetric, err := metrics.RegisterReplicationLatency(conf.Prometheus)
+ latencyMetric, err := metrics.RegisterReplicationLatency(conf.Prometheus, promreg)
if err != nil {
return err
}
@@ -376,18 +394,21 @@ func run(cfgs []starter.Config, conf config.Config) error {
)
metricsCollectors = append(metricsCollectors, transactionManager, coordinator, repl)
if db != nil {
- prometheus.MustRegister(
- datastore.NewRepositoryStoreCollector(logger, conf.VirtualStorageNames(), db, conf.Prometheus.ScrapeTimeout),
- )
- }
- prometheus.MustRegister(metricsCollectors...)
-
- b, err := bootstrap.New()
- if err != nil {
- return fmt.Errorf("unable to create a bootstrap: %v", err)
+ repositoryStoreCollector := datastore.NewRepositoryStoreCollector(logger, conf.VirtualStorageNames(), db, conf.Prometheus.ScrapeTimeout)
+
+ // Eventually, database-related metrics will always be exported via a separate
+ // endpoint such that it's possible to set a different scraping interval and thus to
+ // reduce database load. For now though, we register the metrics twice, once for the
+ // standard and once for the database-specific endpoint. This is done to ensure a
+ // transitory period where deployments can be moved to the new endpoint without
+ // causing breakage if they still use the old endpoint.
+ dbPromRegistry.MustRegister(repositoryStoreCollector)
+ if !conf.PrometheusExcludeDatabaseFromDefaultMetrics {
+ promreg.MustRegister(repositoryStoreCollector)
+ }
}
+ promreg.MustRegister(metricsCollectors...)
- b.StopAction = srvFactory.GracefulStop
for _, cfg := range cfgs {
srv, err := srvFactory.Create(cfg.IsSecure())
if err != nil {
@@ -405,9 +426,13 @@ func run(cfgs []starter.Config, conf config.Config) error {
return err
}
+ serveMux := http.NewServeMux()
+ serveMux.Handle("/db_metrics", promhttp.HandlerFor(dbPromRegistry, promhttp.HandlerOpts{}))
+
go func() {
if err := monitoring.Start(
monitoring.WithListener(l),
+ monitoring.WithServeMux(serveMux),
monitoring.WithBuildInformation(praefect.GetVersion(), praefect.GetBuildTime())); err != nil {
logger.WithError(err).Errorf("Unable to start prometheus listener: %v", conf.PrometheusListenAddr)
}
@@ -437,12 +462,12 @@ func run(cfgs []starter.Config, conf config.Config) error {
conf.StorageNames(),
conf.Reconciliation.HistogramBuckets,
)
- prometheus.MustRegister(r)
+ promreg.MustRegister(r)
go r.Run(ctx, helper.NewTimerTicker(interval))
}
}
- return b.Wait(conf.GracefulStopTimeout.Duration())
+ return b.Wait(conf.GracefulStopTimeout.Duration(), srvFactory.GracefulStop)
}
func getStarterConfigs(conf config.Config) ([]starter.Config, error) {
diff --git a/cmd/praefect/subcmd_remove_repository_test.go b/cmd/praefect/subcmd_remove_repository_test.go
index 9dab2f0a6..bb56dee3c 100644
--- a/cmd/praefect/subcmd_remove_repository_test.go
+++ b/cmd/praefect/subcmd_remove_repository_test.go
@@ -4,15 +4,16 @@ import (
"flag"
"path/filepath"
"strings"
- "syscall"
"testing"
"time"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v14/client"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/bootstrap"
"gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/service/setup"
"gitlab.com/gitlab-org/gitaly/v14/internal/helper"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/config"
@@ -108,10 +109,10 @@ func TestRemoveRepository_Exec(t *testing.T) {
starterConfigs, err := getStarterConfigs(conf)
require.NoError(t, err)
stopped := make(chan struct{})
+ bootstrapper := bootstrap.NewNoop()
go func() {
defer close(stopped)
- err := run(starterConfigs, conf)
- assert.EqualError(t, err, `received signal "terminated"`)
+ assert.NoError(t, run(starterConfigs, conf, bootstrapper, prometheus.NewRegistry(), prometheus.NewRegistry()))
}()
cc, err := client.Dial("unix://"+conf.SocketPath, nil)
@@ -229,7 +230,7 @@ func TestRemoveRepository_Exec(t *testing.T) {
requireNoDatabaseInfo(t, db, cmd)
})
- require.NoError(t, syscall.Kill(syscall.Getpid(), syscall.SIGTERM))
+ bootstrapper.Terminate()
<-stopped
}
diff --git a/internal/bootstrap/bootstrap.go b/internal/bootstrap/bootstrap.go
index ef414184d..f1d355926 100644
--- a/internal/bootstrap/bootstrap.go
+++ b/internal/bootstrap/bootstrap.go
@@ -22,11 +22,18 @@ const (
socketReusePortWarning = "Unable to set SO_REUSEPORT: zero downtime upgrades will not work"
)
+// Listener is an interface of the bootstrap manager.
+type Listener interface {
+ // RegisterStarter adds starter to the pool.
+ RegisterStarter(starter Starter)
+ // Start starts all registered starters to accept connections.
+ Start() error
+ // Wait terminates all registered starters.
+ Wait(gracefulTimeout time.Duration, stopAction func()) error
+}
+
// Bootstrap handles graceful upgrades
type Bootstrap struct {
- // StopAction will be invoked during a graceful stop. It must wait until the shutdown is completed
- StopAction func()
-
upgrader upgrader
listenFunc ListenFunc
errChan chan error
@@ -38,6 +45,7 @@ type upgrader interface {
HasParent() bool
Ready() error
Upgrade() error
+ Stop()
}
// New performs tableflip initialization
@@ -151,7 +159,8 @@ func (b *Bootstrap) Start() error {
// Wait will signal process readiness to the parent and than wait for an exit condition
// SIGTERM, SIGINT and a runtime error will trigger an immediate shutdown
// in case of an upgrade there will be a grace period to complete the ongoing requests
-func (b *Bootstrap) Wait(gracefulTimeout time.Duration) error {
+// stopAction will be invoked during a graceful stop. It must wait until the shutdown is completed.
+func (b *Bootstrap) Wait(gracefulTimeout time.Duration, stopAction func()) error {
signals := []os.Signal{syscall.SIGTERM, syscall.SIGINT}
immediateShutdown := make(chan os.Signal, len(signals))
signal.Notify(immediateShutdown, signals...)
@@ -167,24 +176,25 @@ func (b *Bootstrap) Wait(gracefulTimeout time.Duration) error {
// the new process signaled its readiness and we started a graceful stop
// however no further upgrades can be started until this process is running
// we set a grace period and then we force a termination.
- waitError := b.waitGracePeriod(gracefulTimeout, immediateShutdown)
+ waitError := b.waitGracePeriod(gracefulTimeout, immediateShutdown, stopAction)
err = fmt.Errorf("graceful upgrade: %v", waitError)
case s := <-immediateShutdown:
err = fmt.Errorf("received signal %q", s)
+ b.upgrader.Stop()
case err = <-b.errChan:
}
return err
}
-func (b *Bootstrap) waitGracePeriod(gracefulTimeout time.Duration, kill <-chan os.Signal) error {
+func (b *Bootstrap) waitGracePeriod(gracefulTimeout time.Duration, kill <-chan os.Signal, stopAction func()) error {
log.WithField("graceful_timeout", gracefulTimeout).Warn("starting grace period")
allServersDone := make(chan struct{})
go func() {
- if b.StopAction != nil {
- b.StopAction()
+ if stopAction != nil {
+ stopAction()
}
close(allServersDone)
}()
@@ -208,3 +218,51 @@ func (b *Bootstrap) listen(network, path string) (net.Listener, error) {
return b.listenFunc(network, path)
}
+
+// Noop is a bootstrapper that does no additional configurations.
+type Noop struct {
+ starters []Starter
+ shutdown chan struct{}
+ errChan chan error
+}
+
+// NewNoop returns initialized instance of the *Noop.
+func NewNoop() *Noop {
+ return &Noop{shutdown: make(chan struct{})}
+}
+
+// RegisterStarter adds starter to the pool.
+func (n *Noop) RegisterStarter(starter Starter) {
+ n.starters = append(n.starters, starter)
+}
+
+// Start starts all registered starters to accept connections.
+func (n *Noop) Start() error {
+ n.errChan = make(chan error, len(n.starters))
+
+ for _, start := range n.starters {
+ if err := start(net.Listen, n.errChan); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Wait terminates all registered starters.
+func (n *Noop) Wait(_ time.Duration, stopAction func()) error {
+ select {
+ case <-n.shutdown:
+ if stopAction != nil {
+ stopAction()
+ }
+ case err := <-n.errChan:
+ return err
+ }
+
+ return nil
+}
+
+// Terminate unblocks Wait method and executes stopAction call-back passed into it.
+func (n *Noop) Terminate() {
+ close(n.shutdown)
+}
diff --git a/internal/bootstrap/bootstrap_test.go b/internal/bootstrap/bootstrap_test.go
index fce265679..16e463252 100644
--- a/internal/bootstrap/bootstrap_test.go
+++ b/internal/bootstrap/bootstrap_test.go
@@ -26,6 +26,8 @@ func (m *mockUpgrader) Exit() <-chan struct{} {
return m.exit
}
+func (m *mockUpgrader) Stop() {}
+
func (m *mockUpgrader) HasParent() bool {
return m.hasParent
}
@@ -104,10 +106,13 @@ func waitWithTimeout(t *testing.T, waitCh <-chan error, timeout time.Duration) e
}
func TestImmediateTerminationOnSocketError(t *testing.T) {
- b, server := makeBootstrap(t)
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ b, server, stopAction := makeBootstrap(t, ctx)
waitCh := make(chan error)
- go func() { waitCh <- b.Wait(2 * time.Second) }()
+ go func() { waitCh <- b.Wait(2*time.Second, stopAction) }()
require.NoError(t, server.listeners["tcp"].Close(), "Closing first listener")
@@ -119,12 +124,15 @@ func TestImmediateTerminationOnSocketError(t *testing.T) {
func TestImmediateTerminationOnSignal(t *testing.T) {
for _, sig := range []syscall.Signal{syscall.SIGTERM, syscall.SIGINT} {
t.Run(sig.String(), func(t *testing.T) {
- b, server := makeBootstrap(t)
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ b, server, stopAction := makeBootstrap(t, ctx)
done := server.slowRequest(3 * time.Minute)
waitCh := make(chan error)
- go func() { waitCh <- b.Wait(2 * time.Second) }()
+ go func() { waitCh <- b.Wait(2*time.Second, stopAction) }()
// make sure we are inside b.Wait() or we'll kill the test suite
time.Sleep(100 * time.Millisecond)
@@ -146,9 +154,12 @@ func TestImmediateTerminationOnSignal(t *testing.T) {
}
func TestGracefulTerminationStuck(t *testing.T) {
- b, server := makeBootstrap(t)
+ ctx, cancel := testhelper.Context()
+ defer cancel()
- err := testGracefulUpdate(t, server, b, 3*time.Second, 2*time.Second, nil)
+ b, server, stopAction := makeBootstrap(t, ctx)
+
+ err := testGracefulUpdate(t, server, b, 3*time.Second, 2*time.Second, nil, stopAction)
require.Contains(t, err.Error(), "grace period expired")
}
@@ -158,22 +169,26 @@ func TestGracefulTerminationWithSignals(t *testing.T) {
for _, sig := range []syscall.Signal{syscall.SIGTERM, syscall.SIGINT} {
t.Run(sig.String(), func(t *testing.T) {
- b, server := makeBootstrap(t)
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+ b, server, stopAction := makeBootstrap(t, ctx)
err := testGracefulUpdate(t, server, b, 1*time.Second, 2*time.Second, func() {
require.NoError(t, self.Signal(sig))
- })
+ }, stopAction)
require.Contains(t, err.Error(), "force shutdown")
})
}
}
func TestGracefulTerminationServerErrors(t *testing.T) {
- b, server := makeBootstrap(t)
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+ b, server, _ := makeBootstrap(t, ctx)
done := make(chan error, 1)
// This is a simulation of receiving a listener error during waitGracePeriod
- b.StopAction = func() {
+ stopAction := func() {
// we close the unix listener in order to test that the shutdown will not fail, but it keep waiting for the TCP request
require.NoError(t, server.listeners["unix"].Close())
@@ -185,43 +200,45 @@ func TestGracefulTerminationServerErrors(t *testing.T) {
require.NoError(t, server.server.Shutdown(context.Background()))
}
- err := testGracefulUpdate(t, server, b, 3*time.Second, 2*time.Second, nil)
+ err := testGracefulUpdate(t, server, b, 3*time.Second, 2*time.Second, nil, stopAction)
require.Contains(t, err.Error(), "grace period expired")
require.NoError(t, <-done)
}
func TestGracefulTermination(t *testing.T) {
- b, server := makeBootstrap(t)
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+ b, server, _ := makeBootstrap(t, ctx)
// Using server.Close we bypass the graceful shutdown faking a completed shutdown
- b.StopAction = func() { server.server.Close() }
+ stopAction := func() { server.server.Close() }
- err := testGracefulUpdate(t, server, b, 1*time.Second, 2*time.Second, nil)
+ err := testGracefulUpdate(t, server, b, 1*time.Second, 2*time.Second, nil, stopAction)
require.Contains(t, err.Error(), "completed")
}
func TestPortReuse(t *testing.T) {
- var addr string
-
b, err := New()
require.NoError(t, err)
l, err := b.listen("tcp", "localhost:")
require.NoError(t, err, "failed to bind")
- addr = l.Addr().String()
+ addr := l.Addr().String()
_, port, err := net.SplitHostPort(addr)
require.NoError(t, err)
l, err = b.listen("tcp", "localhost:"+port)
require.NoError(t, err, "failed to bind")
require.NoError(t, l.Close())
+
+ b.upgrader.Stop()
}
-func testGracefulUpdate(t *testing.T, server *testServer, b *Bootstrap, waitTimeout, gracefulWait time.Duration, duringGracePeriodCallback func()) error {
+func testGracefulUpdate(t *testing.T, server *testServer, b *Bootstrap, waitTimeout, gracefulWait time.Duration, duringGracePeriodCallback func(), stopAction func()) error {
waitCh := make(chan error)
- go func() { waitCh <- b.Wait(gracefulWait) }()
+ go func() { waitCh <- b.Wait(gracefulWait, stopAction) }()
// Start a slow request to keep the old server from shutting down immediately.
req := server.slowRequest(2 * gracefulWait)
@@ -251,7 +268,7 @@ func testGracefulUpdate(t *testing.T, server *testServer, b *Bootstrap, waitTime
return waitErr
}
-func makeBootstrap(t *testing.T) (*Bootstrap, *testServer) {
+func makeBootstrap(t *testing.T, ctx context.Context) (*Bootstrap, *testServer, func()) {
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(200)
@@ -260,19 +277,21 @@ func makeBootstrap(t *testing.T) (*Bootstrap, *testServer) {
sec, err := strconv.Atoi(r.URL.Query().Get("seconds"))
require.NoError(t, err)
- time.Sleep(time.Duration(sec) * time.Second)
+ select {
+ case <-ctx.Done():
+ case <-time.After(time.Duration(sec) * time.Second):
+ }
w.WriteHeader(200)
})
s := http.Server{Handler: mux}
+ t.Cleanup(func() { testhelper.MustClose(t, &s) })
u := &mockUpgrader{exit: make(chan struct{})}
b, err := _new(u, net.Listen, false)
require.NoError(t, err)
- b.StopAction = func() { require.NoError(t, s.Shutdown(context.Background())) }
-
listeners := make(map[string]net.Listener)
start := func(network, address string) Starter {
return func(listen ListenFunc, errors chan<- error) error {
@@ -312,7 +331,7 @@ func makeBootstrap(t *testing.T) (*Bootstrap, *testServer) {
server: &s,
listeners: listeners,
url: url,
- }
+ }, func() { require.NoError(t, s.Shutdown(context.Background())) }
}
func testAllListeners(t *testing.T, listeners map[string]net.Listener) {
diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go
index 6dc66b2b7..de4cb8d0c 100644
--- a/internal/praefect/config/config.go
+++ b/internal/praefect/config/config.go
@@ -117,10 +117,16 @@ type Config struct {
Sentry sentry.Config `toml:"sentry"`
PrometheusListenAddr string `toml:"prometheus_listen_addr"`
Prometheus prometheus.Config `toml:"prometheus"`
- Auth auth.Config `toml:"auth"`
- TLS config.TLS `toml:"tls"`
- DB `toml:"database"`
- Failover Failover `toml:"failover"`
+ // PrometheusExcludeDatabaseFromDefaultMetrics excludes database-related metrics from the
+ // default metrics. If set to `false`, then database metrics will be available both via
+ // `/metrics` and `/db_metrics`. Otherwise, they will only be accessible via `/db_metrics`.
+ // Defaults to `false`. This is used as a transitory configuration key: eventually, database
+ // metrics will always be removed from the standard metrics endpoint.
+ PrometheusExcludeDatabaseFromDefaultMetrics bool `toml:"prometheus_exclude_database_from_default_metrics"`
+ Auth auth.Config `toml:"auth"`
+ TLS config.TLS `toml:"tls"`
+ DB `toml:"database"`
+ Failover Failover `toml:"failover"`
// Keep for legacy reasons: remove after Omnibus has switched
FailoverEnabled bool `toml:"failover_enabled"`
MemoryQueueEnabled bool `toml:"memory_queue_enabled"`
diff --git a/internal/praefect/datastore/collector.go b/internal/praefect/datastore/collector.go
index 71f145e67..40390b47a 100644
--- a/internal/praefect/datastore/collector.go
+++ b/internal/praefect/datastore/collector.go
@@ -11,21 +11,25 @@ import (
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/glsql"
)
-// This is kept for backwards compatibility as some alerting rules depend on this.
-// The unavailable repositories is a more accurate description for the metric and
-// is exported below so we can migrate to it.
-var descReadOnlyRepositories = prometheus.NewDesc(
- "gitaly_praefect_read_only_repositories",
- "Number of repositories in read-only mode within a virtual storage.",
- []string{"virtual_storage"},
- nil,
-)
-
-var descUnavailableRepositories = prometheus.NewDesc(
- "gitaly_praefect_unavailable_repositories",
- "Number of repositories that have no healthy, up to date replicas.",
- []string{"virtual_storage"},
- nil,
+var (
+ // This is kept for backwards compatibility as some alerting rules depend on this.
+ // The unavailable repositories is a more accurate description for the metric and
+ // is exported below so we can migrate to it.
+ descReadOnlyRepositories = prometheus.NewDesc(
+ "gitaly_praefect_read_only_repositories",
+ "Number of repositories in read-only mode within a virtual storage.",
+ []string{"virtual_storage"},
+ nil,
+ )
+
+ descUnavailableRepositories = prometheus.NewDesc(
+ "gitaly_praefect_unavailable_repositories",
+ "Number of repositories that have no healthy, up to date replicas.",
+ []string{"virtual_storage"},
+ nil,
+ )
+
+ descriptions = []*prometheus.Desc{descReadOnlyRepositories, descUnavailableRepositories}
)
// RepositoryStoreCollector collects metrics from the RepositoryStore.
@@ -47,7 +51,9 @@ func NewRepositoryStoreCollector(log logrus.FieldLogger, virtualStorages []strin
}
func (c *RepositoryStoreCollector) Describe(ch chan<- *prometheus.Desc) {
- prometheus.DescribeByCollect(c, ch)
+ for _, desc := range descriptions {
+ ch <- desc
+ }
}
func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) {
@@ -61,7 +67,7 @@ func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) {
}
for _, vs := range c.virtualStorages {
- for _, desc := range []*prometheus.Desc{descReadOnlyRepositories, descUnavailableRepositories} {
+ for _, desc := range descriptions {
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(unavailableCounts[vs]), vs)
}
}
diff --git a/internal/praefect/datastore/collector_test.go b/internal/praefect/datastore/collector_test.go
index 3514b1239..3cc972e36 100644
--- a/internal/praefect/datastore/collector_test.go
+++ b/internal/praefect/datastore/collector_test.go
@@ -2,14 +2,18 @@ package datastore
import (
"context"
+ "database/sql"
+ "errors"
"fmt"
"strings"
"testing"
"time"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
+ "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/glsql"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
@@ -211,3 +215,33 @@ gitaly_praefect_unavailable_repositories{virtual_storage="virtual-storage-2"} 0
})
}
}
+
+type checkIfQueriedDB struct {
+ queried bool
+}
+
+func (c *checkIfQueriedDB) QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) {
+ c.queried = true
+ return nil, errors.New("QueryContext should not be called")
+}
+
+func (c *checkIfQueriedDB) QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row {
+ c.queried = true
+ return &sql.Row{}
+}
+
+func (c *checkIfQueriedDB) ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
+ c.queried = true
+ return nil, errors.New("ExecContext should not be called")
+}
+
+func TestRepositoryStoreCollector_CollectNotCalledOnRegister(t *testing.T) {
+ logger, _ := test.NewNullLogger()
+
+ var db checkIfQueriedDB
+ c := NewRepositoryStoreCollector(logger, []string{"virtual-storage-1", "virtual-storage-2"}, &db, 2*time.Second)
+ registry := prometheus.NewRegistry()
+ registry.MustRegister(c)
+
+ assert.False(t, db.queried)
+}
diff --git a/internal/praefect/metrics/prometheus.go b/internal/praefect/metrics/prometheus.go
index b09ee9631..88886b574 100644
--- a/internal/praefect/metrics/prometheus.go
+++ b/internal/praefect/metrics/prometheus.go
@@ -9,7 +9,7 @@ import (
// RegisterReplicationDelay creates and registers a prometheus histogram
// to observe replication delay times
-func RegisterReplicationDelay(conf gitalycfgprom.Config) (metrics.HistogramVec, error) {
+func RegisterReplicationDelay(conf gitalycfgprom.Config, registerer prometheus.Registerer) (metrics.HistogramVec, error) {
replicationDelay := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "gitaly",
@@ -20,12 +20,12 @@ func RegisterReplicationDelay(conf gitalycfgprom.Config) (metrics.HistogramVec,
[]string{"type"},
)
- return replicationDelay, prometheus.Register(replicationDelay)
+ return replicationDelay, registerer.Register(replicationDelay)
}
// RegisterReplicationLatency creates and registers a prometheus histogram
// to observe replication latency times
-func RegisterReplicationLatency(conf gitalycfgprom.Config) (metrics.HistogramVec, error) {
+func RegisterReplicationLatency(conf gitalycfgprom.Config, registerer prometheus.Registerer) (metrics.HistogramVec, error) {
replicationLatency := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "gitaly",
@@ -36,12 +36,12 @@ func RegisterReplicationLatency(conf gitalycfgprom.Config) (metrics.HistogramVec
[]string{"type"},
)
- return replicationLatency, prometheus.Register(replicationLatency)
+ return replicationLatency, registerer.Register(replicationLatency)
}
// RegisterNodeLatency creates and registers a prometheus histogram to
// observe internal node latency
-func RegisterNodeLatency(conf gitalycfgprom.Config) (metrics.HistogramVec, error) {
+func RegisterNodeLatency(conf gitalycfgprom.Config, registerer prometheus.Registerer) (metrics.HistogramVec, error) {
nodeLatency := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "gitaly",
@@ -51,7 +51,7 @@ func RegisterNodeLatency(conf gitalycfgprom.Config) (metrics.HistogramVec, error
}, []string{"gitaly_storage"},
)
- return nodeLatency, prometheus.Register(nodeLatency)
+ return nodeLatency, registerer.Register(nodeLatency)
}
var MethodTypeCounter = promauto.NewCounterVec(