diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2020-05-14 10:16:28 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2020-05-15 10:09:11 +0300 |
commit | 068f4821788d23542700ea344939b45bc41da6ec (patch) | |
tree | d1dcb4f239303dcdca538aaa2d9991b8c672b3f5 | |
parent | 4e278bd899cf69c2ec6424f5f77e3d5e2b5dc708 (diff) |
transactions: Implement metrics
We currently have no metrics in the transaction manager, making it hard
to estimate how many transactions succeed, how many fail and how long a
node has to wait until a quorum is reached and the transaction is
committed. This commit thus adds two metrics, one to count the number of
registered, started, invalid and committed transactions and one to
measure the delay between start and finish of the voting process.
-rw-r--r-- | internal/praefect/transaction_test.go | 66 | ||||
-rw-r--r-- | internal/praefect/transactions/manager.go | 53 |
2 files changed, 109 insertions, 10 deletions
diff --git a/internal/praefect/transaction_test.go b/internal/praefect/transaction_test.go index 1f9ce7a39..675de2cc8 100644 --- a/internal/praefect/transaction_test.go +++ b/internal/praefect/transaction_test.go @@ -6,6 +6,8 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/internal/praefect/datastore" "gitlab.com/gitlab-org/gitaly/internal/praefect/transactions" @@ -16,7 +18,7 @@ import ( "google.golang.org/grpc/status" ) -func runPraefectWithTransactionMgr(t *testing.T) (*grpc.ClientConn, *transactions.Manager, testhelper.Cleanup) { +func runPraefectWithTransactionMgr(t *testing.T, opts ...transactions.ManagerOpt) (*grpc.ClientConn, *transactions.Manager, testhelper.Cleanup) { conf := testConfig(1) ds := datastore.Datastore{ @@ -24,14 +26,47 @@ func runPraefectWithTransactionMgr(t *testing.T) (*grpc.ClientConn, *transaction ReplicationEventQueue: datastore.NewMemoryReplicationEventQueue(), } - txMgr := transactions.NewManager() + txMgr := transactions.NewManager(opts...) 
conn, _, cleanup := runPraefectServer(t, conf, ds, txMgr) return conn, txMgr, cleanup } +func setupMetrics() (*prometheus.CounterVec, []transactions.ManagerOpt) { + counter := prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"status"}) + return counter, []transactions.ManagerOpt{ + transactions.WithCounterMetric(counter), + } +} + +type counterMetrics struct { + registered, started, invalid, committed int +} + +func verifyCounterMetrics(t *testing.T, counter *prometheus.CounterVec, expected counterMetrics) { + t.Helper() + + registered, err := counter.GetMetricWithLabelValues("registered") + require.NoError(t, err) + require.Equal(t, float64(expected.registered), testutil.ToFloat64(registered)) + + started, err := counter.GetMetricWithLabelValues("started") + require.NoError(t, err) + require.Equal(t, float64(expected.started), testutil.ToFloat64(started)) + + invalid, err := counter.GetMetricWithLabelValues("invalid") + require.NoError(t, err) + require.Equal(t, float64(expected.invalid), testutil.ToFloat64(invalid)) + + committed, err := counter.GetMetricWithLabelValues("committed") + require.NoError(t, err) + require.Equal(t, float64(expected.committed), testutil.ToFloat64(committed)) +} + func TestTransactionSucceeds(t *testing.T) { - cc, txMgr, cleanup := runPraefectWithTransactionMgr(t) + counter, opts := setupMetrics() + + cc, txMgr, cleanup := runPraefectWithTransactionMgr(t, opts...) 
defer cleanup() ctx, cancel := context.WithTimeout(context.Background(), time.Second) @@ -53,6 +88,12 @@ func TestTransactionSucceeds(t *testing.T) { }) require.NoError(t, err) require.Equal(t, gitalypb.StartTransactionResponse_COMMIT, response.State) + + verifyCounterMetrics(t, counter, counterMetrics{ + registered: 1, + started: 1, + committed: 1, + }) } func TestTransactionFailsWithMultipleNodes(t *testing.T) { @@ -67,7 +108,9 @@ func TestTransactionFailsWithMultipleNodes(t *testing.T) { } func TestTransactionFailures(t *testing.T) { - cc, _, cleanup := runPraefectWithTransactionMgr(t) + counter, opts := setupMetrics() + + cc, _, cleanup := runPraefectWithTransactionMgr(t, opts...) defer cleanup() ctx, cancel := context.WithTimeout(context.Background(), time.Second) @@ -83,10 +126,17 @@ func TestTransactionFailures(t *testing.T) { }) require.Error(t, err) require.Equal(t, codes.NotFound, status.Code(err)) + + verifyCounterMetrics(t, counter, counterMetrics{ + started: 1, + invalid: 1, + }) } func TestTransactionCancellation(t *testing.T) { - cc, txMgr, cleanup := runPraefectWithTransactionMgr(t) + counter, opts := setupMetrics() + + cc, txMgr, cleanup := runPraefectWithTransactionMgr(t, opts...) 
defer cleanup() ctx, cancel := context.WithTimeout(context.Background(), time.Second) @@ -108,4 +158,10 @@ func TestTransactionCancellation(t *testing.T) { }) require.Error(t, err) require.Equal(t, codes.NotFound, status.Code(err)) + + verifyCounterMetrics(t, counter, counterMetrics{ + registered: 1, + started: 1, + invalid: 1, + }) } diff --git a/internal/praefect/transactions/manager.go b/internal/praefect/transactions/manager.go index 1cf68974a..c7f4e647a 100644 --- a/internal/praefect/transactions/manager.go +++ b/internal/praefect/transactions/manager.go @@ -7,25 +7,55 @@ import ( "fmt" "math/rand" "sync" + "time" "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/internal/helper" + "gitlab.com/gitlab-org/gitaly/internal/prometheus/metrics" ) // Manager handles reference transactions for Praefect. It is required in order // for Praefect to handle transactions directly instead of having to reach out // to reference transaction RPCs. type Manager struct { - lock sync.Mutex - transactions map[uint64]string + lock sync.Mutex + transactions map[uint64]string + counterMetric *prometheus.CounterVec + delayMetric metrics.HistogramVec +} + +// ManagerOpt is a self referential option for Manager +type ManagerOpt func(*Manager) + +// WithCounterMetric is an option to set the counter Prometheus metric +func WithCounterMetric(counterMetric *prometheus.CounterVec) ManagerOpt { + return func(mgr *Manager) { + mgr.counterMetric = counterMetric + } +} + +// WithDelayMetric is an option to set the delay Prometheus metric +func WithDelayMetric(delayMetric metrics.HistogramVec) ManagerOpt { + return func(mgr *Manager) { + mgr.delayMetric = delayMetric + } } // NewManager creates a new transactions Manager. 
-func NewManager() *Manager { - return &Manager{ - transactions: make(map[uint64]string), +func NewManager(opts ...ManagerOpt) *Manager { + mgr := &Manager{ + transactions: make(map[uint64]string), + counterMetric: prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"action"}), + delayMetric: prometheus.NewHistogramVec(prometheus.HistogramOpts{}, []string{"action"}), } + + for _, opt := range opts { + opt(mgr) + } + + return mgr } func (mgr *Manager) log(ctx context.Context) logrus.FieldLogger { @@ -65,6 +95,8 @@ func (mgr *Manager) RegisterTransaction(ctx context.Context, nodes []string) (ui "nodes": nodes, }).Debug("RegisterTransaction") + mgr.counterMetric.WithLabelValues("registered").Inc() + return transactionID, func() { mgr.cancelTransaction(transactionID) }, nil @@ -106,6 +138,14 @@ func (mgr *Manager) verifyTransaction(transactionID uint64, node string, hash [] // In future, it will wait for all clients of a given transaction to start the // transaction and perform a vote. func (mgr *Manager) StartTransaction(ctx context.Context, transactionID uint64, node string, hash []byte) error { + start := time.Now() + defer func() { + delay := time.Since(start) + mgr.delayMetric.WithLabelValues("vote").Observe(delay.Seconds()) + }() + + mgr.counterMetric.WithLabelValues("started").Inc() + mgr.log(ctx).WithFields(logrus.Fields{ "transaction_id": transactionID, "node": node, @@ -118,6 +158,7 @@ func (mgr *Manager) StartTransaction(ctx context.Context, transactionID uint64, "node": node, "hash": hex.EncodeToString(hash), }).WithError(err).Error("StartTransaction: transaction invalid") + mgr.counterMetric.WithLabelValues("invalid").Inc() return err } @@ -127,5 +168,7 @@ func (mgr *Manager) StartTransaction(ctx context.Context, transactionID uint64, "hash": hex.EncodeToString(hash), }).Debug("StartTransaction: transaction committed") + mgr.counterMetric.WithLabelValues("committed").Inc() + return nil } |