1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
package datastore
import (
"context"
"fmt"
"time"
"github.com/lib/pq"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/glsql"
)
var (
	// descReadOnlyRepositories describes the legacy metric name. It is retained for
	// backwards compatibility because some alerting rules depend on it.
	// "Unavailable repositories" is the more accurate description of the value, so
	// the metric is additionally exported through descUnavailableRepositories to
	// allow migrating to the new name.
	descReadOnlyRepositories = prometheus.NewDesc(
		"gitaly_praefect_read_only_repositories",
		"Number of repositories in read-only mode within a virtual storage.",
		[]string{"virtual_storage"},
		nil,
	)

	// descUnavailableRepositories describes the metric counting, per virtual
	// storage, the repositories that have no healthy, up to date replicas.
	descUnavailableRepositories = prometheus.NewDesc(
		"gitaly_praefect_unavailable_repositories",
		"Number of repositories that have no healthy, up to date replicas.",
		[]string{"virtual_storage"},
		nil,
	)
)
// RepositoryStoreCollector collects metrics from the RepositoryStore.
// For each configured virtual storage it reports the number of repositories
// that have no entry in valid_primaries.
type RepositoryStoreCollector struct {
// log is the logger used to report failures while collecting metrics.
log logrus.FieldLogger
// db is the handle the metrics query is executed against.
db glsql.Querier
// virtualStorages lists the virtual storages metrics are emitted for.
virtualStorages []string
// timeout bounds the duration of the database query run on each collection.
timeout time.Duration
}
// NewRepositoryStoreCollector builds a collector that reports metrics for the given
// virtual storages. Queries are run against db and are bounded by timeout. The
// provided logger is tagged with the collector's component name.
func NewRepositoryStoreCollector(log logrus.FieldLogger, virtualStorages []string, db glsql.Querier, timeout time.Duration) *RepositoryStoreCollector {
	collector := new(RepositoryStoreCollector)
	collector.log = log.WithField("component", "RepositoryStoreCollector")
	collector.db = db
	collector.virtualStorages = virtualStorages
	collector.timeout = timeout

	return collector
}
// Describe implements prometheus.Collector by sending the descriptors of the two
// metrics this collector exports.
//
// The descriptors are static, so they are sent directly instead of being derived
// through prometheus.DescribeByCollect. Deriving them would run Collect, and thus
// the database query, when the collector is registered. If that query failed or
// timed out, no descriptors would be produced and the collector would silently be
// registered as an unchecked collector.
func (c *RepositoryStoreCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- descReadOnlyRepositories
	ch <- descUnavailableRepositories
}
// Collect implements prometheus.Collector. It queries the number of unavailable
// repositories and, for every configured virtual storage, emits that count as a
// gauge under both the legacy read-only metric and the unavailable metric. If the
// query fails, the error is logged and no metrics are emitted.
func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) {
	// The Collector interface provides no context, so the query is bounded solely
	// by the configured timeout.
	ctx, cancel := context.WithTimeout(context.TODO(), c.timeout)
	defer cancel()

	counts, err := c.queryMetrics(ctx)
	if err != nil {
		c.log.WithError(err).Error("failed collecting read-only repository count metric")
		return
	}

	for _, storage := range c.virtualStorages {
		// A virtual storage absent from the query result reads as the map's zero
		// value, so it is reported as 0.
		value := float64(counts[storage])

		ch <- prometheus.MustNewConstMetric(descReadOnlyRepositories, prometheus.GaugeValue, value, storage)
		ch <- prometheus.MustNewConstMetric(descUnavailableRepositories, prometheus.GaugeValue, value, storage)
	}
}
// queryMetrics returns, keyed by virtual storage, the number of unavailable
// repositories recorded in the database. A repository counts as unavailable when
// it has no matching row in valid_primaries, meaning none of its replicas can act
// as a primary because they are either unhealthy or out of date. Only the
// collector's configured virtual storages are considered, and a virtual storage
// without unavailable repositories has no entry in the returned map.
func (c *RepositoryStoreCollector) queryMetrics(ctx context.Context) (map[string]int, error) {
	const unavailableQuery = `
SELECT virtual_storage, COUNT(*)
FROM repositories
WHERE NOT EXISTS (
SELECT FROM valid_primaries
WHERE valid_primaries.virtual_storage = repositories.virtual_storage
AND valid_primaries.relative_path = repositories.relative_path
) AND repositories.virtual_storage = ANY($1)
GROUP BY virtual_storage
`

	rows, err := c.db.QueryContext(ctx, unavailableQuery, pq.StringArray(c.virtualStorages))
	if err != nil {
		return nil, fmt.Errorf("query: %w", err)
	}
	defer rows.Close()

	counts := map[string]int{}
	for rows.Next() {
		var (
			storage string
			n       int
		)
		if scanErr := rows.Scan(&storage, &n); scanErr != nil {
			return nil, fmt.Errorf("scan: %w", scanErr)
		}
		counts[storage] = n
	}

	// An iteration error is surfaced alongside whatever rows were read so far.
	return counts, rows.Err()
}
|