Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZeger-Jan van de Weg <git@zjvandeweg.nl>2021-07-13 15:06:08 +0300
committerZeger-Jan van de Weg <git@zjvandeweg.nl>2021-07-13 15:06:08 +0300
commit40511f7a14ded77c826809d054d740a66e1c106f (patch)
tree8b83a010c70e4402478774d921aae0e02f0434f9
parentd4ea957f6131538cd78e490a585ea3a455251064 (diff)
parent12061b1c3cfba4c14cb89200ced7e814aa06b813 (diff)
Merge branch 'smh-unavailable-repos-metric' into 'master'
Update read-only repository count metric to account for lazy failover See merge request gitlab-org/gitaly!3548
-rw-r--r--cmd/praefect/main.go7
-rw-r--r--internal/praefect/datastore/collector.go101
-rw-r--r--internal/praefect/datastore/collector_test.go230
3 files changed, 185 insertions, 153 deletions
diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go
index 2418b8a90..a906ee21c 100644
--- a/cmd/praefect/main.go
+++ b/cmd/praefect/main.go
@@ -401,12 +401,7 @@ func run(cfgs []starter.Config, conf config.Config) error {
metricsCollectors = append(metricsCollectors, transactionManager, coordinator, repl)
if db != nil {
prometheus.MustRegister(
- datastore.NewRepositoryStoreCollector(
- logger,
- conf.VirtualStorageNames(),
- db,
- conf.Failover.ElectionStrategy == config.ElectionStrategyPerRepository,
- ),
+ datastore.NewRepositoryStoreCollector(logger, conf.VirtualStorageNames(), db),
)
}
prometheus.MustRegister(metricsCollectors...)
diff --git a/internal/praefect/datastore/collector.go b/internal/praefect/datastore/collector.go
index 098c46dfb..dc9500b47 100644
--- a/internal/praefect/datastore/collector.go
+++ b/internal/praefect/datastore/collector.go
@@ -10,6 +10,9 @@ import (
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/glsql"
)
+// This is kept for backwards compatibility as some alerting rules depend on this.
+// The unavailable repositories is a more accurate description for the metric and
+// is exported below so we can migrate to it.
var descReadOnlyRepositories = prometheus.NewDesc(
"gitaly_praefect_read_only_repositories",
"Number of repositories in read-only mode within a virtual storage.",
@@ -17,21 +20,26 @@ var descReadOnlyRepositories = prometheus.NewDesc(
nil,
)
+var descUnavailableRepositories = prometheus.NewDesc(
+ "gitaly_praefect_unavailable_repositories",
+ "Number of repositories that have no healthy, up to date replicas.",
+ []string{"virtual_storage"},
+ nil,
+)
+
// RepositoryStoreCollector collects metrics from the RepositoryStore.
type RepositoryStoreCollector struct {
- log logrus.FieldLogger
- db glsql.Querier
- virtualStorages []string
- repositoryScoped bool
+ log logrus.FieldLogger
+ db glsql.Querier
+ virtualStorages []string
}
// NewRepositoryStoreCollector returns a new collector.
-func NewRepositoryStoreCollector(log logrus.FieldLogger, virtualStorages []string, db glsql.Querier, repositoryScoped bool) *RepositoryStoreCollector {
+func NewRepositoryStoreCollector(log logrus.FieldLogger, virtualStorages []string, db glsql.Querier) *RepositoryStoreCollector {
return &RepositoryStoreCollector{
- log: log.WithField("component", "RepositoryStoreCollector"),
- db: db,
- virtualStorages: virtualStorages,
- repositoryScoped: repositoryScoped,
+ log: log.WithField("component", "RepositoryStoreCollector"),
+ db: db,
+ virtualStorages: virtualStorages,
}
}
@@ -40,76 +48,39 @@ func (c *RepositoryStoreCollector) Describe(ch chan<- *prometheus.Desc) {
}
func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) {
- readOnlyCounts, err := c.queryMetrics(context.TODO())
+ unavailableCounts, err := c.queryMetrics(context.TODO())
if err != nil {
c.log.WithError(err).Error("failed collecting read-only repository count metric")
return
}
for _, vs := range c.virtualStorages {
- ch <- prometheus.MustNewConstMetric(descReadOnlyRepositories, prometheus.GaugeValue, float64(readOnlyCounts[vs]), vs)
+ for _, desc := range []*prometheus.Desc{descReadOnlyRepositories, descUnavailableRepositories} {
+ ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(unavailableCounts[vs]), vs)
+ }
}
}
-// queryMetrics queries the number of read-only repositories from the database.
-// A repository is in read-only mode when its primary storage is not on the latest
-// generation.
-//
-// There are two variants on the query:
-//
-// 1. virtualStorageScopedQuery considers virtual storage scoped primaries. Virtual storage's
-// primary is stored in the `shard_primaries` table in the `node_name` column.
-// 2. repositoryScopedQuery considers repository specific primaries. Repository scoped
-// primaries are stored in the `primary` column in the `repositories` table.
-//
-// Both queries cross-reference the `repositories` and `storage_repositories` tables
-// to see if the primary storage of the repository is on the latest generation. If not,
-// it's added to the returned count.
-//
-// The query operating on virtual storage scoped primaries will be dropped once the migration
-// to repository scoped primaries is finished.
+// queryMetrics queries the number of unavailable repositories from the database.
+// A repository is unavailable when it has no replicas that can act as a primary, indicating
+// they are either unhealthy or out of date.
func (c *RepositoryStoreCollector) queryMetrics(ctx context.Context) (map[string]int, error) {
- const virtualStorageScopedQuery = `
-SELECT repositories.virtual_storage, COUNT(*)
+ rows, err := c.db.QueryContext(ctx, `
+SELECT virtual_storage, COUNT(*)
FROM repositories
-LEFT JOIN shard_primaries ON
- shard_primaries.shard_name = repositories.virtual_storage AND
- shard_primaries.demoted = false
-LEFT JOIN storage_repositories ON
- repositories.virtual_storage = storage_repositories.virtual_storage AND
- repositories.relative_path = storage_repositories.relative_path AND
- shard_primaries.node_name = storage_repositories.storage
-WHERE
- COALESCE(storage_repositories.generation, -1) < repositories.generation AND
- repositories.virtual_storage = ANY($1)
-GROUP BY repositories.virtual_storage;
- `
-
- const repositoryScopedQuery = `
-SELECT repositories.virtual_storage, COUNT(*)
-FROM repositories
-LEFT JOIN storage_repositories ON
- repositories.virtual_storage = storage_repositories.virtual_storage AND
- repositories.relative_path = storage_repositories.relative_path AND
- repositories.primary = storage_repositories.storage
-WHERE
- COALESCE(storage_repositories.generation, -1) < repositories.generation AND
- repositories.virtual_storage = ANY($1)
-GROUP BY repositories.virtual_storage
-`
-
- query := virtualStorageScopedQuery
- if c.repositoryScoped {
- query = repositoryScopedQuery
- }
-
- rows, err := c.db.QueryContext(ctx, query, pq.StringArray(c.virtualStorages))
+WHERE NOT EXISTS (
+ SELECT FROM valid_primaries
+ WHERE valid_primaries.virtual_storage = repositories.virtual_storage
+ AND valid_primaries.relative_path = repositories.relative_path
+) AND repositories.virtual_storage = ANY($1)
+GROUP BY virtual_storage
+ `, pq.StringArray(c.virtualStorages))
if err != nil {
return nil, fmt.Errorf("query: %w", err)
}
defer rows.Close()
- vsReadOnly := make(map[string]int)
+ vsUnavailable := make(map[string]int)
for rows.Next() {
var vs string
var count int
@@ -118,8 +89,8 @@ GROUP BY repositories.virtual_storage
return nil, fmt.Errorf("scan: %w", err)
}
- vsReadOnly[vs] = count
+ vsUnavailable[vs] = count
}
- return vsReadOnly, rows.Err()
+ return vsUnavailable, rows.Err()
}
diff --git a/internal/praefect/datastore/collector_test.go b/internal/praefect/datastore/collector_test.go
index 65248605e..f840a6749 100644
--- a/internal/praefect/datastore/collector_test.go
+++ b/internal/praefect/datastore/collector_test.go
@@ -17,107 +17,173 @@ func TestRepositoryStoreCollector(t *testing.T) {
ctx, cancel := testhelper.Context()
defer cancel()
- db := getDB(t)
- rs := NewPostgresRepositoryStore(db, nil)
+ type replicas map[string]struct {
+ generation int
+ assigned bool
+ }
- state := map[string]map[string]map[string]int{
- "some-read-only": {
- "read-only": {
- "vs-primary": 0,
- "repo-primary": 0,
- "secondary": 1,
- },
- "writable": {
- "vs-primary": 1,
- "repo-primary": 1,
- "secondary": 1,
+ type repositories []struct {
+ deleted bool
+ relativePath string
+ replicas replicas
+ }
+
+ for _, tc := range []struct {
+ desc string
+ healthyNodes []string
+ repositories repositories
+ count int
+ }{
+ {
+ desc: "no repositories",
+ },
+ {
+ desc: "deleted repositories are not considered unavailable",
+ repositories: repositories{
+ {
+ deleted: true,
+ relativePath: "repository-1",
+ replicas: replicas{
+ "storage-1": {generation: 0},
+ },
+ },
},
- "repo-writable": {
- "vs-primary": 0,
- "repo-primary": 1,
- "secondary": 1,
+ },
+ {
+ desc: "repositories without any healthy replicas are counted",
+ repositories: repositories{
+ {
+ relativePath: "repository-1",
+ replicas: replicas{
+ "storage-1": {generation: 0},
+ },
+ },
},
+ count: 1,
},
- "all-writable": {
- "writable": {
- "vs-primary": 0,
- "repo-primary": 0,
+ {
+ desc: "repositories with healthy replicas are not counted",
+ healthyNodes: []string{"storage-1"},
+ repositories: repositories{
+ {
+ relativePath: "repository-1",
+ replicas: replicas{
+ "storage-1": {generation: 0},
+ },
+ },
},
},
- "unconfigured": {
- "read-only": {
- "secondary": 1,
+ {
+ desc: "repositories with only outdated healthy replicas are counted",
+ healthyNodes: []string{"storage-1"},
+ repositories: repositories{
+ {
+ relativePath: "repository-1",
+ replicas: replicas{
+ "storage-1": {generation: 0},
+ "storage-2": {generation: 1},
+ },
+ },
},
+ count: 1,
},
- "no-records": {},
- "no-primary": {
- "read-only": {
- "secondary": 0,
+ {
+ desc: "repositories with unassigned fully up to date healthy replicas are not counted",
+ healthyNodes: []string{"storage-2"},
+ repositories: repositories{
+ {
+ relativePath: "repository-1",
+ replicas: replicas{
+ "storage-1": {generation: 1, assigned: true},
+ "storage-2": {generation: 1},
+ },
+ },
},
},
- }
- for virtualStorage, relativePaths := range state {
- demoted := false
- if virtualStorage == "no-primary" {
- demoted = true
- }
- _, err := db.ExecContext(ctx, `
- INSERT INTO shard_primaries (shard_name, node_name, elected_by_praefect, elected_at, demoted)
- VALUES ($1, 'vs-primary', 'not-needed', now(), $2)
- `, virtualStorage, demoted,
- )
- require.NoError(t, err)
-
- for relativePath, storages := range relativePaths {
- if virtualStorage != "no-primary" {
- _, err := db.ExecContext(ctx, `
- INSERT INTO repositories (virtual_storage, relative_path, "primary")
- VALUES ($1, $2, 'repo-primary')
- `, virtualStorage, relativePath,
- )
- require.NoError(t, err)
- }
-
- for storage, generation := range storages {
- require.NoError(t, rs.SetGeneration(ctx, virtualStorage, relativePath, storage, generation))
- }
- }
- }
-
- var virtualStorages []string
- for vs := range state {
- if vs == "unconfigured" {
- continue
- }
-
- virtualStorages = append(virtualStorages, vs)
- }
-
- for _, tc := range []struct {
- desc string
- repositoryScoped bool
- someReadOnlyCount int
- }{
{
- desc: "repository scoped",
- someReadOnlyCount: 1,
- repositoryScoped: true,
+ desc: "repositories with unassigned, outdated replicas is not unavailable",
+ healthyNodes: []string{"storage-1"},
+ repositories: repositories{
+ {
+ relativePath: "repository-1",
+ replicas: replicas{
+ "storage-1": {generation: 1, assigned: true},
+ "storage-2": {generation: 0},
+ },
+ },
+ },
},
{
- desc: "virtual storage scoped",
- someReadOnlyCount: 2,
+ desc: "multiple unavailable repositories are counted correctly",
+ healthyNodes: []string{"storage-2"},
+ repositories: repositories{
+ {
+ relativePath: "repository-1",
+ replicas: replicas{
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 0},
+ },
+ },
+ {
+ relativePath: "repository-2",
+ replicas: replicas{
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 0},
+ },
+ },
+ },
+ count: 2,
},
} {
t.Run(tc.desc, func(t *testing.T) {
- c := NewRepositoryStoreCollector(logrus.New(), virtualStorages, db, tc.repositoryScoped)
+ tx, err := getDB(t).Begin()
+ require.NoError(t, err)
+ defer tx.Rollback()
+
+ testhelper.SetHealthyNodes(t, ctx, tx, map[string]map[string][]string{
+ "praefect-0": {"virtual-storage-1": tc.healthyNodes},
+ })
+
+ for _, repository := range tc.repositories {
+ if !repository.deleted {
+ _, err := tx.ExecContext(ctx, `
+ INSERT INTO repositories (virtual_storage, relative_path)
+ VALUES ('virtual-storage-1', $1)
+ `, repository.relativePath,
+ )
+ require.NoError(t, err)
+ }
+
+ for storage, replica := range repository.replicas {
+ if replica.assigned {
+ _, err := tx.ExecContext(ctx, `
+ INSERT INTO repository_assignments (virtual_storage, relative_path, storage)
+ VALUES ('virtual-storage-1', $1, $2)
+ `, repository.relativePath, storage,
+ )
+ require.NoError(t, err)
+ }
+
+ _, err := tx.ExecContext(ctx, `
+ INSERT INTO storage_repositories (virtual_storage, relative_path, storage, generation)
+ VALUES ('virtual-storage-1', $1, $2, $3)
+ `, repository.relativePath, storage, replica.generation,
+ )
+ require.NoError(t, err)
+ }
+ }
+
+ c := NewRepositoryStoreCollector(logrus.New(), []string{"virtual-storage-1", "virtual-storage-2"}, tx)
require.NoError(t, testutil.CollectAndCompare(c, strings.NewReader(fmt.Sprintf(`
# HELP gitaly_praefect_read_only_repositories Number of repositories in read-only mode within a virtual storage.
# TYPE gitaly_praefect_read_only_repositories gauge
-gitaly_praefect_read_only_repositories{virtual_storage="all-writable"} 0
-gitaly_praefect_read_only_repositories{virtual_storage="no-records"} 0
-gitaly_praefect_read_only_repositories{virtual_storage="no-primary"} 1
-gitaly_praefect_read_only_repositories{virtual_storage="some-read-only"} %d
-`, tc.someReadOnlyCount))))
+gitaly_praefect_read_only_repositories{virtual_storage="virtual-storage-1"} %d
+gitaly_praefect_read_only_repositories{virtual_storage="virtual-storage-2"} 0
+# HELP gitaly_praefect_unavailable_repositories Number of repositories that have no healthy, up to date replicas.
+# TYPE gitaly_praefect_unavailable_repositories gauge
+gitaly_praefect_unavailable_repositories{virtual_storage="virtual-storage-1"} %d
+gitaly_praefect_unavailable_repositories{virtual_storage="virtual-storage-2"} 0
+ `, tc.count, tc.count))))
})
}
}