diff options
author | John Cai <jcai@gitlab.com> | 2021-12-14 20:26:32 +0300 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2021-12-14 20:26:32 +0300 |
commit | 152dd3c663c73f79db4c1ff0f63b9011316e35a0 (patch) | |
tree | feb67c4e3e1d2b5dfa2fcfcc6123d7e91aa32fd0 | |
parent | 0e0aeb5ca4488903f41acd392911bd89d1ad3d6d (diff) | |
parent | 0aa1e36fcf2e4c6f2ece514b523b86cd3177b249 (diff) |
Merge branch 'jc-repositories-missing-primary' into 'master'
praefect: add missing primaries check
See merge request gitlab-org/gitaly!4176
-rw-r--r-- | cmd/praefect/subcmd.go | 1 | ||||
-rw-r--r-- | internal/praefect/checks.go | 41 | ||||
-rw-r--r-- | internal/praefect/checks_test.go | 90 | ||||
-rw-r--r-- | internal/praefect/datastore/collector.go | 10 |
4 files changed, 137 insertions, 5 deletions
diff --git a/cmd/praefect/subcmd.go b/cmd/praefect/subcmd.go index b11d155bd..ccf3f9132 100644 --- a/cmd/praefect/subcmd.go +++ b/cmd/praefect/subcmd.go @@ -47,6 +47,7 @@ var subcommands = map[string]subcmd{ praefect.NewPraefectMigrationCheck, praefect.NewGitalyNodeConnectivityCheck, praefect.NewPostgresReadWriteCheck, + praefect.NewUnavailableReposCheck, ), metadataCmdName: newMetadataSubcommand(os.Stdout), } diff --git a/internal/praefect/checks.go b/internal/praefect/checks.go index 316718e75..9b042907c 100644 --- a/internal/praefect/checks.go +++ b/internal/praefect/checks.go @@ -10,6 +10,7 @@ import ( migrate "github.com/rubenv/sql-migrate" "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/config" + "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore" "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/glsql" "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/migrations" "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/nodes" @@ -140,6 +141,46 @@ func NewPostgresReadWriteCheck(conf config.Config, w io.Writer, quiet bool) *Che } } +// NewUnavailableReposCheck returns a check that finds the number of repositories without a valid primary +func NewUnavailableReposCheck(conf config.Config, w io.Writer, quiet bool) *Check { + return &Check{ + Name: "unavailable repositories", + Description: "lists repositories that are missing a valid primary, hence rendering them unavailable", + Run: func(ctx context.Context) error { + db, err := glsql.OpenDB(ctx, conf.DB) + if err != nil { + return fmt.Errorf("error opening database connection: %w", err) + } + defer db.Close() + + unavailableRepositories, err := datastore.CountUnavailableRepositories( + ctx, + db, + conf.VirtualStorageNames(), + ) + if err != nil { + return err + } + + if len(unavailableRepositories) == 0 { + logMessage(quiet, w, "All repositories are available.") + return nil + } + + for virtualStorage, unavailableCount := range unavailableRepositories { + format := "virtual-storage %q has %d repositories that are unavailable." + if unavailableCount == 1 { + format = "virtual-storage %q has %d repository that is unavailable." + } + logMessage(quiet, w, format, virtualStorage, unavailableCount) + } + + return errors.New("repositories unavailable") + }, + Severity: Warning, + } +} + func logMessage(quiet bool, w io.Writer, format string, a ...interface{}) { if quiet { return diff --git a/internal/praefect/checks_test.go b/internal/praefect/checks_test.go index 3263c9e83..5b1481ba4 100644 --- a/internal/praefect/checks_test.go +++ b/internal/praefect/checks_test.go @@ -3,6 +3,7 @@ package praefect import ( "bytes" "context" + "errors" "fmt" "io" "net" @@ -19,6 +20,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/migrations" "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/nodes" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testdb" "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" "google.golang.org/grpc" "google.golang.org/grpc/health" @@ -421,3 +423,91 @@ func TestPostgresReadWriteCheck(t *testing.T) { }) } } + +func TestNewUnavailableReposCheck(t *testing.T) { + conf := config.Config{ + VirtualStorages: []*config.VirtualStorage{ + { + Name: "virtual-storage-1", + Nodes: []*config.Node{ + {Storage: "storage-0"}, + {Storage: "storage-1"}, + {Storage: "storage-2"}, + }, + }, + }, + } + + testCases := []struct { + desc string + healthyNodes map[string]map[string][]string + expectedMsg string + expectedErr error + }{ + { + desc: "all repos available", + healthyNodes: map[string]map[string][]string{ + "praefect-0": {"virtual-storage-1": []string{"storage-0", "storage-1", "storage-2"}}, + }, + expectedMsg: "All repositories are available.\n", + expectedErr: nil, + }, + { + desc: "one unavailable", + healthyNodes: map[string]map[string][]string{ + "praefect-0": {"virtual-storage-1": []string{"storage-1", "storage-2"}}, + }, + expectedMsg: "virtual-storage \"virtual-storage-1\" has 1 repository that is unavailable.\n", + expectedErr: errors.New("repositories unavailable"), + }, + { + desc: "three unavailable", + healthyNodes: map[string]map[string][]string{ + "praefect-0": {"virtual-storage-1": []string{}}, + }, + expectedMsg: "virtual-storage \"virtual-storage-1\" has 3 repositories that are unavailable.\n", + expectedErr: errors.New("repositories unavailable"), + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + ctx, cancel := testhelper.Context() + defer cancel() + + db := glsql.NewDB(t) + dbCfg := glsql.GetDBConfig(t, db.Name) + conf.DB = dbCfg + + rs := datastore.NewPostgresRepositoryStore(db, nil) + for path, storage := range map[string]string{ + "repo-0": "storage-0", + "repo-1": "storage-1", + "repo-2": "storage-2", + } { + repositoryID, err := rs.ReserveRepositoryID(ctx, "virtual-storage-1", path) + require.NoError(t, err) + require.NoError(t, rs.CreateRepository( + ctx, + repositoryID, + "virtual-storage-1", + path, + path, + storage, + nil, nil, true, false, + )) + + require.NoError(t, err) + require.NoError(t, rs.SetGeneration(ctx, repositoryID, storage, path, 1)) + require.NoError(t, err) + } + + testdb.SetHealthyNodes(t, ctx, db, tc.healthyNodes) + var stdout bytes.Buffer + check := NewUnavailableReposCheck(conf, &stdout, false) + + assert.Equal(t, tc.expectedErr, check.Run(ctx)) + assert.Equal(t, tc.expectedMsg, stdout.String()) + }) + } +} diff --git a/internal/praefect/datastore/collector.go b/internal/praefect/datastore/collector.go index a99f6514d..54e090087 100644 --- a/internal/praefect/datastore/collector.go +++ b/internal/praefect/datastore/collector.go @@ -62,7 +62,7 @@ func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) { ctx, cancel := context.WithTimeout(context.TODO(), c.timeout) defer cancel() - unavailableCounts, err := c.queryMetrics(ctx) + unavailableCounts, err := CountUnavailableRepositories(ctx, c.db, c.virtualStorages) if err != nil { c.log.WithError(err).Error("failed collecting read-only repository count metric") return @@ -75,11 +75,11 @@ func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) { } } -// queryMetrics queries the number of unavailable repositories from the database. +// CountUnavailableRepositories queries the number of unavailable repositories from the database. // A repository is unavailable when it has no replicas that can act as a primary, indicating // they are either unhealthy or out of date. -func (c *RepositoryStoreCollector) queryMetrics(ctx context.Context) (map[string]int, error) { - rows, err := c.db.QueryContext(ctx, ` +func CountUnavailableRepositories(ctx context.Context, db glsql.Querier, virtualStorages []string) (map[string]int, error) { + rows, err := db.QueryContext(ctx, ` SELECT virtual_storage, COUNT(*) FROM repositories WHERE NOT EXISTS ( @@ -87,7 +87,7 @@ WHERE NOT EXISTS ( WHERE valid_primaries.repository_id = repositories.repository_id ) AND repositories.virtual_storage = ANY($1) GROUP BY virtual_storage - `, pq.StringArray(c.virtualStorages)) + `, pq.StringArray(virtualStorages)) if err != nil { return nil, fmt.Errorf("query: %w", err) } |