Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Cai <jcai@gitlab.com>2021-12-14 20:26:32 +0300
committerJohn Cai <jcai@gitlab.com>2021-12-14 20:26:32 +0300
commit152dd3c663c73f79db4c1ff0f63b9011316e35a0 (patch)
treefeb67c4e3e1d2b5dfa2fcfcc6123d7e91aa32fd0
parent0e0aeb5ca4488903f41acd392911bd89d1ad3d6d (diff)
parent0aa1e36fcf2e4c6f2ece514b523b86cd3177b249 (diff)
Merge branch 'jc-repositories-missing-primary' into 'master'
praefect: add missing primaries check See merge request gitlab-org/gitaly!4176
-rw-r--r--cmd/praefect/subcmd.go1
-rw-r--r--internal/praefect/checks.go41
-rw-r--r--internal/praefect/checks_test.go90
-rw-r--r--internal/praefect/datastore/collector.go10
4 files changed, 137 insertions, 5 deletions
diff --git a/cmd/praefect/subcmd.go b/cmd/praefect/subcmd.go
index b11d155bd..ccf3f9132 100644
--- a/cmd/praefect/subcmd.go
+++ b/cmd/praefect/subcmd.go
@@ -47,6 +47,7 @@ var subcommands = map[string]subcmd{
praefect.NewPraefectMigrationCheck,
praefect.NewGitalyNodeConnectivityCheck,
praefect.NewPostgresReadWriteCheck,
+ praefect.NewUnavailableReposCheck,
),
metadataCmdName: newMetadataSubcommand(os.Stdout),
}
diff --git a/internal/praefect/checks.go b/internal/praefect/checks.go
index 316718e75..9b042907c 100644
--- a/internal/praefect/checks.go
+++ b/internal/praefect/checks.go
@@ -10,6 +10,7 @@ import (
migrate "github.com/rubenv/sql-migrate"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/config"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/glsql"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/migrations"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/nodes"
@@ -140,6 +141,46 @@ func NewPostgresReadWriteCheck(conf config.Config, w io.Writer, quiet bool) *Che
}
}
+// NewUnavailableReposCheck returns a check that finds the number of repositories without a valid primary
+func NewUnavailableReposCheck(conf config.Config, w io.Writer, quiet bool) *Check {
+ return &Check{
+ Name: "unavailable repositories",
+ Description: "lists repositories that are missing a valid primary, hence rendering them unavailable",
+ Run: func(ctx context.Context) error {
+ db, err := glsql.OpenDB(ctx, conf.DB)
+ if err != nil {
+ return fmt.Errorf("error opening database connection: %w", err)
+ }
+ defer db.Close()
+
+ unavailableRepositories, err := datastore.CountUnavailableRepositories(
+ ctx,
+ db,
+ conf.VirtualStorageNames(),
+ )
+ if err != nil {
+ return err
+ }
+
+ if len(unavailableRepositories) == 0 {
+ logMessage(quiet, w, "All repositories are available.")
+ return nil
+ }
+
+ for virtualStorage, unavailableCount := range unavailableRepositories {
+ format := "virtual-storage %q has %d repositories that are unavailable."
+ if unavailableCount == 1 {
+ format = "virtual-storage %q has %d repository that is unavailable."
+ }
+ logMessage(quiet, w, format, virtualStorage, unavailableCount)
+ }
+
+ return errors.New("repositories unavailable")
+ },
+ Severity: Warning,
+ }
+}
+
func logMessage(quiet bool, w io.Writer, format string, a ...interface{}) {
if quiet {
return
diff --git a/internal/praefect/checks_test.go b/internal/praefect/checks_test.go
index 3263c9e83..5b1481ba4 100644
--- a/internal/praefect/checks_test.go
+++ b/internal/praefect/checks_test.go
@@ -3,6 +3,7 @@ package praefect
import (
"bytes"
"context"
+ "errors"
"fmt"
"io"
"net"
@@ -19,6 +20,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/datastore/migrations"
"gitlab.com/gitlab-org/gitaly/v14/internal/praefect/nodes"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testdb"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
@@ -421,3 +423,91 @@ func TestPostgresReadWriteCheck(t *testing.T) {
})
}
}
+
+func TestNewUnavailableReposCheck(t *testing.T) {
+ conf := config.Config{
+ VirtualStorages: []*config.VirtualStorage{
+ {
+ Name: "virtual-storage-1",
+ Nodes: []*config.Node{
+ {Storage: "storage-0"},
+ {Storage: "storage-1"},
+ {Storage: "storage-2"},
+ },
+ },
+ },
+ }
+
+ testCases := []struct {
+ desc string
+ healthyNodes map[string]map[string][]string
+ expectedMsg string
+ expectedErr error
+ }{
+ {
+ desc: "all repos available",
+ healthyNodes: map[string]map[string][]string{
+ "praefect-0": {"virtual-storage-1": []string{"storage-0", "storage-1", "storage-2"}},
+ },
+ expectedMsg: "All repositories are available.\n",
+ expectedErr: nil,
+ },
+ {
+ desc: "one unavailable",
+ healthyNodes: map[string]map[string][]string{
+ "praefect-0": {"virtual-storage-1": []string{"storage-1", "storage-2"}},
+ },
+ expectedMsg: "virtual-storage \"virtual-storage-1\" has 1 repository that is unavailable.\n",
+ expectedErr: errors.New("repositories unavailable"),
+ },
+ {
+ desc: "three unavailable",
+ healthyNodes: map[string]map[string][]string{
+ "praefect-0": {"virtual-storage-1": []string{}},
+ },
+ expectedMsg: "virtual-storage \"virtual-storage-1\" has 3 repositories that are unavailable.\n",
+ expectedErr: errors.New("repositories unavailable"),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ db := glsql.NewDB(t)
+ dbCfg := glsql.GetDBConfig(t, db.Name)
+ conf.DB = dbCfg
+
+ rs := datastore.NewPostgresRepositoryStore(db, nil)
+ for path, storage := range map[string]string{
+ "repo-0": "storage-0",
+ "repo-1": "storage-1",
+ "repo-2": "storage-2",
+ } {
+ repositoryID, err := rs.ReserveRepositoryID(ctx, "virtual-storage-1", path)
+ require.NoError(t, err)
+ require.NoError(t, rs.CreateRepository(
+ ctx,
+ repositoryID,
+ "virtual-storage-1",
+ path,
+ path,
+ storage,
+ nil, nil, true, false,
+ ))
+
+ require.NoError(t, err)
+ require.NoError(t, rs.SetGeneration(ctx, repositoryID, storage, path, 1))
+ require.NoError(t, err)
+ }
+
+ testdb.SetHealthyNodes(t, ctx, db, tc.healthyNodes)
+ var stdout bytes.Buffer
+ check := NewUnavailableReposCheck(conf, &stdout, false)
+
+ assert.Equal(t, tc.expectedErr, check.Run(ctx))
+ assert.Equal(t, tc.expectedMsg, stdout.String())
+ })
+ }
+}
diff --git a/internal/praefect/datastore/collector.go b/internal/praefect/datastore/collector.go
index a99f6514d..54e090087 100644
--- a/internal/praefect/datastore/collector.go
+++ b/internal/praefect/datastore/collector.go
@@ -62,7 +62,7 @@ func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) {
ctx, cancel := context.WithTimeout(context.TODO(), c.timeout)
defer cancel()
- unavailableCounts, err := c.queryMetrics(ctx)
+ unavailableCounts, err := CountUnavailableRepositories(ctx, c.db, c.virtualStorages)
if err != nil {
c.log.WithError(err).Error("failed collecting read-only repository count metric")
return
@@ -75,11 +75,11 @@ func (c *RepositoryStoreCollector) Collect(ch chan<- prometheus.Metric) {
}
}
-// queryMetrics queries the number of unavailable repositories from the database.
+// CountUnavailableRepositories queries the number of unavailable repositories from the database.
// A repository is unavailable when it has no replicas that can act as a primary, indicating
// they are either unhealthy or out of date.
-func (c *RepositoryStoreCollector) queryMetrics(ctx context.Context) (map[string]int, error) {
- rows, err := c.db.QueryContext(ctx, `
+func CountUnavailableRepositories(ctx context.Context, db glsql.Querier, virtualStorages []string) (map[string]int, error) {
+ rows, err := db.QueryContext(ctx, `
SELECT virtual_storage, COUNT(*)
FROM repositories
WHERE NOT EXISTS (
@@ -87,7 +87,7 @@ WHERE NOT EXISTS (
WHERE valid_primaries.repository_id = repositories.repository_id
) AND repositories.virtual_storage = ANY($1)
GROUP BY virtual_storage
- `, pq.StringArray(c.virtualStorages))
+ `, pq.StringArray(virtualStorages))
if err != nil {
return nil, fmt.Errorf("query: %w", err)
}