Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Cai <jcai@gitlab.com>2022-05-10 21:09:45 +0300
committerJohn Cai <jcai@gitlab.com>2022-05-10 21:09:45 +0300
commit60a7383d965aa6a8e69aa2e33a84792cde486cd3 (patch)
treee689856aba829d2fc0e2562b651f16d25ac4330b
parent4729c4575139660a3aa945bd6df3c66996e26e0f (diff)
parent889d25f3ac44ed491d29d25c42ba0975f8cc071e (diff)
Merge branch 'smh-disable-verifier-deletion' into 'master'
Disable deletions by default in background verifier Closes #4211 See merge request gitlab-org/gitaly!4527
-rw-r--r--cmd/praefect/main.go1
-rw-r--r--internal/praefect/config/config.go5
-rw-r--r--internal/praefect/config/config_test.go1
-rw-r--r--internal/praefect/config/testdata/config.toml1
-rw-r--r--internal/praefect/verifier.go19
-rw-r--r--internal/praefect/verifier_test.go75
6 files changed, 88 insertions, 14 deletions
diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go
index ab28bdea0..e77fe71c3 100644
--- a/cmd/praefect/main.go
+++ b/cmd/praefect/main.go
@@ -358,6 +358,7 @@ func run(
nodeSet.Connections(),
hm,
conf.BackgroundVerification.VerificationInterval,
+ conf.BackgroundVerification.DeleteInvalidRecords,
)
promreg.MustRegister(verifier)
diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go
index 7c9a2791c..9b5263e61 100644
--- a/internal/praefect/config/config.go
+++ b/internal/praefect/config/config.go
@@ -83,11 +83,14 @@ type BackgroundVerification struct {
// VerificationInterval determines the duration after a replica due for reverification.
// The feature is disabled if verification interval is 0 or below.
VerificationInterval time.Duration `toml:"verification_interval,omitempty"`
+ // DeleteInvalidRecords controls whether the background verifier will actually delete the metadata
+ // records that point to non-existent replicas.
+ DeleteInvalidRecords bool `toml:"delete_invalid_records"`
}
// DefaultBackgroundVerificationConfig returns the default background verification configuration.
func DefaultBackgroundVerificationConfig() BackgroundVerification {
- return BackgroundVerification{}
+ return BackgroundVerification{VerificationInterval: 7 * 24 * time.Hour}
}
// Reconciliation contains reconciliation specific configuration options.
diff --git a/internal/praefect/config/config_test.go b/internal/praefect/config/config_test.go
index aa0d0b3c5..f98905671 100644
--- a/internal/praefect/config/config_test.go
+++ b/internal/praefect/config/config_test.go
@@ -337,6 +337,7 @@ func TestConfigParsing(t *testing.T) {
},
BackgroundVerification: BackgroundVerification{
VerificationInterval: 24 * time.Hour,
+ DeleteInvalidRecords: true,
},
},
},
diff --git a/internal/praefect/config/testdata/config.toml b/internal/praefect/config/testdata/config.toml
index 3f2a370e2..f7d591eee 100644
--- a/internal/praefect/config/testdata/config.toml
+++ b/internal/praefect/config/testdata/config.toml
@@ -8,6 +8,7 @@ graceful_stop_timeout = "30s"
[background_verification]
verification_interval = "24h"
+delete_invalid_records = true
[replication]
batch_size = 1
diff --git a/internal/praefect/verifier.go b/internal/praefect/verifier.go
index cf23ae7f4..f1e789219 100644
--- a/internal/praefect/verifier.go
+++ b/internal/praefect/verifier.go
@@ -28,6 +28,12 @@ type MetadataVerifier struct {
leaseDuration time.Duration
healthChecker HealthChecker
verificationInterval time.Duration
+ // If performDeletions is set, the worker deletes invalid metadata records. If it is not
+ // set, the worker marks the replica as successfully verified in the database but produces
+ // logs and metrics for the invalid replica. Marking the verification successful in the database
+ // allows the worker to proceed. The invalid replicas will be found again after the configured
+ // verificationInterval has passed.
+ performDeletions bool
dequeuedJobsTotal *prometheus.CounterVec
completedJobsTotal *prometheus.CounterVec
@@ -47,6 +53,7 @@ func NewMetadataVerifier(
conns Connections,
healthChecker HealthChecker,
verificationInterval time.Duration,
+ performDeletions bool,
) *MetadataVerifier {
v := &MetadataVerifier{
log: log,
@@ -56,6 +63,7 @@ func NewMetadataVerifier(
leaseDuration: 30 * time.Second,
healthChecker: healthChecker,
verificationInterval: verificationInterval,
+ performDeletions: performDeletions,
dequeuedJobsTotal: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "gitaly_praefect_verification_jobs_dequeued_total",
@@ -298,7 +306,10 @@ func (v *MetadataVerifier) updateMetadata(ctx context.Context, results []verific
}
if len(logRecords) > 0 {
- v.log.WithField("replicas", logRecords).Info("removing metadata records of non-existent replicas")
+ v.log.WithFields(logrus.Fields{
+ "perform_deletions": v.performDeletions,
+ "replicas": logRecords,
+ }).Info("removing metadata records of non-existent replicas")
}
_, err := v.db.ExecContext(ctx, `
@@ -331,8 +342,10 @@ DELETE FROM storage_repositories
USING results
WHERE storage_repositories.repository_id = results.repository_id
AND storage_repositories.storage = results.storage
-AND successfully_verified AND NOT exists
- `, repositoryIDs, storages, successfullyVerifieds, exists)
+AND successfully_verified
+AND NOT exists
+AND $5
+ `, repositoryIDs, storages, successfullyVerifieds, exists, v.performDeletions)
if err != nil {
return fmt.Errorf("query: %w", err)
}
diff --git a/internal/praefect/verifier_test.go b/internal/praefect/verifier_test.go
index 9c2ffda9b..674089166 100644
--- a/internal/praefect/verifier_test.go
+++ b/internal/praefect/verifier_test.go
@@ -79,14 +79,15 @@ func TestVerifier(t *testing.T) {
}
for _, tc := range []struct {
- desc string
- erroringGitalys map[string]bool
- replicas replicas
- healthyStorages StaticHealthChecker
- batchSize int
- steps []step
- dequeuedJobsTotal map[string]map[string]int
- completedJobsTotal map[string]map[string]map[string]int
+ desc string
+ dontPerformDeletions bool
+ erroringGitalys map[string]bool
+ replicas replicas
+ healthyStorages StaticHealthChecker
+ batchSize int
+ steps []step
+ dequeuedJobsTotal map[string]map[string]int
+ completedJobsTotal map[string]map[string]map[string]int
}{
{
desc: "all replicas exist",
@@ -279,6 +280,60 @@ func TestVerifier(t *testing.T) {
},
},
{
+ desc: "metadata is not deleted if deletions are disabled",
+ dontPerformDeletions: true,
+ batchSize: 2,
+ replicas: replicas{
+ "virtual-storage": {
+ "repository-1": {
+ gitaly1: {exists: true, lastVerified: neverVerified},
+ gitaly2: {exists: false, lastVerified: neverVerified},
+ gitaly3: {exists: false, lastVerified: pendingVerification},
+ },
+ },
+ },
+ steps: []step{
+ {
+ expectedRemovals: logRecord{
+ "virtual-storage": {
+ "repository-1": {gitaly2},
+ },
+ },
+ expectedReplicas: map[string]map[string][]string{
+ "virtual-storage": {
+ "repository-1": {gitaly1, gitaly2, gitaly3},
+ },
+ },
+ },
+ {
+ expectedRemovals: logRecord{
+ "virtual-storage": {
+ "repository-1": {gitaly3},
+ },
+ },
+ expectedReplicas: map[string]map[string][]string{
+ "virtual-storage": {
+ "repository-1": {gitaly1, gitaly2, gitaly3},
+ },
+ },
+ },
+ },
+ dequeuedJobsTotal: map[string]map[string]int{
+ "virtual-storage": {
+ gitaly1: 1,
+ gitaly2: 1,
+ gitaly3: 1,
+ },
+ },
+ completedJobsTotal: map[string]map[string]map[string]int{
+ "virtual-storage": {
+ gitaly1: {"valid": 1},
+ gitaly2: {"invalid": 1},
+ gitaly3: {"invalid": 1},
+ },
+ },
+ },
+ {
desc: "verification time is updated when repository exists",
replicas: replicas{
"virtual-storage": {
@@ -581,7 +636,7 @@ func TestVerifier(t *testing.T) {
healthyStorages = tc.healthyStorages
}
- verifier := NewMetadataVerifier(logger, db, conns, healthyStorages, 24*7*time.Hour)
+ verifier := NewMetadataVerifier(logger, db, conns, healthyStorages, 24*7*time.Hour, !tc.dontPerformDeletions)
if tc.batchSize > 0 {
verifier.batchSize = tc.batchSize
}
@@ -744,7 +799,7 @@ func TestVerifier_runExpiredLeaseReleaser(t *testing.T) {
defer tx.Rollback(t)
logger, hook := test.NewNullLogger()
- verifier := NewMetadataVerifier(logrus.NewEntry(logger), tx, nil, nil, 0)
+ verifier := NewMetadataVerifier(logrus.NewEntry(logger), tx, nil, nil, 0, true)
// set batch size lower than the number of locked leases to ensure the batching works
verifier.batchSize = 2