diff options
author | John Cai <jcai@gitlab.com> | 2022-05-10 21:09:45 +0300 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2022-05-10 21:09:45 +0300 |
commit | 60a7383d965aa6a8e69aa2e33a84792cde486cd3 (patch) | |
tree | e689856aba829d2fc0e2562b651f16d25ac4330b | |
parent | 4729c4575139660a3aa945bd6df3c66996e26e0f (diff) | |
parent | 889d25f3ac44ed491d29d25c42ba0975f8cc071e (diff) |
Merge branch 'smh-disable-verifier-deletion' into 'master'
Disable deletions by default in background verifier
Closes #4211
See merge request gitlab-org/gitaly!4527
-rw-r--r-- | cmd/praefect/main.go | 1 | ||||
-rw-r--r-- | internal/praefect/config/config.go | 5 | ||||
-rw-r--r-- | internal/praefect/config/config_test.go | 1 | ||||
-rw-r--r-- | internal/praefect/config/testdata/config.toml | 1 | ||||
-rw-r--r-- | internal/praefect/verifier.go | 19 | ||||
-rw-r--r-- | internal/praefect/verifier_test.go | 75 |
6 files changed, 88 insertions, 14 deletions
diff --git a/cmd/praefect/main.go b/cmd/praefect/main.go index ab28bdea0..e77fe71c3 100644 --- a/cmd/praefect/main.go +++ b/cmd/praefect/main.go @@ -358,6 +358,7 @@ func run( nodeSet.Connections(), hm, conf.BackgroundVerification.VerificationInterval, + conf.BackgroundVerification.DeleteInvalidRecords, ) promreg.MustRegister(verifier) diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go index 7c9a2791c..9b5263e61 100644 --- a/internal/praefect/config/config.go +++ b/internal/praefect/config/config.go @@ -83,11 +83,14 @@ type BackgroundVerification struct { // VerificationInterval determines the duration after a replica due for reverification. // The feature is disabled if verification interval is 0 or below. VerificationInterval time.Duration `toml:"verification_interval,omitempty"` + // DeleteInvalidRecords controls whether the background verifier will actually delete the metadata + // records that point to non-existent replicas. + DeleteInvalidRecords bool `toml:"delete_invalid_records"` } // DefaultBackgroundVerificationConfig returns the default background verification configuration. func DefaultBackgroundVerificationConfig() BackgroundVerification { - return BackgroundVerification{} + return BackgroundVerification{VerificationInterval: 7 * 24 * time.Hour} } // Reconciliation contains reconciliation specific configuration options. diff --git a/internal/praefect/config/config_test.go b/internal/praefect/config/config_test.go index aa0d0b3c5..f98905671 100644 --- a/internal/praefect/config/config_test.go +++ b/internal/praefect/config/config_test.go @@ -337,6 +337,7 @@ func TestConfigParsing(t *testing.T) { }, BackgroundVerification: BackgroundVerification{ VerificationInterval: 24 * time.Hour, + DeleteInvalidRecords: true, }, }, }, diff --git a/internal/praefect/config/testdata/config.toml b/internal/praefect/config/testdata/config.toml index 3f2a370e2..f7d591eee 100644 --- a/internal/praefect/config/testdata/config.toml +++ b/internal/praefect/config/testdata/config.toml @@ -8,6 +8,7 @@ graceful_stop_timeout = "30s" [background_verification] verification_interval = "24h" +delete_invalid_records = true [replication] batch_size = 1 diff --git a/internal/praefect/verifier.go b/internal/praefect/verifier.go index cf23ae7f4..f1e789219 100644 --- a/internal/praefect/verifier.go +++ b/internal/praefect/verifier.go @@ -28,6 +28,12 @@ type MetadataVerifier struct { leaseDuration time.Duration healthChecker HealthChecker verificationInterval time.Duration + // If performDeletions is set, the worker deletes invalid metadata records. If it is not + // set, the worker marks the replica as successfully verified in the database but produces + // logs and metrics for the invalid replica. Marking the verification successful in the database + // allows the worker to proceed. The invalid replicas will be found again after the configured + // verificationInterval has passed. + performDeletions bool dequeuedJobsTotal *prometheus.CounterVec completedJobsTotal *prometheus.CounterVec @@ -47,6 +53,7 @@ func NewMetadataVerifier( conns Connections, healthChecker HealthChecker, verificationInterval time.Duration, + performDeletions bool, ) *MetadataVerifier { v := &MetadataVerifier{ log: log, @@ -56,6 +63,7 @@ func NewMetadataVerifier( leaseDuration: 30 * time.Second, healthChecker: healthChecker, verificationInterval: verificationInterval, + performDeletions: performDeletions, dequeuedJobsTotal: prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "gitaly_praefect_verification_jobs_dequeued_total", @@ -298,7 +306,10 @@ func (v *MetadataVerifier) updateMetadata(ctx context.Context, results []verific } if len(logRecords) > 0 { - v.log.WithField("replicas", logRecords).Info("removing metadata records of non-existent replicas") + v.log.WithFields(logrus.Fields{ + "perform_deletions": v.performDeletions, + "replicas": logRecords, + }).Info("removing metadata records of non-existent replicas") } _, err := v.db.ExecContext(ctx, ` @@ -331,8 +342,10 @@ DELETE FROM storage_repositories USING results WHERE storage_repositories.repository_id = results.repository_id AND storage_repositories.storage = results.storage -AND successfully_verified AND NOT exists - `, repositoryIDs, storages, successfullyVerifieds, exists) +AND successfully_verified +AND NOT exists +AND $5 + `, repositoryIDs, storages, successfullyVerifieds, exists, v.performDeletions) if err != nil { return fmt.Errorf("query: %w", err) } diff --git a/internal/praefect/verifier_test.go b/internal/praefect/verifier_test.go index 9c2ffda9b..674089166 100644 --- a/internal/praefect/verifier_test.go +++ b/internal/praefect/verifier_test.go @@ -79,14 +79,15 @@ func TestVerifier(t *testing.T) { } for _, tc := range []struct { - desc string - erroringGitalys map[string]bool - replicas replicas - healthyStorages StaticHealthChecker - batchSize int - steps []step - dequeuedJobsTotal map[string]map[string]int - completedJobsTotal map[string]map[string]map[string]int + desc string + dontPerformDeletions bool + erroringGitalys map[string]bool + replicas replicas + healthyStorages StaticHealthChecker + batchSize int + steps []step + dequeuedJobsTotal map[string]map[string]int + completedJobsTotal map[string]map[string]map[string]int }{ { desc: "all replicas exist", @@ -279,6 +280,60 @@ func TestVerifier(t *testing.T) { }, }, { + desc: "metadata is not deleted if deletions are disabled", + dontPerformDeletions: true, + batchSize: 2, + replicas: replicas{ + "virtual-storage": { + "repository-1": { + gitaly1: {exists: true, lastVerified: neverVerified}, + gitaly2: {exists: false, lastVerified: neverVerified}, + gitaly3: {exists: false, lastVerified: pendingVerification}, + }, + }, + }, + steps: []step{ + { + expectedRemovals: logRecord{ + "virtual-storage": { + "repository-1": {gitaly2}, + }, + }, + expectedReplicas: map[string]map[string][]string{ + "virtual-storage": { + "repository-1": {gitaly1, gitaly2, gitaly3}, + }, + }, + }, + { + expectedRemovals: logRecord{ + "virtual-storage": { + "repository-1": {gitaly3}, + }, + }, + expectedReplicas: map[string]map[string][]string{ + "virtual-storage": { + "repository-1": {gitaly1, gitaly2, gitaly3}, + }, + }, + }, + }, + dequeuedJobsTotal: map[string]map[string]int{ + "virtual-storage": { + gitaly1: 1, + gitaly2: 1, + gitaly3: 1, + }, + }, + completedJobsTotal: map[string]map[string]map[string]int{ + "virtual-storage": { + gitaly1: {"valid": 1}, + gitaly2: {"invalid": 1}, + gitaly3: {"invalid": 1}, + }, + }, + }, + { desc: "verification time is updated when repository exists", replicas: replicas{ "virtual-storage": { @@ -581,7 +636,7 @@ func TestVerifier(t *testing.T) { healthyStorages = tc.healthyStorages } - verifier := NewMetadataVerifier(logger, db, conns, healthyStorages, 24*7*time.Hour) + verifier := NewMetadataVerifier(logger, db, conns, healthyStorages, 24*7*time.Hour, !tc.dontPerformDeletions) if tc.batchSize > 0 { verifier.batchSize = tc.batchSize } @@ -744,7 +799,7 @@ func TestVerifier_runExpiredLeaseReleaser(t *testing.T) { defer tx.Rollback(t) logger, hook := test.NewNullLogger() - verifier := NewMetadataVerifier(logrus.NewEntry(logger), tx, nil, nil, 0) + verifier := NewMetadataVerifier(logrus.NewEntry(logger), tx, nil, nil, 0, true) // set batch size lower than the number of locked leases to ensure the batching works verifier.batchSize = 2 |