Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavlo Strokov <pstrokov@gitlab.com>2021-07-21 14:34:57 +0300
committerPavlo Strokov <pstrokov@gitlab.com>2021-07-21 14:34:57 +0300
commit996a4adda765e8ced18c72eca0ebd27848afa3c9 (patch)
tree8174c9b80709fb103c4ca9e6d0ea84f280e4bbab
parentbcb19d3596a4df74ff1fb3a4a3822c6ef9470d34 (diff)
parent943fdd846af8ccb755b104dceb65bd8926f960b4 (diff)
Merge branch 'ps-remove-orphan-repos' into 'master'
Remove orphan repository replicas Closes #3480 See merge request gitlab-org/gitaly!3674
-rw-r--r--internal/praefect/reconciler/reconciler.go27
-rw-r--r--internal/praefect/reconciler/reconciler_test.go109
2 files changed, 130 insertions, 6 deletions
diff --git a/internal/praefect/reconciler/reconciler.go b/internal/praefect/reconciler/reconciler.go
index 6b2a7a594..308a7aff1 100644
--- a/internal/praefect/reconciler/reconciler.go
+++ b/internal/praefect/reconciler/reconciler.go
@@ -120,7 +120,7 @@ func (r *Reconciler) reconcile(ctx context.Context) error {
for virtualStorage, healthyStorages := range r.hc.HealthyNodes() {
if len(healthyStorages) < 2 {
- // minimum two healthy storages within a virtual stoage needed for valid
+ // minimum two healthy storages within a virtual storage needed for valid
// replication source and target
r.log.WithField("virtual_storage", virtualStorage).Info("reconciliation skipped for virtual storage due to not having enough healthy storages")
continue
@@ -190,6 +190,31 @@ delete_jobs AS (
AND job->>'relative_path' = relative_path
AND job->>'change' = 'delete_replica'
)
+ UNION ALL
+ -- Schedules repository removal operations for the storages where repository exists, but shouldn't.
+ -- That can happen when repository removal replication job failed on the secondaries and
+ -- repository remains on storage unused (because it doesn't exist in the 'repositories' table anymore).
+ SELECT * FROM (
+ SELECT DISTINCT ON (virtual_storage, relative_path)
+ virtual_storage,
+ relative_path,
+ storage
+ FROM storage_repositories
+ JOIN healthy_storages USING (virtual_storage, storage)
+ LEFT JOIN repositories USING (virtual_storage, relative_path)
+ WHERE repositories.virtual_storage IS NULL AND
+ NOT EXISTS (
+ -- Ensure there are no scheduled 'delete_replica' type jobs for the repository.
+ SELECT FROM replication_queue
+ WHERE state NOT IN ('completed', 'cancelled', 'dead')
+ AND job->>'virtual_storage' = virtual_storage
+ AND job->>'relative_path' = relative_path
+ AND job->>'change' = 'delete_replica'
+ )
+ -- this sorting is not required, but it simplifies testing and doesn't influence
+ -- performance as a noticeable degradation
+ ORDER BY virtual_storage, relative_path, storage
+ ) orphans
),
update_jobs AS (
diff --git a/internal/praefect/reconciler/reconciler_test.go b/internal/praefect/reconciler/reconciler_test.go
index 7f034b60b..9ba2d1c79 100644
--- a/internal/praefect/reconciler/reconciler_test.go
+++ b/internal/praefect/reconciler/reconciler_test.go
@@ -96,11 +96,12 @@ func TestReconciler(t *testing.T) {
}
for _, tc := range []struct {
- desc string
- healthyStorages storages
- repositories repositories
- existingJobs existingJobs
- reconciliationJobs jobs
+ desc string
+ healthyStorages storages
+ repositories repositories
+ existingJobs existingJobs
+ deletedRepositories map[string][]string
+ reconciliationJobs jobs
}{
{
desc: "no repositories",
@@ -984,6 +985,94 @@ func TestReconciler(t *testing.T) {
},
},
},
+ {
+ desc: "orphan repositories replicas should be scheduled for deletion",
+ healthyStorages: configuredStoragesWithout("storage-3"),
+ repositories: repositories{
+ "virtual-storage-1": {
+ "relative-path-1": {
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 1},
+ },
+ "relative-path-2": {
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 1},
+ },
+ "relative-path-3": {
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 1},
+ },
+ },
+ },
+ deletedRepositories: map[string][]string{"virtual-storage-1": {"relative-path-2", "relative-path-3"}},
+ reconciliationJobs: jobs{
+ {
+ Change: datastore.DeleteReplica,
+ VirtualStorage: "virtual-storage-1",
+ RelativePath: "relative-path-2",
+ TargetNodeStorage: "storage-1",
+ },
+ {
+ Change: datastore.DeleteReplica,
+ VirtualStorage: "virtual-storage-1",
+ RelativePath: "relative-path-3",
+ TargetNodeStorage: "storage-1",
+ },
+ },
+ },
+ {
+ desc: "orphan repositories replicas should be scheduled for deletion (only for healthy nodes)",
+ healthyStorages: configuredStoragesWithout("storage-1"),
+ repositories: repositories{
+ "virtual-storage-1": {
+ "relative-path-1": {
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 1},
+ "storage-3": {generation: 1},
+ },
+ "relative-path-2": {
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 1},
+ "storage-3": {generation: 1},
+ },
+ },
+ },
+ deletedRepositories: map[string][]string{"virtual-storage-1": {"relative-path-2"}},
+ reconciliationJobs: jobs{
+ {
+ Change: datastore.DeleteReplica,
+ VirtualStorage: "virtual-storage-1",
+ RelativePath: "relative-path-2",
+ TargetNodeStorage: "storage-2",
+ },
+ },
+ },
+ {
+ desc: "orphan repositories replicas should not be scheduled if replication event already exists",
+ healthyStorages: configuredStorages,
+ repositories: repositories{
+ "virtual-storage-1": {
+ "relative-path-1": {
+ "storage-1": {generation: 1},
+ "storage-2": {generation: 1},
+ "storage-3": {generation: 1},
+ },
+ },
+ },
+ deletedRepositories: map[string][]string{"virtual-storage-1": {"relative-path-1"}},
+ existingJobs: []datastore.ReplicationEvent{
+ {
+ State: datastore.JobStateReady,
+ Job: datastore.ReplicationJob{
+ Change: datastore.DeleteReplica,
+ VirtualStorage: "virtual-storage-1",
+ TargetNodeStorage: "storage-1",
+ RelativePath: "relative-path-1",
+ },
+ },
+ },
+ reconciliationJobs: nil,
+ },
} {
t.Run(tc.desc, func(t *testing.T) {
ctx, cancel := testhelper.Context()
@@ -1055,6 +1144,16 @@ func TestReconciler(t *testing.T) {
require.True(t, resetted)
}
+ for vs, repos := range tc.deletedRepositories {
+ for _, repo := range repos {
+ _, err := db.Exec(
+ "DELETE FROM repositories WHERE virtual_storage = $1 AND relative_path = $2",
+ vs, repo,
+ )
+ require.NoError(t, err)
+ }
+ }
+
// run reconcile in two concurrent transactions to ensure everything works
// as expected with multiple Praefect's reconciling at the same time
firstTx, err := db.Begin()