diff options
author | Pavlo Strokov <pstrokov@gitlab.com> | 2021-07-21 14:34:57 +0300 |
---|---|---|
committer | Pavlo Strokov <pstrokov@gitlab.com> | 2021-07-21 14:34:57 +0300 |
commit | 996a4adda765e8ced18c72eca0ebd27848afa3c9 (patch) | |
tree | 8174c9b80709fb103c4ca9e6d0ea84f280e4bbab | |
parent | bcb19d3596a4df74ff1fb3a4a3822c6ef9470d34 (diff) | |
parent | 943fdd846af8ccb755b104dceb65bd8926f960b4 (diff) |
Merge branch 'ps-remove-orphan-repos' into 'master'
Remove orphan repository replicas
Closes #3480
See merge request gitlab-org/gitaly!3674
-rw-r--r-- | internal/praefect/reconciler/reconciler.go | 27 | ||||
-rw-r--r-- | internal/praefect/reconciler/reconciler_test.go | 109 |
2 files changed, 130 insertions, 6 deletions
diff --git a/internal/praefect/reconciler/reconciler.go b/internal/praefect/reconciler/reconciler.go index 6b2a7a594..308a7aff1 100644 --- a/internal/praefect/reconciler/reconciler.go +++ b/internal/praefect/reconciler/reconciler.go @@ -120,7 +120,7 @@ func (r *Reconciler) reconcile(ctx context.Context) error { for virtualStorage, healthyStorages := range r.hc.HealthyNodes() { if len(healthyStorages) < 2 { - // minimum two healthy storages within a virtual stoage needed for valid + // minimum two healthy storages within a virtual storage needed for valid // replication source and target r.log.WithField("virtual_storage", virtualStorage).Info("reconciliation skipped for virtual storage due to not having enough healthy storages") continue @@ -190,6 +190,31 @@ delete_jobs AS ( AND job->>'relative_path' = relative_path AND job->>'change' = 'delete_replica' ) + UNION ALL + -- Schedules repository removal operations for the storages where repository exists, but shouldn't. + -- That can happen when repository removal replication job failed on the secondaries and + -- repository remains on storage unused (because it doesn't exist in the 'repositories' table anymore). + SELECT * FROM ( + SELECT DISTINCT ON (virtual_storage, relative_path) + virtual_storage, + relative_path, + storage + FROM storage_repositories + JOIN healthy_storages USING (virtual_storage, storage) + LEFT JOIN repositories USING (virtual_storage, relative_path) + WHERE repositories.virtual_storage IS NULL AND + NOT EXISTS ( + -- Ensure there are no scheduled 'delete_replica' type jobs for the repository. + SELECT FROM replication_queue + WHERE state NOT IN ('completed', 'cancelled', 'dead') + AND job->>'virtual_storage' = virtual_storage + AND job->>'relative_path' = relative_path + AND job->>'change' = 'delete_replica' + ) + -- this sorting is not required, but it simplifies testing and doesn't influence + -- performance as a noticeable degradation + ORDER BY virtual_storage, relative_path, storage + ) orphans ), update_jobs AS ( diff --git a/internal/praefect/reconciler/reconciler_test.go b/internal/praefect/reconciler/reconciler_test.go index 7f034b60b..9ba2d1c79 100644 --- a/internal/praefect/reconciler/reconciler_test.go +++ b/internal/praefect/reconciler/reconciler_test.go @@ -96,11 +96,12 @@ func TestReconciler(t *testing.T) { } for _, tc := range []struct { - desc string - healthyStorages storages - repositories repositories - existingJobs existingJobs - reconciliationJobs jobs + desc string + healthyStorages storages + repositories repositories + existingJobs existingJobs + deletedRepositories map[string][]string + reconciliationJobs jobs }{ { desc: "no repositories", @@ -984,6 +985,94 @@ func TestReconciler(t *testing.T) { }, }, }, + { + desc: "orphan repositories replicas should be scheduled for deletion", + healthyStorages: configuredStoragesWithout("storage-3"), + repositories: repositories{ + "virtual-storage-1": { + "relative-path-1": { + "storage-1": {generation: 1}, + "storage-2": {generation: 1}, + }, + "relative-path-2": { + "storage-1": {generation: 1}, + "storage-2": {generation: 1}, + }, + "relative-path-3": { + "storage-1": {generation: 1}, + "storage-2": {generation: 1}, + }, + }, + }, + deletedRepositories: map[string][]string{"virtual-storage-1": {"relative-path-2", "relative-path-3"}}, + reconciliationJobs: jobs{ + { + Change: datastore.DeleteReplica, + VirtualStorage: "virtual-storage-1", + RelativePath: "relative-path-2", + TargetNodeStorage: "storage-1", + }, + { + Change: datastore.DeleteReplica, + VirtualStorage: "virtual-storage-1", + RelativePath: "relative-path-3", + TargetNodeStorage: "storage-1", + }, + }, + }, + { + desc: "orphan repositories replicas should be scheduled for deletion (only for healthy nodes)", + healthyStorages: configuredStoragesWithout("storage-1"), + repositories: repositories{ + "virtual-storage-1": { + "relative-path-1": { + "storage-1": {generation: 1}, + "storage-2": {generation: 1}, + "storage-3": {generation: 1}, + }, + "relative-path-2": { + "storage-1": {generation: 1}, + "storage-2": {generation: 1}, + "storage-3": {generation: 1}, + }, + }, + }, + deletedRepositories: map[string][]string{"virtual-storage-1": {"relative-path-2"}}, + reconciliationJobs: jobs{ + { + Change: datastore.DeleteReplica, + VirtualStorage: "virtual-storage-1", + RelativePath: "relative-path-2", + TargetNodeStorage: "storage-2", + }, + }, + }, + { + desc: "orphan repositories replicas should not be scheduled if replication event already exists", + healthyStorages: configuredStorages, + repositories: repositories{ + "virtual-storage-1": { + "relative-path-1": { + "storage-1": {generation: 1}, + "storage-2": {generation: 1}, + "storage-3": {generation: 1}, + }, + }, + }, + deletedRepositories: map[string][]string{"virtual-storage-1": {"relative-path-1"}}, + existingJobs: []datastore.ReplicationEvent{ + { + State: datastore.JobStateReady, + Job: datastore.ReplicationJob{ + Change: datastore.DeleteReplica, + VirtualStorage: "virtual-storage-1", + TargetNodeStorage: "storage-1", + RelativePath: "relative-path-1", + }, + }, + }, + reconciliationJobs: nil, + }, } { t.Run(tc.desc, func(t *testing.T) { ctx, cancel := testhelper.Context() @@ -1055,6 +1144,16 @@ func TestReconciler(t *testing.T) { require.True(t, resetted) } + for vs, repos := range tc.deletedRepositories { + for _, repo := range repos { + _, err := db.Exec( + "DELETE FROM repositories WHERE virtual_storage = $1 AND relative_path = $2", + vs, repo, + ) + require.NoError(t, err) + } + } + // run reconcile in two concurrent transactions to ensure everything works // as expected with multiple Praefect's reconciling at the same time firstTx, err := db.Begin() |