From f7d0bd088a94a0da5c49f5fa64b7f03f4c112c31 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Mon, 10 Jan 2022 06:47:57 -0800 Subject: Optimize link repository ID migration The previous migration was slow at times because the update would cause PostgreSQL to do a merge join and then filter out rows matching `repository_id IS NULL`. As more rows migrated gained a `repository_id`, this would increase the query time significantly for each batch. The batching was added to deal with limiting the payload size of a trigger update. We can make this migration go faster by disabling the triggers in the transactions, rollback to 2bbec66c, and re-enable the trigger. Relates to https://gitlab.com/gitlab-org/gitaly/-/issues/3973 Changelog: fixed --- .../20210906145021_link_repository_id.go | 38 ++++++---------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/internal/praefect/datastore/migrations/20210906145021_link_repository_id.go b/internal/praefect/datastore/migrations/20210906145021_link_repository_id.go index 8ee84d968..39060d230 100644 --- a/internal/praefect/datastore/migrations/20210906145021_link_repository_id.go +++ b/internal/praefect/datastore/migrations/20210906145021_link_repository_id.go @@ -6,40 +6,22 @@ func init() { m := &migrate.Migration{ Id: "20210906145021_link_repository_id", Up: []string{ - `-- +migrate StatementBegin -DO $BODY$ - DECLARE - count_val integer DEFAULT 0; - BEGIN - LOOP - WITH updated_rows AS ( - UPDATE storage_repositories - SET repository_id = sub.repository_id - FROM ( - SELECT storage_repositories.virtual_storage, storage_repositories.storage, storage_repositories.relative_path, repositories.repository_id - FROM storage_repositories JOIN repositories USING (virtual_storage, relative_path) - WHERE storage_repositories.repository_id IS NULL - LIMIT 14 - ) AS sub - WHERE storage_repositories.virtual_storage = sub.virtual_storage - AND storage_repositories.storage = sub.storage - AND storage_repositories.relative_path = sub.relative_path - RETURNING storage_repositories.repository_id - ) - SELECT COUNT(*) INTO count_val FROM updated_rows; - EXIT WHEN count_val = 0; - END LOOP; - END -$BODY$ -LANGUAGE plpgsql --- +migrate StatementEnd`, + "ALTER TABLE storage_repositories DISABLE TRIGGER notify_on_update", + ` +UPDATE storage_repositories +SET repository_id = repositories.repository_id +FROM repositories +WHERE storage_repositories.virtual_storage = repositories.virtual_storage +AND storage_repositories.relative_path = repositories.relative_path + `, ` UPDATE repository_assignments SET repository_id = repositories.repository_id FROM repositories WHERE repository_assignments.virtual_storage = repositories.virtual_storage AND repository_assignments.relative_path = repositories.relative_path - `, + `, + "ALTER TABLE storage_repositories ENABLE TRIGGER notify_on_update", }, Down: []string{}, } -- cgit v1.2.3