From bedcb7f43ddb8d6e2cce9424e3d988fe1d5a7cc8 Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Thu, 7 Sep 2017 10:58:50 -0700 Subject: Delete conflicting redirects in background --- ...907170235_delete_conflicting_redirect_routes.rb | 66 ++++++++++++++++++++++ .../delete_conflicting_redirect_routes.rb | 52 +++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb create mode 100644 lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb new file mode 100644 index 00000000000..4111c884c28 --- /dev/null +++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb @@ -0,0 +1,66 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class DeleteConflictingRedirectRoutes < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 1000 # Number of rows to process per job + JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time + MIGRATION = 'DeleteConflictingRedirectRoutes'.freeze + + disable_ddl_transaction! + + class Route < ActiveRecord::Base + include EachBatch + + self.table_name = 'routes' + end + + def up + jobs = [] + + say opening_message + + queue_background_migration_jobs(Route, MIGRATION) + end + + def down + # nothing + end + + def opening_message + <<~MSG + Clean up redirect routes that conflict with regular routes. + See initial bug fix: + https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/13357 + MSG + end + + def queue_background_migration_jobs(model_class, job_class_name, batch_size = BATCH_SIZE) + jobs = [] + + model_class.each_batch(of: batch_size) do |relation| + start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + + # Note: This conditional will only be true if JOB_BUFFER_SIZE * batch_size < (total number of rows) + if jobs.length >= JOB_BUFFER_SIZE + # We push multiple jobs at a time to reduce the time spent in + # Sidekiq/Redis operations. We're using this buffer based approach so we + # don't need to run additional queries for every range. + bulk_queue_jobs(jobs) + jobs.clear + end + + jobs << [job_class_name, [start_id, end_id]] + end + + bulk_queue_jobs(jobs) unless jobs.empty? + end + + def bulk_queue_jobs(jobs) + say "Queuing #{jobs.size} BackgroundMigrationWorker jobs..." + + BackgroundMigrationWorker.perform_bulk(jobs) + end +end diff --git a/lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb b/lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb new file mode 100644 index 00000000000..c33b6d513da --- /dev/null +++ b/lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb @@ -0,0 +1,52 @@ +module Gitlab + module BackgroundMigration + class DeleteConflictingRedirectRoutes + class Route < ActiveRecord::Base + self.table_name = 'routes' + end + + class RedirectRoute < ActiveRecord::Base + self.table_name = 'redirect_routes' + end + + # start_id - The start ID of the range of events to process + # end_id - The end ID of the range to process. + def perform(start_id, end_id) + return unless migrate? + + conflicts = RedirectRoute.where(routes_match_redirects_clause(start_id, end_id)) + num_rows = conflicts.delete_all + + Rails.logger.info("Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutes [#{start_id}, #{end_id}] - Deleted #{num_rows} redirect routes that were conflicting with routes.") + end + + def migrate? + Route.table_exists? && RedirectRoute.table_exists? + end + + def routes_match_redirects_clause(start_id, end_id) + <<~ROUTES_MATCH_REDIRECTS + EXISTS ( + SELECT 1 FROM routes + WHERE (#{route_paths_match_redirects}) + AND routes.id BETWEEN #{start_id} AND #{end_id} + ) + ROUTES_MATCH_REDIRECTS + end + + def route_paths_match_redirects + if Gitlab::Database.postgresql? + <<~ROUTE_PATHS_MATCH_REDIRECTS + LOWER(redirect_routes.path) = LOWER(routes.path) + OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%')) + ROUTE_PATHS_MATCH_REDIRECTS + else + <<~ROUTE_PATHS_MATCH_REDIRECTS + redirect_routes.path = routes.path + OR redirect_routes.path LIKE CONCAT(routes.path, '/%') + ROUTE_PATHS_MATCH_REDIRECTS + end + end + end + end +end -- cgit v1.2.3 From f1e963bd89e737868a3a49358d714a288738c2e8 Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Thu, 7 Sep 2017 18:50:03 -0700 Subject: Add specs for deleting conflicting redirects --- ...907170235_delete_conflicting_redirect_routes.rb | 2 +- .../delete_conflicting_redirect_routes.rb | 52 ---------------------- .../delete_conflicting_redirect_routes_range.rb | 52 ++++++++++++++++++++++ ...elete_conflicting_redirect_routes_range_spec.rb | 40 +++++++++++++++++ .../delete_conflicting_redirect_routes_spec.rb | 51 +++++++++++++++++++++ 5 files changed, 144 insertions(+), 53 deletions(-) delete mode 100644 lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb create mode 100644 lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb create mode 100644 spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb create mode 100644 spec/migrations/delete_conflicting_redirect_routes_spec.rb diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb index 4111c884c28..73bb4603eb6 100644 --- a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb +++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb @@ -7,7 +7,7 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration DOWNTIME = false BATCH_SIZE = 1000 # Number of rows to process per job JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time - MIGRATION = 'DeleteConflictingRedirectRoutes'.freeze + MIGRATION = 'DeleteConflictingRedirectRoutesRange'.freeze disable_ddl_transaction! diff --git a/lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb b/lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb deleted file mode 100644 index c33b6d513da..00000000000 --- a/lib/gitlab/background_migration/delete_conflicting_redirect_routes.rb +++ /dev/null @@ -1,52 +0,0 @@ -module Gitlab - module BackgroundMigration - class DeleteConflictingRedirectRoutes - class Route < ActiveRecord::Base - self.table_name = 'routes' - end - - class RedirectRoute < ActiveRecord::Base - self.table_name = 'redirect_routes' - end - - # start_id - The start ID of the range of events to process - # end_id - The end ID of the range to process. - def perform(start_id, end_id) - return unless migrate? - - conflicts = RedirectRoute.where(routes_match_redirects_clause(start_id, end_id)) - num_rows = conflicts.delete_all - - Rails.logger.info("Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutes [#{start_id}, #{end_id}] - Deleted #{num_rows} redirect routes that were conflicting with routes.") - end - - def migrate? - Route.table_exists? && RedirectRoute.table_exists? - end - - def routes_match_redirects_clause(start_id, end_id) - <<~ROUTES_MATCH_REDIRECTS - EXISTS ( - SELECT 1 FROM routes - WHERE (#{route_paths_match_redirects}) - AND routes.id BETWEEN #{start_id} AND #{end_id} - ) - ROUTES_MATCH_REDIRECTS - end - - def route_paths_match_redirects - if Gitlab::Database.postgresql? - <<~ROUTE_PATHS_MATCH_REDIRECTS - LOWER(redirect_routes.path) = LOWER(routes.path) - OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%')) - ROUTE_PATHS_MATCH_REDIRECTS - else - <<~ROUTE_PATHS_MATCH_REDIRECTS - redirect_routes.path = routes.path - OR redirect_routes.path LIKE CONCAT(routes.path, '/%') - ROUTE_PATHS_MATCH_REDIRECTS - end - end - end - end -end diff --git a/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb new file mode 100644 index 00000000000..a45fb6b6d0c --- /dev/null +++ b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb @@ -0,0 +1,52 @@ +module Gitlab + module BackgroundMigration + class DeleteConflictingRedirectRoutesRange + class Route < ActiveRecord::Base + self.table_name = 'routes' + end + + class RedirectRoute < ActiveRecord::Base + self.table_name = 'redirect_routes' + end + + # start_id - The start ID of the range of events to process + # end_id - The end ID of the range to process. + def perform(start_id, end_id) + return unless migrate? + + conflicts = RedirectRoute.where(routes_match_redirects_clause(start_id, end_id)) + num_rows = conflicts.delete_all + + Rails.logger.info("Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange [#{start_id}, #{end_id}] - Deleted #{num_rows} redirect routes that were conflicting with routes.") + end + + def migrate? + Route.table_exists? && RedirectRoute.table_exists? + end + + def routes_match_redirects_clause(start_id, end_id) + <<~ROUTES_MATCH_REDIRECTS + EXISTS ( + SELECT 1 FROM routes + WHERE (#{route_paths_match_redirects}) + AND routes.id BETWEEN #{start_id} AND #{end_id} + ) + ROUTES_MATCH_REDIRECTS + end + + def route_paths_match_redirects + if Gitlab::Database.postgresql? + <<~ROUTE_PATHS_MATCH_REDIRECTS + LOWER(redirect_routes.path) = LOWER(routes.path) + OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%')) + ROUTE_PATHS_MATCH_REDIRECTS + else + <<~ROUTE_PATHS_MATCH_REDIRECTS + redirect_routes.path = routes.path + OR redirect_routes.path LIKE CONCAT(routes.path, '/%') + ROUTE_PATHS_MATCH_REDIRECTS + end + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb b/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb new file mode 100644 index 00000000000..5c471cbdeda --- /dev/null +++ b/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb @@ -0,0 +1,40 @@ +require 'spec_helper' + +describe Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange, :migration, schema: 20170907170235 do + let!(:redirect_routes) { table(:redirect_routes) } + let!(:routes) { table(:routes) } + + before do + routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1') + routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2') + routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3') + routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4') + routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5') + + # Valid redirects + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2') + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3') + + # Conflicting redirects + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5') + end + + it 'deletes the conflicting redirect_routes in the range' do + expect(redirect_routes.count).to eq(8) + + expect do + described_class.new.perform(1, 3) + end.to change { redirect_routes.where("path like 'foo%'").count }.from(5).to(2) + + expect do + described_class.new.perform(4, 5) + end.to change { redirect_routes.where("path like 'foo%'").count }.from(2).to(0) + + expect(redirect_routes.count).to eq(3) + end +end diff --git a/spec/migrations/delete_conflicting_redirect_routes_spec.rb b/spec/migrations/delete_conflicting_redirect_routes_spec.rb new file mode 100644 index 00000000000..5d277028b96 --- /dev/null +++ b/spec/migrations/delete_conflicting_redirect_routes_spec.rb @@ -0,0 +1,51 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170907170235_delete_conflicting_redirect_routes') + +describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do + let!(:redirect_routes) { table(:redirect_routes) } + let!(:routes) { table(:routes) } + + before do + stub_const("#{described_class.name}::BATCH_SIZE", 2) + stub_const("#{described_class.name}::JOB_BUFFER_SIZE", 2) + + routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1') + routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2') + routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3') + routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4') + routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5') + + # Valid redirects + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2') + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3') + + # Conflicting redirects + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5') + end + + it 'correctly schedules background migrations' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([described_class::MIGRATION, [1, 2]]) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([described_class::MIGRATION, [3, 4]]) + expect(BackgroundMigrationWorker.jobs[2]['args']).to eq([described_class::MIGRATION, [5, 5]]) + expect(BackgroundMigrationWorker.jobs.size).to eq 3 + end + end + end + + it 'schedules background migrations' do + Sidekiq::Testing.inline! do + expect do + migrate! + end.to change { redirect_routes.count }.from(8).to(3) + end + end +end -- cgit v1.2.3 From dbf924c57dc026747dae00141c1da67fbf856c80 Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Fri, 8 Sep 2017 13:09:52 -0700 Subject: Simplify query Performance does not need to be optimized in this case because this will be run in batches at a controlled rate. --- .../delete_conflicting_redirect_routes_range.rb | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb index a45fb6b6d0c..b1411be3016 100644 --- a/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb +++ b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb @@ -28,25 +28,14 @@ module Gitlab <<~ROUTES_MATCH_REDIRECTS EXISTS ( SELECT 1 FROM routes - WHERE (#{route_paths_match_redirects}) + WHERE ( + LOWER(redirect_routes.path) = LOWER(routes.path) + OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%')) + ) AND routes.id BETWEEN #{start_id} AND #{end_id} ) ROUTES_MATCH_REDIRECTS end - - def route_paths_match_redirects - if Gitlab::Database.postgresql? - <<~ROUTE_PATHS_MATCH_REDIRECTS - LOWER(redirect_routes.path) = LOWER(routes.path) - OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%')) - ROUTE_PATHS_MATCH_REDIRECTS - else - <<~ROUTE_PATHS_MATCH_REDIRECTS - redirect_routes.path = routes.path - OR redirect_routes.path LIKE CONCAT(routes.path, '/%') - ROUTE_PATHS_MATCH_REDIRECTS - end - end end end end -- cgit v1.2.3 From ee4f73916f586112e6479d80b3769e174414eb7e Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Fri, 8 Sep 2017 13:10:53 -0700 Subject: Extract helper for queuing background jobs --- ...907170235_delete_conflicting_redirect_routes.rb | 33 +----------- lib/gitlab/database/migration_helpers.rb | 48 +++++++++++++++++ spec/lib/gitlab/database/migration_helpers_spec.rb | 62 ++++++++++++++++++++++ .../delete_conflicting_redirect_routes_spec.rb | 4 +- 4 files changed, 113 insertions(+), 34 deletions(-) diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb index 73bb4603eb6..021b52b9ed1 100644 --- a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb +++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb @@ -5,8 +5,6 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers DOWNTIME = false - BATCH_SIZE = 1000 # Number of rows to process per job - JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time MIGRATION = 'DeleteConflictingRedirectRoutesRange'.freeze disable_ddl_transaction! @@ -18,11 +16,9 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration end def up - jobs = [] - say opening_message - queue_background_migration_jobs(Route, MIGRATION) + queue_background_migration_jobs_by_range(Route, MIGRATION) end def down @@ -36,31 +32,4 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/13357 MSG end - - def queue_background_migration_jobs(model_class, job_class_name, batch_size = BATCH_SIZE) - jobs = [] - - model_class.each_batch(of: batch_size) do |relation| - start_id, end_id = relation.pluck('MIN(id), MAX(id)').first - - # Note: This conditional will only be true if JOB_BUFFER_SIZE * batch_size < (total number of rows) - if jobs.length >= JOB_BUFFER_SIZE - # We push multiple jobs at a time to reduce the time spent in - # Sidekiq/Redis operations. We're using this buffer based approach so we - # don't need to run additional queries for every range. - bulk_queue_jobs(jobs) - jobs.clear - end - - jobs << [job_class_name, [start_id, end_id]] - end - - bulk_queue_jobs(jobs) unless jobs.empty? - end - - def bulk_queue_jobs(jobs) - say "Queuing #{jobs.size} BackgroundMigrationWorker jobs..." - - BackgroundMigrationWorker.perform_bulk(jobs) - end end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index fb14798efe6..18aefc9558b 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1,6 +1,9 @@ module Gitlab module Database module MigrationHelpers + BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job + BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time + # Adds `created_at` and `updated_at` columns with timezone information. # # This method is an improved version of Rails' built-in method `add_timestamps`. @@ -653,6 +656,51 @@ into similar problems in the future (e.g. when new tables are created). EOF end end + + # Queues background migration jobs for an entire table, batched by ID range. + # + # model_class - The table being iterated over + # job_class_name - The background migration job class as a string + # batch_size - The maximum number of rows per job + # + # Example: + # + # class Route < ActiveRecord::Base + # include EachBatch + # self.table_name = 'routes' + # end + # + # queue_background_migration_jobs_by_range(Route, 'ProcessRoutes') + # + # Where the model_class includes EachBatch, and the background migration exists: + # + # class Gitlab::BackgroundMigration::ProcessRoutes + # def perform(start_id, end_id) + # # do something + # end + # end + def queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size = BACKGROUND_MIGRATION_BATCH_SIZE) + raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') + + jobs = [] + + model_class.each_batch(of: batch_size) do |relation| + start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + + if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE + # Note: This code path generally only helps with many millions of rows + # We push multiple jobs at a time to reduce the time spent in + # Sidekiq/Redis operations. We're using this buffer based approach so we + # don't need to run additional queries for every range. + BackgroundMigrationWorker.perform_bulk(jobs) + jobs.clear + end + + jobs << [job_class_name, [start_id, end_id]] + end + + BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty? + end end end end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index 1bcdc369c44..fab77889059 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -914,4 +914,66 @@ describe Gitlab::Database::MigrationHelpers do .to raise_error(RuntimeError, /Your database user is not allowed/) end end + + describe '#queue_background_migration_jobs_by_range', :sidekiq do + context 'when the model has an ID column' do + let!(:id1) { create(:user).id } + let!(:id2) { create(:user).id } + let!(:id3) { create(:user).id } + + before do + User.class_eval do + include EachBatch + end + end + + context 'with enough rows to bulk queue jobs more than once' do + before do + stub_const('Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE', 1) + end + + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + end + end + + it 'queues jobs in groups of buffer size 1' do + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]]]) + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id3, id3]]]) + + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + end + end + + context 'with not enough rows to bulk queue jobs more than once' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + end + end + + it 'queues jobs in bulk all at once (big buffer size)' do + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]], + ['FooJob', [id3, id3]]]) + + model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + end + end + end + + context "when the model doesn't have an ID column" do + it 'raises error (for now)' do + expect do + model.queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob') + end.to raise_error(StandardError, /does not have an ID/) + end + end + end end diff --git a/spec/migrations/delete_conflicting_redirect_routes_spec.rb b/spec/migrations/delete_conflicting_redirect_routes_spec.rb index 5d277028b96..b79d04c1a21 100644 --- a/spec/migrations/delete_conflicting_redirect_routes_spec.rb +++ b/spec/migrations/delete_conflicting_redirect_routes_spec.rb @@ -6,8 +6,8 @@ describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do let!(:routes) { table(:routes) } before do - stub_const("#{described_class.name}::BATCH_SIZE", 2) - stub_const("#{described_class.name}::JOB_BUFFER_SIZE", 2) + stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_BATCH_SIZE", 2) + stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE", 2) routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1') routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2') -- cgit v1.2.3 From c9232087218e03a5eb880e4b4fabe7d0f5a23727 Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Mon, 11 Sep 2017 12:20:04 -0700 Subject: Spread out the work a little --- ...907170235_delete_conflicting_redirect_routes.rb | 2 +- lib/gitlab/database/migration_helpers.rb | 46 +++++++++++++- spec/lib/gitlab/database/migration_helpers_spec.rb | 72 ++++++++++++++++++++-- .../delete_conflicting_redirect_routes_spec.rb | 7 +++ 4 files changed, 117 insertions(+), 10 deletions(-) diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb index 021b52b9ed1..e2e1cddd9e8 100644 --- a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb +++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb @@ -18,7 +18,7 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration def up say opening_message - queue_background_migration_jobs_by_range(Route, MIGRATION) + queue_background_migration_jobs_by_range_at_intervals(Route, MIGRATION, 1.minute) end def down diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 18aefc9558b..2c35da8f1aa 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -657,7 +657,9 @@ into similar problems in the future (e.g. when new tables are created). end end - # Queues background migration jobs for an entire table, batched by ID range. + # Bulk queues background migration jobs for an entire table, batched by ID range. + # "Bulk" meaning many jobs will be pushed at a time for efficiency. + # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`. # # model_class - The table being iterated over # job_class_name - The background migration job class as a string @@ -670,7 +672,7 @@ into similar problems in the future (e.g. when new tables are created). # self.table_name = 'routes' # end # - # queue_background_migration_jobs_by_range(Route, 'ProcessRoutes') + # bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes') # # Where the model_class includes EachBatch, and the background migration exists: # @@ -679,7 +681,7 @@ into similar problems in the future (e.g. when new tables are created). # # do something # end # end - def queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size = BACKGROUND_MIGRATION_BATCH_SIZE) + def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') jobs = [] @@ -701,6 +703,44 @@ into similar problems in the future (e.g. when new tables are created). BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty? end + + # Queues background migration jobs for an entire table, batched by ID range. + # Each job is scheduled with a `delay_interval` in between. + # If you use a small interval, then some jobs may run at the same time. + # + # model_class - The table being iterated over + # job_class_name - The background migration job class as a string + # delay_interval - The duration between each job's scheduled time (must respond to `to_f`) + # batch_size - The maximum number of rows per job + # + # Example: + # + # class Route < ActiveRecord::Base + # include EachBatch + # self.table_name = 'routes' + # end + # + # queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute) + # + # Where the model_class includes EachBatch, and the background migration exists: + # + # class Gitlab::BackgroundMigration::ProcessRoutes + # def perform(start_id, end_id) + # # do something + # end + # end + def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) + raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') + + model_class.each_batch(of: batch_size) do |relation, index| + start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + + # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for + # the same time, which is not helpful in most cases where we wish to + # spread the work over time. + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id]) + end + end end end end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index fab77889059..3c8350b3aad 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -915,7 +915,7 @@ describe Gitlab::Database::MigrationHelpers do end end - describe '#queue_background_migration_jobs_by_range', :sidekiq do + describe '#bulk_queue_background_migration_jobs_by_range', :sidekiq do context 'when the model has an ID column' do let!(:id1) { create(:user).id } let!(:id2) { create(:user).id } @@ -934,7 +934,7 @@ describe Gitlab::Database::MigrationHelpers do it 'queues jobs correctly' do Sidekiq::Testing.fake! do - model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) @@ -945,14 +945,14 @@ describe Gitlab::Database::MigrationHelpers do expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]]]) expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id3, id3]]]) - model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) end end context 'with not enough rows to bulk queue jobs more than once' do it 'queues jobs correctly' do Sidekiq::Testing.fake! do - model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) @@ -963,7 +963,67 @@ describe Gitlab::Database::MigrationHelpers do expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]], ['FooJob', [id3, id3]]]) - model.queue_background_migration_jobs_by_range(User, 'FooJob', 2) + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) + end + end + + context 'without specifying batch_size' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob') + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]]) + end + end + end + end + + context "when the model doesn't have an ID column" do + it 'raises error (for now)' do + expect do + model.bulk_queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob') + end.to raise_error(StandardError, /does not have an ID/) + end + end + end + + describe '#queue_background_migration_jobs_by_range_at_intervals', :sidekiq do + context 'when the model has an ID column' do + let!(:id1) { create(:user).id } + let!(:id2) { create(:user).id } + let!(:id3) { create(:user).id } + + around do |example| + Timecop.freeze { example.run } + end + + before do + User.class_eval do + include EachBatch + end + end + + context 'with batch_size option' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds, batch_size: 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.seconds.from_now.to_f) + end + end + end + + context 'without batch_size option' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]]) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f) + end end end end @@ -971,7 +1031,7 @@ describe Gitlab::Database::MigrationHelpers do context "when the model doesn't have an ID column" do it 'raises error (for now)' do expect do - model.queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob') + model.queue_background_migration_jobs_by_range_at_intervals(ProjectAuthorization, 'FooJob', 10.seconds) end.to raise_error(StandardError, /does not have an ID/) end end diff --git a/spec/migrations/delete_conflicting_redirect_routes_spec.rb b/spec/migrations/delete_conflicting_redirect_routes_spec.rb index b79d04c1a21..cf872595f21 100644 --- a/spec/migrations/delete_conflicting_redirect_routes_spec.rb +++ b/spec/migrations/delete_conflicting_redirect_routes_spec.rb @@ -5,6 +5,10 @@ describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do let!(:redirect_routes) { table(:redirect_routes) } let!(:routes) { table(:routes) } + around do |example| + Timecop.freeze { example.run } + end + before do stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_BATCH_SIZE", 2) stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE", 2) @@ -34,8 +38,11 @@ describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do migrate! expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([described_class::MIGRATION, [1, 2]]) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(1.minute.from_now.to_f) expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([described_class::MIGRATION, [3, 4]]) + expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(2.minutes.from_now.to_f) expect(BackgroundMigrationWorker.jobs[2]['args']).to eq([described_class::MIGRATION, [5, 5]]) + expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(3.minutes.from_now.to_f) expect(BackgroundMigrationWorker.jobs.size).to eq 3 end end -- cgit v1.2.3 From 7b7fb754118c7e86f94c4e5efaea632929d293da Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Thu, 14 Sep 2017 08:57:00 -0700 Subject: Reduce batch size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …to stay within our query timeout of 60s. Also reduce the job interval to keep the same overall migration time of ~3.3 days. --- .../20170907170235_delete_conflicting_redirect_routes.rb | 4 +++- spec/migrations/delete_conflicting_redirect_routes_spec.rb | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb index e2e1cddd9e8..3e84b295be4 100644 --- a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb +++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb @@ -6,6 +6,8 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration DOWNTIME = false MIGRATION = 'DeleteConflictingRedirectRoutesRange'.freeze + BATCH_SIZE = 200 # At 200, I expect under 20s per batch, which is under our query timeout of 60s. + DELAY_INTERVAL = 12.seconds disable_ddl_transaction! @@ -18,7 +20,7 @@ class DeleteConflictingRedirectRoutes < ActiveRecord::Migration def up say opening_message - queue_background_migration_jobs_by_range_at_intervals(Route, MIGRATION, 1.minute) + queue_background_migration_jobs_by_range_at_intervals(Route, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) end def down diff --git a/spec/migrations/delete_conflicting_redirect_routes_spec.rb b/spec/migrations/delete_conflicting_redirect_routes_spec.rb index cf872595f21..1df2477da51 100644 --- a/spec/migrations/delete_conflicting_redirect_routes_spec.rb +++ b/spec/migrations/delete_conflicting_redirect_routes_spec.rb @@ -10,7 +10,7 @@ describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do end before do - stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_BATCH_SIZE", 2) + stub_const("DeleteConflictingRedirectRoutes::BATCH_SIZE", 2) stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE", 2) routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1') @@ -38,11 +38,11 @@ describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do migrate! expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([described_class::MIGRATION, [1, 2]]) - expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(1.minute.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(12.seconds.from_now.to_f) expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([described_class::MIGRATION, [3, 4]]) - expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(2.minutes.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(24.seconds.from_now.to_f) expect(BackgroundMigrationWorker.jobs[2]['args']).to eq([described_class::MIGRATION, [5, 5]]) - expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(3.minutes.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(36.seconds.from_now.to_f) expect(BackgroundMigrationWorker.jobs.size).to eq 3 end end -- cgit v1.2.3