diff options
author | Douwe Maan <douwe@gitlab.com> | 2017-09-18 11:24:52 +0300 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2017-09-18 11:24:52 +0300 |
commit | 36ad91d75a91bacb9b1b1c9997e734166cf4e201 (patch) | |
tree | 03e52e684612d3aa6f83363372940e3ea769b2ed | |
parent | b7cd5897098ec99b5eca7a3ebcc1c70d7501cd23 (diff) | |
parent | 7b7fb754118c7e86f94c4e5efaea632929d293da (diff) |
Merge branch 'mk-delete-conflicting-redirects-mysql' into 'master'
Clean up redirect routes that conflict with regular routes
Closes #36229
See merge request gitlab-org/gitlab-ce!13783
6 files changed, 386 insertions, 0 deletions
diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb new file mode 100644 index 00000000000..3e84b295be4 --- /dev/null +++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb @@ -0,0 +1,37 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class DeleteConflictingRedirectRoutes < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + MIGRATION = 'DeleteConflictingRedirectRoutesRange'.freeze + BATCH_SIZE = 200 # At 200, I expect under 20s per batch, which is under our query timeout of 60s. + DELAY_INTERVAL = 12.seconds + + disable_ddl_transaction! + + class Route < ActiveRecord::Base + include EachBatch + + self.table_name = 'routes' + end + + def up + say opening_message + + queue_background_migration_jobs_by_range_at_intervals(Route, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + end + + def down + # nothing + end + + def opening_message + <<~MSG + Clean up redirect routes that conflict with regular routes. + See initial bug fix: + https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/13357 + MSG + end +end diff --git a/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb new file mode 100644 index 00000000000..b1411be3016 --- /dev/null +++ b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb @@ -0,0 +1,41 @@ +module Gitlab + module BackgroundMigration + class DeleteConflictingRedirectRoutesRange + class Route < ActiveRecord::Base + self.table_name = 'routes' + end + + class RedirectRoute < ActiveRecord::Base + self.table_name = 'redirect_routes' + end + + # start_id - The start ID of the range of events to process + # end_id - The end ID of the range to process. + def perform(start_id, end_id) + return unless migrate? + + conflicts = RedirectRoute.where(routes_match_redirects_clause(start_id, end_id)) + num_rows = conflicts.delete_all + + Rails.logger.info("Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange [#{start_id}, #{end_id}] - Deleted #{num_rows} redirect routes that were conflicting with routes.") + end + + def migrate? + Route.table_exists? && RedirectRoute.table_exists? + end + + def routes_match_redirects_clause(start_id, end_id) + <<~ROUTES_MATCH_REDIRECTS + EXISTS ( + SELECT 1 FROM routes + WHERE ( + LOWER(redirect_routes.path) = LOWER(routes.path) + OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%')) + ) + AND routes.id BETWEEN #{start_id} AND #{end_id} + ) + ROUTES_MATCH_REDIRECTS + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index fb14798efe6..2c35da8f1aa 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1,6 +1,9 @@ module Gitlab module Database module MigrationHelpers + BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job + BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time + # Adds `created_at` and `updated_at` columns with timezone information. # # This method is an improved version of Rails' built-in method `add_timestamps`. @@ -653,6 +656,91 @@ into similar problems in the future (e.g. when new tables are created). EOF end end + + # Bulk queues background migration jobs for an entire table, batched by ID range. + # "Bulk" meaning many jobs will be pushed at a time for efficiency. + # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`. + # + # model_class - The table being iterated over + # job_class_name - The background migration job class as a string + # batch_size - The maximum number of rows per job + # + # Example: + # + # class Route < ActiveRecord::Base + # include EachBatch + # self.table_name = 'routes' + # end + # + # bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes') + # + # Where the model_class includes EachBatch, and the background migration exists: + # + # class Gitlab::BackgroundMigration::ProcessRoutes + # def perform(start_id, end_id) + # # do something + # end + # end + def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) + raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') + + jobs = [] + + model_class.each_batch(of: batch_size) do |relation| + start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + + if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE + # Note: This code path generally only helps with many millions of rows + # We push multiple jobs at a time to reduce the time spent in + # Sidekiq/Redis operations. We're using this buffer based approach so we + # don't need to run additional queries for every range. + BackgroundMigrationWorker.perform_bulk(jobs) + jobs.clear + end + + jobs << [job_class_name, [start_id, end_id]] + end + + BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty? + end + + # Queues background migration jobs for an entire table, batched by ID range. + # Each job is scheduled with a `delay_interval` in between. + # If you use a small interval, then some jobs may run at the same time. + # + # model_class - The table being iterated over + # job_class_name - The background migration job class as a string + # delay_interval - The duration between each job's scheduled time (must respond to `to_f`) + # batch_size - The maximum number of rows per job + # + # Example: + # + # class Route < ActiveRecord::Base + # include EachBatch + # self.table_name = 'routes' + # end + # + # queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute) + # + # Where the model_class includes EachBatch, and the background migration exists: + # + # class Gitlab::BackgroundMigration::ProcessRoutes + # def perform(start_id, end_id) + # # do something + # end + # end + def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) + raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') + + model_class.each_batch(of: batch_size) do |relation, index| + start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + + # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for + # the same time, which is not helpful in most cases where we wish to + # spread the work over time. + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id]) + end + end end end end diff --git a/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb b/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb new file mode 100644 index 00000000000..5c471cbdeda --- /dev/null +++ b/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb @@ -0,0 +1,40 @@ +require 'spec_helper' + +describe Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange, :migration, schema: 20170907170235 do + let!(:redirect_routes) { table(:redirect_routes) } + let!(:routes) { table(:routes) } + + before do + routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1') + routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2') + routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3') + routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4') + routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5') + + # Valid redirects + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2') + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3') + + # Conflicting redirects + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5') + end + + it 'deletes the conflicting redirect_routes in the range' do + expect(redirect_routes.count).to eq(8) + + expect do + described_class.new.perform(1, 3) + end.to change { redirect_routes.where("path like 'foo%'").count }.from(5).to(2) + + expect do + described_class.new.perform(4, 5) + end.to change { redirect_routes.where("path like 'foo%'").count }.from(2).to(0) + + expect(redirect_routes.count).to eq(3) + end +end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index 1bcdc369c44..3c8350b3aad 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -914,4 +914,126 @@ describe Gitlab::Database::MigrationHelpers do .to raise_error(RuntimeError, /Your database user is not allowed/) end end + + describe '#bulk_queue_background_migration_jobs_by_range', :sidekiq do + context 'when the model has an ID column' do + let!(:id1) { create(:user).id } + let!(:id2) { create(:user).id } + let!(:id3) { create(:user).id } + + before do + User.class_eval do + include EachBatch + end + end + + context 'with enough rows to bulk queue jobs more than once' do + before do + stub_const('Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE', 1) + end + + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + end + end + + it 'queues jobs in groups of buffer size 1' do + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]]]) + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id3, id3]]]) + + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) + end + end + + context 'with not enough rows to bulk queue jobs more than once' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + end + end + + it 'queues jobs in bulk all at once (big buffer size)' do + expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]], + ['FooJob', [id3, id3]]]) + + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2) + end + end + + context 'without specifying batch_size' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob') + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]]) + end + end + end + end + + context "when the model doesn't have an ID column" do + it 'raises error (for now)' do + expect do + model.bulk_queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob') + end.to raise_error(StandardError, /does not have an ID/) + end + end + end + + describe '#queue_background_migration_jobs_by_range_at_intervals', :sidekiq do + context 'when the model has an ID column' do + let!(:id1) { create(:user).id } + let!(:id2) { create(:user).id } + let!(:id3) { create(:user).id } + + around do |example| + Timecop.freeze { example.run } + end + + before do + User.class_eval do + include EachBatch + end + end + + context 'with batch_size option' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds, batch_size: 2) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.seconds.from_now.to_f) + end + end + end + + context 'without batch_size option' do + it 'queues jobs correctly' do + Sidekiq::Testing.fake! do + model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds) + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]]) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f) + end + end + end + end + + context "when the model doesn't have an ID column" do + it 'raises error (for now)' do + expect do + model.queue_background_migration_jobs_by_range_at_intervals(ProjectAuthorization, 'FooJob', 10.seconds) + end.to raise_error(StandardError, /does not have an ID/) + end + end + end end diff --git a/spec/migrations/delete_conflicting_redirect_routes_spec.rb b/spec/migrations/delete_conflicting_redirect_routes_spec.rb new file mode 100644 index 00000000000..1df2477da51 --- /dev/null +++ b/spec/migrations/delete_conflicting_redirect_routes_spec.rb @@ -0,0 +1,58 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170907170235_delete_conflicting_redirect_routes') + +describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do + let!(:redirect_routes) { table(:redirect_routes) } + let!(:routes) { table(:routes) } + + around do |example| + Timecop.freeze { example.run } + end + + before do + stub_const("DeleteConflictingRedirectRoutes::BATCH_SIZE", 2) + stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE", 2) + + routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1') + routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2') + routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3') + routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4') + routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5') + + # Valid redirects + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2') + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3') + + # Conflicting redirects + redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4') + redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5') + end + + it 'correctly schedules background migrations' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([described_class::MIGRATION, [1, 2]]) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(12.seconds.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([described_class::MIGRATION, [3, 4]]) + expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(24.seconds.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[2]['args']).to eq([described_class::MIGRATION, [5, 5]]) + expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(36.seconds.from_now.to_f) + expect(BackgroundMigrationWorker.jobs.size).to eq 3 + end + end + end + + it 'schedules background migrations' do + Sidekiq::Testing.inline! do + expect do + migrate! + end.to change { redirect_routes.count }.from(8).to(3) + end + end +end |