Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2017-09-18 11:24:52 +0300
committerDouwe Maan <douwe@gitlab.com>2017-09-18 11:24:52 +0300
commit36ad91d75a91bacb9b1b1c9997e734166cf4e201 (patch)
tree03e52e684612d3aa6f83363372940e3ea769b2ed
parentb7cd5897098ec99b5eca7a3ebcc1c70d7501cd23 (diff)
parent7b7fb754118c7e86f94c4e5efaea632929d293da (diff)
Merge branch 'mk-delete-conflicting-redirects-mysql' into 'master'
Clean up redirect routes that conflict with regular routes Closes #36229 See merge request gitlab-org/gitlab-ce!13783
-rw-r--r--db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb37
-rw-r--r--lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb41
-rw-r--r--lib/gitlab/database/migration_helpers.rb88
-rw-r--r--spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb40
-rw-r--r--spec/lib/gitlab/database/migration_helpers_spec.rb122
-rw-r--r--spec/migrations/delete_conflicting_redirect_routes_spec.rb58
6 files changed, 386 insertions, 0 deletions
diff --git a/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb
new file mode 100644
index 00000000000..3e84b295be4
--- /dev/null
+++ b/db/post_migrate/20170907170235_delete_conflicting_redirect_routes.rb
@@ -0,0 +1,37 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class DeleteConflictingRedirectRoutes < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ MIGRATION = 'DeleteConflictingRedirectRoutesRange'.freeze
+ BATCH_SIZE = 200 # At 200, I expect under 20s per batch, which is under our query timeout of 60s.
+ DELAY_INTERVAL = 12.seconds
+
+ disable_ddl_transaction!
+
+ class Route < ActiveRecord::Base
+ include EachBatch
+
+ self.table_name = 'routes'
+ end
+
+ def up
+ say opening_message
+
+ queue_background_migration_jobs_by_range_at_intervals(Route, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
+ end
+
+ def down
+ # nothing
+ end
+
+ def opening_message
+ <<~MSG
+ Clean up redirect routes that conflict with regular routes.
+ See initial bug fix:
+ https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/13357
+ MSG
+ end
+end
diff --git a/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb
new file mode 100644
index 00000000000..b1411be3016
--- /dev/null
+++ b/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range.rb
@@ -0,0 +1,41 @@
+module Gitlab
+ module BackgroundMigration
+ class DeleteConflictingRedirectRoutesRange
+ class Route < ActiveRecord::Base
+ self.table_name = 'routes'
+ end
+
+ class RedirectRoute < ActiveRecord::Base
+ self.table_name = 'redirect_routes'
+ end
+
+ # start_id - The start ID of the range of events to process
+ # end_id - The end ID of the range to process.
+ def perform(start_id, end_id)
+ return unless migrate?
+
+ conflicts = RedirectRoute.where(routes_match_redirects_clause(start_id, end_id))
+ num_rows = conflicts.delete_all
+
+ Rails.logger.info("Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange [#{start_id}, #{end_id}] - Deleted #{num_rows} redirect routes that were conflicting with routes.")
+ end
+
+ def migrate?
+ Route.table_exists? && RedirectRoute.table_exists?
+ end
+
+ def routes_match_redirects_clause(start_id, end_id)
+ <<~ROUTES_MATCH_REDIRECTS
+ EXISTS (
+ SELECT 1 FROM routes
+ WHERE (
+ LOWER(redirect_routes.path) = LOWER(routes.path)
+ OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%'))
+ )
+ AND routes.id BETWEEN #{start_id} AND #{end_id}
+ )
+ ROUTES_MATCH_REDIRECTS
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index fb14798efe6..2c35da8f1aa 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -1,6 +1,9 @@
module Gitlab
module Database
module MigrationHelpers
+ BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
+ BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
+
# Adds `created_at` and `updated_at` columns with timezone information.
#
# This method is an improved version of Rails' built-in method `add_timestamps`.
@@ -653,6 +656,91 @@ into similar problems in the future (e.g. when new tables are created).
EOF
end
end
+
+ # Bulk queues background migration jobs for an entire table, batched by ID range.
+ # "Bulk" meaning many jobs will be pushed at a time for efficiency.
+ # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
+ #
+ # model_class - The table being iterated over
+ # job_class_name - The background migration job class as a string
+ # batch_size - The maximum number of rows per job
+ #
+ # Example:
+ #
+ # class Route < ActiveRecord::Base
+ # include EachBatch
+ # self.table_name = 'routes'
+ # end
+ #
+ # bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
+ #
+ # Where the model_class includes EachBatch, and the background migration exists:
+ #
+ # class Gitlab::BackgroundMigration::ProcessRoutes
+ # def perform(start_id, end_id)
+ # # do something
+ # end
+ # end
+ def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
+ raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
+
+ jobs = []
+
+ model_class.each_batch(of: batch_size) do |relation|
+ start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
+
+ if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
+ # Note: This code path generally only helps with many millions of rows
+ # We push multiple jobs at a time to reduce the time spent in
+ # Sidekiq/Redis operations. We're using this buffer based approach so we
+ # don't need to run additional queries for every range.
+ BackgroundMigrationWorker.perform_bulk(jobs)
+ jobs.clear
+ end
+
+ jobs << [job_class_name, [start_id, end_id]]
+ end
+
+ BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty?
+ end
+
+ # Queues background migration jobs for an entire table, batched by ID range.
+ # Each job is scheduled with a `delay_interval` in between.
+ # If you use a small interval, then some jobs may run at the same time.
+ #
+ # model_class - The table being iterated over
+ # job_class_name - The background migration job class as a string
+ # delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
+ # batch_size - The maximum number of rows per job
+ #
+ # Example:
+ #
+ # class Route < ActiveRecord::Base
+ # include EachBatch
+ # self.table_name = 'routes'
+ # end
+ #
+ # queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
+ #
+ # Where the model_class includes EachBatch, and the background migration exists:
+ #
+ # class Gitlab::BackgroundMigration::ProcessRoutes
+ # def perform(start_id, end_id)
+ # # do something
+ # end
+ # end
+ def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
+ raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
+
+ model_class.each_batch(of: batch_size) do |relation, index|
+ start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
+
+ # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
+ # the same time, which is not helpful in most cases where we wish to
+ # spread the work over time.
+ BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
+ end
+ end
end
end
end
diff --git a/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb b/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb
new file mode 100644
index 00000000000..5c471cbdeda
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/delete_conflicting_redirect_routes_range_spec.rb
@@ -0,0 +1,40 @@
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange, :migration, schema: 20170907170235 do
+ let!(:redirect_routes) { table(:redirect_routes) }
+ let!(:routes) { table(:routes) }
+
+ before do
+ routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1')
+ routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2')
+ routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3')
+ routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4')
+ routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5')
+
+ # Valid redirects
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2')
+ redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3')
+
+ # Conflicting redirects
+ redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5')
+ end
+
+ it 'deletes the conflicting redirect_routes in the range' do
+ expect(redirect_routes.count).to eq(8)
+
+ expect do
+ described_class.new.perform(1, 3)
+ end.to change { redirect_routes.where("path like 'foo%'").count }.from(5).to(2)
+
+ expect do
+ described_class.new.perform(4, 5)
+ end.to change { redirect_routes.where("path like 'foo%'").count }.from(2).to(0)
+
+ expect(redirect_routes.count).to eq(3)
+ end
+end
diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb
index 1bcdc369c44..3c8350b3aad 100644
--- a/spec/lib/gitlab/database/migration_helpers_spec.rb
+++ b/spec/lib/gitlab/database/migration_helpers_spec.rb
@@ -914,4 +914,126 @@ describe Gitlab::Database::MigrationHelpers do
.to raise_error(RuntimeError, /Your database user is not allowed/)
end
end
+
+ describe '#bulk_queue_background_migration_jobs_by_range', :sidekiq do
+ context 'when the model has an ID column' do
+ let!(:id1) { create(:user).id }
+ let!(:id2) { create(:user).id }
+ let!(:id3) { create(:user).id }
+
+ before do
+ User.class_eval do
+ include EachBatch
+ end
+ end
+
+ context 'with enough rows to bulk queue jobs more than once' do
+ before do
+ stub_const('Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE', 1)
+ end
+
+ it 'queues jobs correctly' do
+ Sidekiq::Testing.fake! do
+ model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
+
+ expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
+ expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
+ end
+ end
+
+ it 'queues jobs in groups of buffer size 1' do
+ expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]]])
+ expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id3, id3]]])
+
+ model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
+ end
+ end
+
+ context 'with not enough rows to bulk queue jobs more than once' do
+ it 'queues jobs correctly' do
+ Sidekiq::Testing.fake! do
+ model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
+
+ expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
+ expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
+ end
+ end
+
+ it 'queues jobs in bulk all at once (big buffer size)' do
+ expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]],
+ ['FooJob', [id3, id3]]])
+
+ model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
+ end
+ end
+
+ context 'without specifying batch_size' do
+ it 'queues jobs correctly' do
+ Sidekiq::Testing.fake! do
+ model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob')
+
+ expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]])
+ end
+ end
+ end
+ end
+
+ context "when the model doesn't have an ID column" do
+ it 'raises error (for now)' do
+ expect do
+ model.bulk_queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob')
+ end.to raise_error(StandardError, /does not have an ID/)
+ end
+ end
+ end
+
+ describe '#queue_background_migration_jobs_by_range_at_intervals', :sidekiq do
+ context 'when the model has an ID column' do
+ let!(:id1) { create(:user).id }
+ let!(:id2) { create(:user).id }
+ let!(:id3) { create(:user).id }
+
+ around do |example|
+ Timecop.freeze { example.run }
+ end
+
+ before do
+ User.class_eval do
+ include EachBatch
+ end
+ end
+
+ context 'with batch_size option' do
+ it 'queues jobs correctly' do
+ Sidekiq::Testing.fake! do
+ model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds, batch_size: 2)
+
+ expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
+ expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f)
+ expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
+ expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.seconds.from_now.to_f)
+ end
+ end
+ end
+
+ context 'without batch_size option' do
+ it 'queues jobs correctly' do
+ Sidekiq::Testing.fake! do
+ model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds)
+
+ expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]])
+ expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f)
+ end
+ end
+ end
+ end
+
+ context "when the model doesn't have an ID column" do
+ it 'raises error (for now)' do
+ expect do
+ model.queue_background_migration_jobs_by_range_at_intervals(ProjectAuthorization, 'FooJob', 10.seconds)
+ end.to raise_error(StandardError, /does not have an ID/)
+ end
+ end
+ end
end
diff --git a/spec/migrations/delete_conflicting_redirect_routes_spec.rb b/spec/migrations/delete_conflicting_redirect_routes_spec.rb
new file mode 100644
index 00000000000..1df2477da51
--- /dev/null
+++ b/spec/migrations/delete_conflicting_redirect_routes_spec.rb
@@ -0,0 +1,58 @@
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20170907170235_delete_conflicting_redirect_routes')
+
+describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do
+ let!(:redirect_routes) { table(:redirect_routes) }
+ let!(:routes) { table(:routes) }
+
+ around do |example|
+ Timecop.freeze { example.run }
+ end
+
+ before do
+ stub_const("DeleteConflictingRedirectRoutes::BATCH_SIZE", 2)
+ stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE", 2)
+
+ routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1')
+ routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2')
+ routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3')
+ routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4')
+ routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5')
+
+ # Valid redirects
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2')
+ redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3')
+
+ # Conflicting redirects
+ redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4')
+ redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5')
+ end
+
+ it 'correctly schedules background migrations' do
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ migrate!
+
+ expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([described_class::MIGRATION, [1, 2]])
+ expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(12.seconds.from_now.to_f)
+ expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([described_class::MIGRATION, [3, 4]])
+ expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(24.seconds.from_now.to_f)
+ expect(BackgroundMigrationWorker.jobs[2]['args']).to eq([described_class::MIGRATION, [5, 5]])
+ expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(36.seconds.from_now.to_f)
+ expect(BackgroundMigrationWorker.jobs.size).to eq 3
+ end
+ end
+ end
+
+ it 'schedules background migrations' do
+ Sidekiq::Testing.inline! do
+ expect do
+ migrate!
+ end.to change { redirect_routes.count }.from(8).to(3)
+ end
+ end
+end