From f2d50af917b878a98e06b994ac32c0718f3d0b78 Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Mon, 3 Jul 2017 15:48:59 +0100 Subject: Migrate MR commits and diffs to new tables Previously, we stored these as serialised fields - `st_{commits,diffs}` - on the `merge_request_diffs` table. These now have their own tables - `merge_request_diff_{commits,diffs}` - with a column for each attribute of the serialised data. Add a background migration to go through the existing MR diffs and migrate them to the new format. Ignore any contents that cannot be displayed. Assuming that we have 5 million rows to migrate, and each batch of 2,500 rows can be completed in 5 minutes, this will take about 7 days to migrate everything. --- ...30158_schedule_merge_request_diff_migrations.rb | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb (limited to 'db/post_migrate') diff --git a/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb b/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb new file mode 100644 index 00000000000..17a9dc293f1 --- /dev/null +++ b/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb @@ -0,0 +1,33 @@ +class ScheduleMergeRequestDiffMigrations < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 2500 + MIGRATION = 'DeserializeMergeRequestDiffsAndCommits' + + disable_ddl_transaction! + + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + include ::EachBatch + end + + # Assuming that there are 5 million rows affected (which is more than on + # GitLab.com), and that each batch of 2,500 rows takes up to 5 minutes, then + # we can migrate all the rows in 7 days. + # + # On staging, plucking the IDs themselves takes 5 seconds. + def up + non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL' + + MergeRequestDiff.where(non_empty).each_batch(of: BATCH_SIZE) do |relation, index| + range = relation.pluck('MIN(id)', 'MAX(id)').first + + BackgroundMigrationWorker.perform_in(index * 5.minutes, MIGRATION, range) + end + end + + def down + end +end -- cgit v1.2.3