diff options
author | Oswaldo Ferreira <oswaldo@gitlab.com> | 2018-07-04 22:06:30 +0300 |
---|---|---|
committer | Oswaldo Ferreira <oswaldo@gitlab.com> | 2018-07-10 15:43:58 +0300 |
commit | 4455904bc154f1a36cedeea574bb0f454f92a9e9 (patch) | |
tree | 7484b5e155ffb386cf87684f76d3a8bfe98c4c8d /lib/gitlab/background_migration | |
parent | e66535e8407ccb8dd229fefdce817902a364f58a (diff) |
Add a deletion rate limit of 1000 files per minute to the scheduler
Diffstat (limited to 'lib/gitlab/background_migration')
-rw-r--r-- | lib/gitlab/background_migration/delete_diff_files.rb | 14 | ||||
-rw-r--r-- | lib/gitlab/background_migration/schedule_diff_files_deletion.rb | 59 |
2 files changed, 58 insertions, 15 deletions
diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 8fb2c334048..e2c90fce6b1 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,6 +4,20 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + belongs_to :merge_request + + include EachBatch + end + + class MergeRequestDiffFile < ActiveRecord::Base + self.table_name = 'merge_request_diff_files' + + include EachBatch + end + def perform(merge_request_diff_id) merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id) diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb index d944ed90fce..6442468836b 100644 --- a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb +++ b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb @@ -5,27 +5,56 @@ module Gitlab module BackgroundMigration class ScheduleDiffFilesDeletion - BATCH_SIZE = 5 + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + has_many :merge_request_diff_files + + include EachBatch + end + + ITERATION_BATCH = 1000 + DELETION_BATCH = 1000 # per minute MIGRATION = 'DeleteDiffFiles' - DELAY_INTERVAL = 10.minutes - def perform(diff_ids, scheduler_index) - relation = MergeRequestDiff.where(id: diff_ids) + # Considering query times and Redis writings, this should take around 2 + # hours to complete. 
+ def perform + diffs_with_files = MergeRequestDiff.where.not(state: %w(without_files empty)) - job_batches = relation.pluck(:id).in_groups_of(BATCH_SIZE, false).map do |ids| - ids.map { |id| [MIGRATION, [id]] } - end + # This will be increased for each scheduled job + process_job_in = 1.second - job_batches.each_with_index do |jobs, inner_index| - # This will give some space between batches of workers. - interval = DELAY_INTERVAL * scheduler_index + inner_index.minutes + # explain (analyze, buffers) example for the iteration: + # + # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1) + # Index Cond: ((id >= 764586) AND (id < 835298)) + # Heap Fetches: 8 + # Buffers: shared hit=18188 + # Planning time: 0.752 ms + # Execution time: 12.430 ms + # + diffs_with_files.reorder(nil).each_batch(of: ITERATION_BATCH) do |relation, scheduler_index| + relation.each do |diff| + BackgroundMigrationWorker.perform_in(process_job_in, MIGRATION, [diff.id]) - # A single `merge_request_diff` can be associated with way too many - # `merge_request_diff_files`. It's better to avoid scheduling big - # batches and go with 5 at a time. - # - BackgroundMigrationWorker.bulk_perform_in(interval, jobs) + diff_files_count = diff.merge_request_diff_files.reorder(nil).count + + # We should limit on 1000 diff files deletion per minute to avoid + # replication lag issues. + # + interval = (diff_files_count.to_f / DELETION_BATCH).minutes + process_job_in += interval + end end + + log_days_to_process_all_jobs(process_job_in) + end + + def log_days_to_process_all_jobs(seconds_to_process) + days_to_process_all_jobs = (seconds_to_process / 60 / 60 / 24).to_i + Rails.logger.info("Gitlab::BackgroundMigration::DeleteDiffFiles will take " \ + "#{days_to_process_all_jobs} days to be processed") end end end |