From 46b28842b6a05b0b398bdd75e82a00439ad404b0 Mon Sep 17 00:00:00 2001 From: Tiago Botelho Date: Wed, 7 Nov 2018 14:32:15 +0000 Subject: StuckImportJobsWorker query performance optimization Improves the performance of fetching the enqueued projects for StuckImportJobsWorker, preventing a statement timeout. --- app/models/project_import_state.rb | 13 ++++++ app/workers/stuck_import_jobs_worker.rb | 46 +++++++++++----------- ...x-stuck-import-jobs-query-performance-issue.yml | 5 +++ spec/workers/stuck_import_jobs_worker_spec.rb | 22 +++++------ 4 files changed, 51 insertions(+), 35 deletions(-) create mode 100644 changelogs/unreleased/fix-stuck-import-jobs-query-performance-issue.yml diff --git a/app/models/project_import_state.rb b/app/models/project_import_state.rb index d59cb43dea4..7126bb66d80 100644 --- a/app/models/project_import_state.rb +++ b/app/models/project_import_state.rb @@ -56,4 +56,17 @@ class ProjectImportState < ActiveRecord::Base end end end + + def mark_as_failed(error_message) + original_errors = errors.dup + sanitized_message = Gitlab::UrlSanitizer.sanitize(error_message) + + fail_op + + update_column(:last_error, sanitized_message) + rescue ActiveRecord::ActiveRecordError => e + Rails.logger.error("Error setting import status to failed: #{e.message}. Original error: #{sanitized_message}") + ensure + @errors = original_errors + end end diff --git a/app/workers/stuck_import_jobs_worker.rb b/app/workers/stuck_import_jobs_worker.rb index de92f3eca6a..667a4121131 100644 --- a/app/workers/stuck_import_jobs_worker.rb +++ b/app/workers/stuck_import_jobs_worker.rb @@ -7,60 +7,58 @@ class StuckImportJobsWorker IMPORT_JOBS_EXPIRATION = 15.hours.to_i def perform - projects_without_jid_count = mark_projects_without_jid_as_failed! - projects_with_jid_count = mark_projects_with_jid_as_failed! + import_state_without_jid_count = mark_import_states_without_jid_as_failed! + import_state_with_jid_count = mark_import_states_with_jid_as_failed! Gitlab::Metrics.add_event(:stuck_import_jobs, - projects_without_jid_count: projects_without_jid_count, - projects_with_jid_count: projects_with_jid_count) + projects_without_jid_count: import_state_without_jid_count, + projects_with_jid_count: import_state_with_jid_count) end private - def mark_projects_without_jid_as_failed! - enqueued_projects_without_jid.each do |project| - project.mark_import_as_failed(error_message) + def mark_import_states_without_jid_as_failed! + enqueued_import_states_without_jid.each do |import_state| + import_state.mark_as_failed(error_message) end.count end # rubocop: disable CodeReuse/ActiveRecord - def mark_projects_with_jid_as_failed! - # TODO: Rollback this change to use SQL through #pluck - jids_and_ids = enqueued_projects_with_jid.map { |project| [project.import_jid, project.id] }.to_h + def mark_import_states_with_jid_as_failed! + jids_and_ids = enqueued_import_states_with_jid.pluck(:jid, :id).to_h # Find the jobs that aren't currently running or that exceeded the threshold. completed_jids = Gitlab::SidekiqStatus.completed_jids(jids_and_ids.keys) return unless completed_jids.any? - completed_project_ids = jids_and_ids.values_at(*completed_jids) + completed_import_state_ids = jids_and_ids.values_at(*completed_jids) - # We select the projects again, because they may have transitioned from + # We select the import states again, because they may have transitioned from # scheduled/started to finished/failed while we were looking up their Sidekiq status. - completed_projects = enqueued_projects_with_jid.where(id: completed_project_ids) + completed_import_states = enqueued_import_states_with_jid.where(id: completed_import_state_ids) - Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_projects.map(&:import_jid).join(', ')}") + completed_import_state_jids = completed_import_states.map { |import_state| import_state.jid }.join(', ') + Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_import_state_jids}") - completed_projects.each do |project| - project.mark_import_as_failed(error_message) + completed_import_states.each do |import_state| + import_state.mark_as_failed(error_message) end.count end # rubocop: enable CodeReuse/ActiveRecord - # rubocop: disable CodeReuse/ActiveRecord - def enqueued_projects - Project.joins_import_state.where("(import_state.status = 'scheduled' OR import_state.status = 'started') OR (projects.import_status = 'scheduled' OR projects.import_status = 'started')") + def enqueued_import_states + ProjectImportState.with_status([:scheduled, :started]) end - # rubocop: enable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord - def enqueued_projects_with_jid - enqueued_projects.where.not("import_state.jid IS NULL AND projects.import_jid IS NULL") + def enqueued_import_states_with_jid + enqueued_import_states.where.not(jid: nil) end # rubocop: enable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord - def enqueued_projects_without_jid - enqueued_projects.where("import_state.jid IS NULL AND projects.import_jid IS NULL") + def enqueued_import_states_without_jid + enqueued_import_states.where(jid: nil) end # rubocop: enable CodeReuse/ActiveRecord diff --git a/changelogs/unreleased/fix-stuck-import-jobs-query-performance-issue.yml b/changelogs/unreleased/fix-stuck-import-jobs-query-performance-issue.yml new file mode 100644 index 00000000000..d8455a8509f --- /dev/null +++ b/changelogs/unreleased/fix-stuck-import-jobs-query-performance-issue.yml @@ -0,0 +1,5 @@ +--- +title: Improves performance of stuck import jobs detection +merge_request: 22879 +author: +type: performance diff --git a/spec/workers/stuck_import_jobs_worker_spec.rb b/spec/workers/stuck_import_jobs_worker_spec.rb index 2169c14218b..e94d2be9850 100644 --- a/spec/workers/stuck_import_jobs_worker_spec.rb +++ b/spec/workers/stuck_import_jobs_worker_spec.rb @@ -8,29 +8,29 @@ describe StuckImportJobsWorker do context 'when the import status was already updated' do before do allow(Gitlab::SidekiqStatus).to receive(:completed_jids) do - project.import_start - project.import_finish + import_state.start + import_state.finish - [project.import_jid] + [import_state.jid] end end it 'does not mark the project as failed' do worker.perform - expect(project.reload.import_status).to eq('finished') + expect(import_state.reload.status).to eq('finished') end end context 'when the import status was not updated' do before do - allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([project.import_jid]) + allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([import_state.jid]) end it 'marks the project as failed' do worker.perform - expect(project.reload.import_status).to eq('failed') + expect(import_state.reload.status).to eq('failed') end end end @@ -41,27 +41,27 @@ describe StuckImportJobsWorker do end it 'does not mark the project as failed' do - expect { worker.perform }.not_to change { project.reload.import_status } + expect { worker.perform }.not_to change { import_state.reload.status } end end end describe 'with scheduled import_status' do it_behaves_like 'project import job detection' do - let(:project) { create(:project, :import_scheduled) } + let(:import_state) { create(:project, :import_scheduled).import_state } before do - project.import_state.update(jid: '123') + import_state.update(jid: '123') end end end describe 'with started import_status' do it_behaves_like 'project import job detection' do - let(:project) { create(:project, :import_started) } + let(:import_state) { create(:project, :import_started).import_state } before do - project.import_state.update(jid: '123') + import_state.update(jid: '123') end end end -- cgit v1.2.3