From 6a18a411a30e9e7406ba9335ab502ec396add662 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Fri, 17 May 2019 19:10:44 +0700 Subject: Make pipeline schedule worker resilient Currently, the pipeline schedule worker is unstable because it's sometimes killed by excessive memory consumption. In order to improve the performance, we add the following fixes: 1. next_run_at is always real_next_run, which means the value always takes into account the worker's cron schedule. 1. Remove the exclusive lock. This is already covered by the real_next_run change. 1. Use RunPipelineScheduleWorker to avoid the memory killer. Memory consumption is spread across multiple Sidekiq workers. --- app/workers/pipeline_schedule_worker.rb | 41 +++-------------------------- app/workers/run_pipeline_schedule_worker.rb | 26 +++++++++++++++++- 2 files changed, 28 insertions(+), 39 deletions(-) (limited to 'app/workers') diff --git a/app/workers/pipeline_schedule_worker.rb b/app/workers/pipeline_schedule_worker.rb index 8a9ee7808e4..9410fd1a786 100644 --- a/app/workers/pipeline_schedule_worker.rb +++ b/app/workers/pipeline_schedule_worker.rb @@ -3,47 +3,12 @@ class PipelineScheduleWorker include ApplicationWorker include CronjobQueue - include ::Gitlab::ExclusiveLeaseHelpers - EXCLUSIVE_LOCK_KEY = 'pipeline_schedules:run:lock' - LOCK_TIMEOUT = 50.minutes - - # rubocop: disable CodeReuse/ActiveRecord def perform - in_lock(EXCLUSIVE_LOCK_KEY, ttl: LOCK_TIMEOUT, retries: 1) do - Ci::PipelineSchedule.active.where("next_run_at < ?", Time.now) - .preload(:owner, :project).find_each do |schedule| - - schedule.schedule_next_run! 
- - Ci::CreatePipelineService.new(schedule.project, - schedule.owner, - ref: schedule.ref) - .execute!(:schedule, ignore_skip_ci: true, save_on_errors: true, schedule: schedule) - rescue => e - error(schedule, e) + Ci::PipelineSchedule.runnable_schedules.preloaded.find_in_batches do |schedules| + schedules.each do |schedule| + Ci::PipelineScheduleService.new(schedule.project, schedule.owner).execute(schedule) end end end - # rubocop: enable CodeReuse/ActiveRecord - - private - - def error(schedule, error) - failed_creation_counter.increment - - Rails.logger.error "Failed to create a scheduled pipeline. " \ - "schedule_id: #{schedule.id} message: #{error.message}" - - Gitlab::Sentry - .track_exception(error, - issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/41231', - extra: { schedule_id: schedule.id }) - end - - def failed_creation_counter - @failed_creation_counter ||= - Gitlab::Metrics.counter(:pipeline_schedule_creation_failed_total, - "Counter of failed attempts of pipeline schedule creation") - end end diff --git a/app/workers/run_pipeline_schedule_worker.rb b/app/workers/run_pipeline_schedule_worker.rb index f72331c003a..43e0b9db22f 100644 --- a/app/workers/run_pipeline_schedule_worker.rb +++ b/app/workers/run_pipeline_schedule_worker.rb @@ -21,6 +21,30 @@ class RunPipelineScheduleWorker Ci::CreatePipelineService.new(schedule.project, user, ref: schedule.ref) - .execute(:schedule, ignore_skip_ci: true, save_on_errors: false, schedule: schedule) + .execute!(:schedule, ignore_skip_ci: true, save_on_errors: false, schedule: schedule) + rescue Ci::CreatePipelineService::CreateError + # no-op. This is a user operation error such as corrupted .gitlab-ci.yml. + rescue => e + error(schedule, e) + end + + private + + def error(schedule, error) + failed_creation_counter.increment + + Rails.logger.error "Failed to create a scheduled pipeline. 
" \ + "schedule_id: #{schedule.id} message: #{error.message}" + + Gitlab::Sentry + .track_exception(error, + issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/41231', + extra: { schedule_id: schedule.id }) + end + + def failed_creation_counter + @failed_creation_counter ||= + Gitlab::Metrics.counter(:pipeline_schedule_creation_failed_total, + "Counter of failed attempts of pipeline schedule creation") end end -- cgit v1.2.3