From cb193cd6b54ee9ac0cf87650100585293540abfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Wed, 21 Aug 2019 11:32:45 +0200 Subject: Improve resillency of monitor - Retry connection when it fails - Properly shutdown daemon - Stop monitor if the Exception is raised - Properly guard exception handling --- lib/gitlab/daemon.rb | 5 ++- lib/gitlab/sidekiq_monitor.rb | 72 ++++++++++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/daemon.rb b/lib/gitlab/daemon.rb index 6d5fc4219fb..2f4ae010e74 100644 --- a/lib/gitlab/daemon.rb +++ b/lib/gitlab/daemon.rb @@ -46,7 +46,10 @@ module Gitlab if thread thread.wakeup if thread.alive? - thread.join unless Thread.current == thread + begin + thread.join unless Thread.current == thread + rescue Exception # rubocop:disable Lint/RescueException + end @thread = nil end end diff --git a/lib/gitlab/sidekiq_monitor.rb b/lib/gitlab/sidekiq_monitor.rb index bfbf998b3a0..9ec3be48adb 100644 --- a/lib/gitlab/sidekiq_monitor.rb +++ b/lib/gitlab/sidekiq_monitor.rb @@ -6,6 +6,7 @@ module Gitlab NOTIFICATION_CHANNEL = 'sidekiq:cancel:notifications'.freeze CANCEL_DEADLINE = 24.hours.seconds + RECONNECT_TIME = 3.seconds # We use exception derived from `Exception` # to consider this as an very low-level exception @@ -33,7 +34,8 @@ module Gitlab action: 'run', queue: queue, jid: jid, - canceled: true) + canceled: true + ) raise CancelledError end @@ -44,12 +46,45 @@ module Gitlab end end + def self.cancel_job(jid) + payload = { + action: 'cancel', + jid: jid + }.to_json + + ::Gitlab::Redis::SharedState.with do |redis| + redis.setex(cancel_job_key(jid), CANCEL_DEADLINE, 1) + redis.publish(NOTIFICATION_CHANNEL, payload) + end + end + + private + def start_working Sidekiq.logger.info( class: self.class, action: 'start', - message: 'Starting Monitor Daemon') + message: 'Starting Monitor Daemon' + ) + + while enabled? + process_messages + sleep(RECONNECT_TIME) + end + ensure + Sidekiq.logger.warn( + class: self.class, + action: 'stop', + message: 'Stopping Monitor Daemon' + ) + end + + def stop_working + thread.raise(Interrupt) if thread.alive? + end + + def process_messages ::Gitlab::Redis::SharedState.with do |redis| redis.subscribe(NOTIFICATION_CHANNEL) do |on| on.message do |channel, message| @@ -57,39 +92,24 @@ module Gitlab end end end - - Sidekiq.logger.warn( - class: self.class, - action: 'stop', - message: 'Stopping Monitor Daemon') rescue Exception => e # rubocop:disable Lint/RescueException Sidekiq.logger.warn( class: self.class, action: 'exception', - message: e.message) - raise e - end - - def self.cancel_job(jid) - payload = { - action: 'cancel', - jid: jid - }.to_json + message: e.message + ) - ::Gitlab::Redis::SharedState.with do |redis| - redis.setex(cancel_job_key(jid), CANCEL_DEADLINE, 1) - redis.publish(NOTIFICATION_CHANNEL, payload) - end + # we re-raise system exceptions + raise e unless e.is_a?(StandardError) end - private - def process_message(message) Sidekiq.logger.info( class: self.class, channel: NOTIFICATION_CHANNEL, message: 'Received payload on channel', - payload: message) + payload: message + ) message = safe_parse(message) return unless message @@ -115,14 +135,16 @@ module Gitlab Thread.new do # try to find a thread, but with guaranteed - # handle that this thread corresponds to actually running job + # that handle for thread corresponds to actually + # running job find_thread_with_lock(jid) do |thread| Sidekiq.logger.warn( class: self.class, action: 'cancel', message: 'Canceling thread with CancelledError', jid: jid, - thread_id: thread.object_id) + thread_id: thread.object_id + ) thread&.raise(CancelledError) end -- cgit v1.2.3