diff options
author | Stan Hu <stanhu@gmail.com> | 2019-03-04 21:05:47 +0300 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2019-03-04 21:05:47 +0300 |
commit | 5c80bbb33c12490bc5fa711642a40fc16bdb79a4 (patch) | |
tree | 37469f16101018bb3fe1eb3728257fac6425da03 /lib | |
parent | 088128e621d0a7d48e89e738348a4ec57d8a0e36 (diff) | |
parent | f0c52df5e540e825be0babd04cc557f3f40cf1c6 (diff) |
Merge branch '40396-sidekiq-in-process-group' into 'master'
sidekiq: terminate child processes at shutdown
See merge request gitlab-org/gitlab-ce!25669
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/sidekiq_middleware/memory_killer.rb | 17 | ||||
-rw-r--r-- | lib/gitlab/sidekiq_signals.rb | 42 |
2 files changed, 57 insertions, 2 deletions
diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb index 47333d257eb..ed2c7ee9a2d 100644 --- a/lib/gitlab/sidekiq_middleware/memory_killer.rb +++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb @@ -36,11 +36,13 @@ module Gitlab # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish. # Then, tell Sidekiq to gracefully shut down by giving jobs a few more # moments to finish, killing and requeuing them if they didn't, and - # then terminating itself. + # then terminating itself. Sidekiq will replicate the TERM to all its + # children if it can. wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down') # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't. - wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die') + # Kill the whole pgroup, so we can be sure no children are left behind + wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die') end end @@ -53,6 +55,17 @@ module Gitlab output.to_i end + # If this sidekiq process is pgroup leader, signal to the whole pgroup + def wait_and_signal_pgroup(time, signal, explanation) + return wait_and_signal(time, signal, explanation) unless Process.getpgrp == pid + + Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})" + sleep(time) + + Sidekiq.logger.warn "sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})" + Process.kill(signal, "-#{pid}") + end + def wait_and_signal(time, signal, explanation) Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})" sleep(time) diff --git a/lib/gitlab/sidekiq_signals.rb b/lib/gitlab/sidekiq_signals.rb new file mode 100644 index 00000000000..b704ee9a0a9 --- /dev/null +++ b/lib/gitlab/sidekiq_signals.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Gitlab + # As a process group leader, we can ensure that children of sidekiq are killed + # at the same time as sidekiq itself, to stop long-lived children from being + # reparented to init and "escaping". To do this, we override the default + # handlers used by sidekiq for INT and TERM signals + module SidekiqSignals + REPLACE_SIGNALS = %w[INT TERM].freeze + + SIDEKIQ_CHANGED_MESSAGE = + "Intercepting signal handlers: #{REPLACE_SIGNALS.join(", ")} failed. " \ + "Sidekiq should have registered them, but appears not to have done so." + + def self.install!(sidekiq_handlers) + # This only works if we're process group leader + return unless Process.getpgrp == Process.pid + + raise SIDEKIQ_CHANGED_MESSAGE unless + REPLACE_SIGNALS == sidekiq_handlers.keys & REPLACE_SIGNALS + + REPLACE_SIGNALS.each do |signal| + old_handler = sidekiq_handlers[signal] + sidekiq_handlers[signal] = ->(cli) do + blindly_signal_pgroup!(signal) + old_handler.call(cli) + end + end + end + + # The process group leader can forward INT and TERM signals to the whole + # group. However, the forwarded signal is *also* received by the leader, + # which could lead to an infinite loop. We can avoid this by temporarily + # ignoring the forwarded signal. This may cause us to miss some repeated + # signals from outside the process group, but that isn't fatal. + def self.blindly_signal_pgroup!(signal) + old_trap = trap(signal, 'IGNORE') + Process.kill(signal, "-#{Process.getpgrp}") + trap(signal, old_trap) + end + end +end |