Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Thomas <nick@gitlab.com>2019-03-01 02:25:37 +0300
committerStan Hu <stanhu@gmail.com>2019-03-04 20:06:41 +0300
commitf0c52df5e540e825be0babd04cc557f3f40cf1c6 (patch)
tree0d39c1df112c4ac71938c6051d65dc4751c9d526
parent6b507626ed3499c1796706b507c30b8f46c706b7 (diff)
sidekiq: terminate child processes at shutdown
Sidekiq jobs frequently spawn long-lived child processes to do work. In some circumstances, these can be reparented to init when sidekiq is terminated, leading to duplication of work and strange concurrency problems. This commit changes sidekiq so that, if run as a process group leader, it will forward `INT` and `TERM` signals to the whole process group. If the memory killer is active, it will also use the process group when resorting to `kill -9` to shut down. These changes mean that a naive `kill <pid-of-sidekiq>` will now do the right thing, killing any child processes spawned by sidekiq, as long as the process supervisor placed it in its own process group. If sidekiq isn't a process group leader, this new code is skipped.
-rw-r--r--changelogs/unreleased/40396-sidekiq-in-process-group.yml5
-rw-r--r--config/initializers/sidekiq.rb3
-rw-r--r--doc/administration/operations/sidekiq_memory_killer.md5
-rw-r--r--lib/gitlab/sidekiq_middleware/memory_killer.rb17
-rw-r--r--lib/gitlab/sidekiq_signals.rb42
-rw-r--r--spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb13
-rw-r--r--spec/lib/gitlab/sidekiq_signals_spec.rb69
7 files changed, 151 insertions, 3 deletions
diff --git a/changelogs/unreleased/40396-sidekiq-in-process-group.yml b/changelogs/unreleased/40396-sidekiq-in-process-group.yml
new file mode 100644
index 00000000000..e41557e20d0
--- /dev/null
+++ b/changelogs/unreleased/40396-sidekiq-in-process-group.yml
@@ -0,0 +1,5 @@
+---
+title: 'sidekiq: terminate child processes at shutdown'
+merge_request: 25669
+author:
+type: added
diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb
index 4a8d72e37a5..2e4aa9c1053 100644
--- a/config/initializers/sidekiq.rb
+++ b/config/initializers/sidekiq.rb
@@ -77,6 +77,9 @@ Sidekiq.configure_server do |config|
# Avoid autoload issue such as 'Mail::Parsers::AddressStruct'
# https://github.com/mikel/mail/issues/912#issuecomment-214850355
Mail.eager_autoload!
+
+ # Ensure the whole process group is terminated if possible
+ Gitlab::SidekiqSignals.install!(Sidekiq::CLI::SIGNAL_HANDLERS)
end
Sidekiq.configure_client do |config|
diff --git a/doc/administration/operations/sidekiq_memory_killer.md b/doc/administration/operations/sidekiq_memory_killer.md
index cbffd883774..8eac42f2fe2 100644
--- a/doc/administration/operations/sidekiq_memory_killer.md
+++ b/doc/administration/operations/sidekiq_memory_killer.md
@@ -17,6 +17,11 @@ With the default settings, the MemoryKiller will cause a Sidekiq restart no
more often than once every 15 minutes, with the restart causing about one
minute of delay for incoming background jobs.
+Some background jobs rely on long-running external processes. To ensure these
+are cleanly terminated when Sidekiq is restarted, each Sidekiq process should be
+run as a process group leader (e.g., using `chpst -P`). If using Omnibus or the
+`bin/background_jobs` script with `runit` installed, this is handled for you.
+
## Configuring the MemoryKiller
The MemoryKiller is controlled using environment variables.
diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb
index 47333d257eb..ed2c7ee9a2d 100644
--- a/lib/gitlab/sidekiq_middleware/memory_killer.rb
+++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb
@@ -36,11 +36,13 @@ module Gitlab
# Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
# Then, tell Sidekiq to gracefully shut down by giving jobs a few more
# moments to finish, killing and requeuing them if they didn't, and
- # then terminating itself.
+ # then terminating itself. Sidekiq will replicate the TERM to all its
+ # children if it can.
wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')
# Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
- wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
+ # Kill the whole pgroup, so we can be sure no children are left behind
+ wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
end
end
@@ -53,6 +55,17 @@ module Gitlab
output.to_i
end
+ # If this sidekiq process is pgroup leader, signal to the whole pgroup
+ def wait_and_signal_pgroup(time, signal, explanation)
+ return wait_and_signal(time, signal, explanation) unless Process.getpgrp == pid
+
+ Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})"
+ sleep(time)
+
+ Sidekiq.logger.warn "sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})"
+ Process.kill(signal, "-#{pid}")
+ end
+
def wait_and_signal(time, signal, explanation)
Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
sleep(time)
diff --git a/lib/gitlab/sidekiq_signals.rb b/lib/gitlab/sidekiq_signals.rb
new file mode 100644
index 00000000000..b704ee9a0a9
--- /dev/null
+++ b/lib/gitlab/sidekiq_signals.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+module Gitlab
+ # As a process group leader, we can ensure that children of sidekiq are killed
+ # at the same time as sidekiq itself, to stop long-lived children from being
+ # reparented to init and "escaping". To do this, we override the default
+ # handlers used by sidekiq for INT and TERM signals
+ module SidekiqSignals
+ REPLACE_SIGNALS = %w[INT TERM].freeze
+
+ SIDEKIQ_CHANGED_MESSAGE =
+ "Intercepting signal handlers: #{REPLACE_SIGNALS.join(", ")} failed. " \
+ "Sidekiq should have registered them, but appears not to have done so."
+
+ def self.install!(sidekiq_handlers)
+ # This only works if we're process group leader
+ return unless Process.getpgrp == Process.pid
+
+ raise SIDEKIQ_CHANGED_MESSAGE unless
+ REPLACE_SIGNALS == sidekiq_handlers.keys & REPLACE_SIGNALS
+
+ REPLACE_SIGNALS.each do |signal|
+ old_handler = sidekiq_handlers[signal]
+ sidekiq_handlers[signal] = ->(cli) do
+ blindly_signal_pgroup!(signal)
+ old_handler.call(cli)
+ end
+ end
+ end
+
+ # The process group leader can forward INT and TERM signals to the whole
+ # group. However, the forwarded signal is *also* received by the leader,
+ # which could lead to an infinite loop. We can avoid this by temporarily
+ # ignoring the forwarded signal. This may cause us to miss some repeated
+ # signals from outside the process group, but that isn't fatal.
+ def self.blindly_signal_pgroup!(signal)
+ old_trap = trap(signal, 'IGNORE')
+ Process.kill(signal, "-#{Process.getpgrp}")
+ trap(signal, old_trap)
+ end
+ end
+end
diff --git a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
index 5df56178df2..ff8c0825ee4 100644
--- a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
+++ b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
@@ -35,7 +35,7 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do
stub_const("#{described_class}::MAX_RSS", 5.kilobytes)
end
- it 'sends the STP, TERM and KILL signals at expected times' do
+ it 'sends the TSTP, TERM and KILL signals at expected times' do
expect(subject).to receive(:sleep).with(15 * 60).ordered
expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered
@@ -47,6 +47,17 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do
run
end
+
+ it 'sends TSTP and TERM to the pid, but KILL to the pgroup, when running as process leader' do
+ allow(Process).to receive(:getpgrp) { pid }
+ allow(subject).to receive(:sleep)
+
+ expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered
+ expect(Process).to receive(:kill).with('SIGTERM', pid).ordered
+ expect(Process).to receive(:kill).with('SIGKILL', "-#{pid}").ordered
+
+ run
+ end
end
context 'when MAX_RSS is not exceeded' do
diff --git a/spec/lib/gitlab/sidekiq_signals_spec.rb b/spec/lib/gitlab/sidekiq_signals_spec.rb
new file mode 100644
index 00000000000..4483224f49d
--- /dev/null
+++ b/spec/lib/gitlab/sidekiq_signals_spec.rb
@@ -0,0 +1,69 @@
+require 'spec_helper'
+
+describe Gitlab::SidekiqSignals do
+ describe '.install' do
+ let(:result) { Hash.new { |h, k| h[k] = 0 } }
+ let(:int_handler) { -> (_) { result['INT'] += 1 } }
+ let(:term_handler) { -> (_) { result['TERM'] += 1 } }
+ let(:other_handler) { -> (_) { result['OTHER'] += 1 } }
+ let(:signals) { { 'INT' => int_handler, 'TERM' => term_handler, 'OTHER' => other_handler } }
+
+ context 'not a process group leader' do
+ before do
+ allow(Process).to receive(:getpgrp) { Process.pid * 2 }
+ end
+
+ it 'does nothing' do
+ expect { described_class.install!(signals) }
+ .not_to change { signals }
+ end
+ end
+
+ context 'as a process group leader' do
+ before do
+ allow(Process).to receive(:getpgrp) { Process.pid }
+ end
+
+ it 'installs its own signal handlers for TERM and INT only' do
+ described_class.install!(signals)
+
+ expect(signals['INT']).not_to eq(int_handler)
+ expect(signals['TERM']).not_to eq(term_handler)
+ expect(signals['OTHER']).to eq(other_handler)
+ end
+
+ %w[INT TERM].each do |signal|
+ it "installs a forwarding signal handler for #{signal}" do
+ described_class.install!(signals)
+
+ expect(described_class)
+ .to receive(:trap)
+ .with(signal, 'IGNORE')
+ .and_return(:original_trap)
+ .ordered
+
+ expect(Process)
+ .to receive(:kill)
+ .with(signal, "-#{Process.pid}")
+ .ordered
+
+ expect(described_class)
+ .to receive(:trap)
+ .with(signal, :original_trap)
+ .ordered
+
+ signals[signal].call(:cli)
+
+ expect(result[signal]).to eq(1)
+ end
+
+ it "raises if sidekiq no longer traps SIG#{signal}" do
+ signals.delete(signal)
+
+ expect { described_class.install!(signals) }
+ .to raise_error(/Sidekiq should have registered/)
+ end
+ end
+ end
+ end
+end