gitlab.com/gitlab-org/gitlab-foss.git
author    Nick Thomas <nick@gitlab.com>    2019-02-28 17:45:20 +0300
committer Nick Thomas <nick@gitlab.com>    2019-02-28 17:45:20 +0300
commit    ff431fe0762b4d590a1d307b1a6b3ab7268ff7df (patch)
tree      64ba9c952f7f299b5251a553592c4f1df3d3e82c /lib
parent    7d1216016cfc64e35955249e39c46615e2dbdf93 (diff)
Revert "Restart Unicorn and Sidekiq when GRPC throws 14:Endpoint read failed"
This reverts commit 006753110a462e62f549cdf3c410e73eed068dbf.
Diffstat (limited to 'lib')
-rw-r--r--  lib/gitlab/gitaly_client.rb                       23
-rw-r--r--  lib/gitlab/sidekiq_middleware/memory_killer.rb    69
-rw-r--r--  lib/gitlab/sidekiq_middleware/shutdown.rb        135
3 files changed, 69 insertions, 158 deletions
diff --git a/lib/gitlab/gitaly_client.rb b/lib/gitlab/gitaly_client.rb
index 5aeedb0f50d..869b835b61e 100644
--- a/lib/gitlab/gitaly_client.rb
+++ b/lib/gitlab/gitaly_client.rb
@@ -164,8 +164,6 @@ module Gitlab
       kwargs = yield(kwargs) if block_given?
       stub(service, storage).__send__(rpc, request, kwargs) # rubocop:disable GitlabSecurity/PublicSend
-    rescue GRPC::Unavailable => ex
-      handle_grpc_unavailable!(ex)
     ensure
       duration = Gitlab::Metrics::System.monotonic_time - start
@@ -178,27 +176,6 @@ module Gitlab
       add_call_details(feature: "#{service}##{rpc}", duration: duration, request: request_hash, rpc: rpc)
     end
-    def self.handle_grpc_unavailable!(ex)
-      status = ex.to_status
-      raise ex unless status.details == 'Endpoint read failed'
-
-      # There is a bug in grpc 1.8.x that causes a client process to get stuck
-      # always raising '14:Endpoint read failed'. The only thing that we can
-      # do to recover is to restart the process.
-      #
-      # See https://gitlab.com/gitlab-org/gitaly/issues/1029
-
-      if Sidekiq.server?
-        raise Gitlab::SidekiqMiddleware::Shutdown::WantShutdown.new(ex.to_s)
-      else
-        # SIGQUIT requests a Unicorn worker to shut down gracefully after the current request.
-        Process.kill('QUIT', Process.pid)
-      end
-
-      raise ex
-    end
-    private_class_method :handle_grpc_unavailable!
-
     def self.current_transaction_labels
       Gitlab::Metrics::Transaction.current&.labels || {}
     end
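The removed handler keyed its restart decision off the gRPC status details, as the deleted comment explains. A minimal stand-alone sketch of that check, using the grpc gem's exception API (the `call_gitaly` wrapper name is made up for illustration):

    begin
      call_gitaly                # hypothetical wrapper around a single Gitaly RPC
    rescue GRPC::Unavailable => ex
      status = ex.to_status      # struct with the status code, details string and metadata
      # Only the buggy grpc 1.8.x 'Endpoint read failed' case warranted a restart;
      # any other Unavailable error was re-raised to the caller.
      raise unless status.details == 'Endpoint read failed'
    end

After this revert, GRPC::Unavailable is no longer rescued here and propagates to the caller like any other Gitaly error.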
diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb
new file mode 100644
index 00000000000..ad4112ba555
--- /dev/null
+++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb
@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module SidekiqMiddleware
+    class MemoryKiller
+      # Default the RSS limit to 0, meaning the MemoryKiller is disabled
+      MAX_RSS = (ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] || 0).to_s.to_i
+      # Give Sidekiq 15 minutes of grace time after exceeding the RSS limit
+      GRACE_TIME = (ENV['SIDEKIQ_MEMORY_KILLER_GRACE_TIME'] || 15 * 60).to_s.to_i
+      # Wait 30 seconds for running jobs to finish during graceful shutdown
+      SHUTDOWN_WAIT = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT'] || 30).to_s.to_i
+
+      # Create a mutex used to ensure there will be only one thread waiting to
+      # shut Sidekiq down
+      MUTEX = Mutex.new
+
+      def call(worker, job, queue)
+        yield
+
+        current_rss = get_rss
+
+        return unless MAX_RSS > 0 && current_rss > MAX_RSS
+
+        Thread.new do
+          # Return if another thread is already waiting to shut Sidekiq down
+          next unless MUTEX.try_lock
+
+          Sidekiq.logger.warn "Sidekiq worker PID-#{pid} current RSS #{current_rss}"\
+            " exceeds maximum RSS #{MAX_RSS} after finishing job #{worker.class} JID-#{job['jid']}"
+          Sidekiq.logger.warn "Sidekiq worker PID-#{pid} will stop fetching new jobs in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later"
+
+          # Wait `GRACE_TIME` to give the memory intensive job time to finish.
+          # Then, tell Sidekiq to stop fetching new jobs.
+          wait_and_signal(GRACE_TIME, 'SIGTSTP', 'stop fetching new jobs')
+
+          # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
+          # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
+          # moments to finish, killing and requeuing them if they didn't, and
+          # then terminating itself.
+          wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')
+
+          # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
+          wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
+        end
+      end
+
+      private
+
+      def get_rss
+        output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid}), Rails.root.to_s)
+        return 0 unless status.zero?
+
+        output.to_i
+      end
+
+      def wait_and_signal(time, signal, explanation)
+        Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
+        sleep(time)
+
+        Sidekiq.logger.warn "sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
+        Process.kill(signal, pid)
+      end
+
+      def pid
+        Process.pid
+      end
+    end
+  end
+end
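The restored middleware only takes effect once it is added to Sidekiq's server middleware chain; GitLab does that wiring in its Sidekiq initializer, which is outside this diff. A minimal sketch of such a registration, assuming a standard Sidekiq setup (the file path is illustrative, not GitLab's actual initializer):

    # config/initializers/sidekiq.rb (illustrative location)
    Sidekiq.configure_server do |config|
      config.server_middleware do |chain|
        # Runs after each job and signals the worker to restart once its RSS
        # exceeds SIDEKIQ_MEMORY_KILLER_MAX_RSS (disabled when unset or 0).
        chain.add Gitlab::SidekiqMiddleware::MemoryKiller
      end
    end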
diff --git a/lib/gitlab/sidekiq_middleware/shutdown.rb b/lib/gitlab/sidekiq_middleware/shutdown.rb
deleted file mode 100644
index 19f3be83bce..00000000000
--- a/lib/gitlab/sidekiq_middleware/shutdown.rb
+++ /dev/null
@@ -1,135 +0,0 @@
-# frozen_string_literal: true
-
-require 'mutex_m'
-
-module Gitlab
-  module SidekiqMiddleware
-    class Shutdown
-      extend Mutex_m
-
-      # Default the RSS limit to 0, meaning the MemoryKiller is disabled
-      MAX_RSS = (ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] || 0).to_s.to_i
-      # Give Sidekiq 15 minutes of grace time after exceeding the RSS limit
-      GRACE_TIME = (ENV['SIDEKIQ_MEMORY_KILLER_GRACE_TIME'] || 15 * 60).to_s.to_i
-      # Wait 30 seconds for running jobs to finish during graceful shutdown
-      SHUTDOWN_WAIT = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT'] || 30).to_s.to_i
-
-      # This exception can be used to request that the middleware start shutting down Sidekiq
-      WantShutdown = Class.new(StandardError)
-
-      ShutdownWithoutRaise = Class.new(WantShutdown)
-      private_constant :ShutdownWithoutRaise
-
-      # For testing only, to avoid race conditions (?) in Rspec mocks.
-      attr_reader :trace
-
-      # We store the shutdown thread in a class variable to ensure that there
-      # can be only one shutdown thread in the process.
-      def self.create_shutdown_thread
-        mu_synchronize do
-          break unless @shutdown_thread.nil?
-
-          @shutdown_thread = Thread.new { yield }
-        end
-      end
-
-      # For testing only: so we can wait for the shutdown thread to finish.
-      def self.shutdown_thread
-        mu_synchronize { @shutdown_thread }
-      end
-
-      # For testing only: so that we can reset the global state before each test.
-      def self.clear_shutdown_thread
-        mu_synchronize { @shutdown_thread = nil }
-      end
-
-      def initialize
-        @trace = Queue.new if Rails.env.test?
-      end
-
-      def call(worker, job, queue)
-        shutdown_exception = nil
-
-        begin
-          yield
-          check_rss!
-        rescue WantShutdown => ex
-          shutdown_exception = ex
-        end
-
-        return unless shutdown_exception
-
-        self.class.create_shutdown_thread do
-          do_shutdown(worker, job, shutdown_exception)
-        end
-
-        raise shutdown_exception unless shutdown_exception.is_a?(ShutdownWithoutRaise)
-      end
-
-      private
-
-      def do_shutdown(worker, job, shutdown_exception)
-        Sidekiq.logger.warn "Sidekiq worker PID-#{pid} shutting down because of #{shutdown_exception} after job "\
-          "#{worker.class} JID-#{job['jid']}"
-        Sidekiq.logger.warn "Sidekiq worker PID-#{pid} will stop fetching new jobs in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later"
-
-        # Wait `GRACE_TIME` to give the memory intensive job time to finish.
-        # Then, tell Sidekiq to stop fetching new jobs.
-        wait_and_signal(GRACE_TIME, 'SIGTSTP', 'stop fetching new jobs')
-
-        # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
-        # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
-        # moments to finish, killing and requeuing them if they didn't, and
-        # then terminating itself.
-        wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')
-
-        # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
-        wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
-      end
-
-      def check_rss!
-        return unless MAX_RSS > 0
-
-        current_rss = get_rss
-        return unless current_rss > MAX_RSS
-
-        raise ShutdownWithoutRaise.new("current RSS #{current_rss} exceeds maximum RSS #{MAX_RSS}")
-      end
-
-      def get_rss
-        output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid}), Rails.root.to_s)
-        return 0 unless status.zero?
-
-        output.to_i
-      end
-
-      def wait_and_signal(time, signal, explanation)
-        Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
-        sleep(time)
-
-        Sidekiq.logger.warn "sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
-        kill(signal, pid)
-      end
-
-      def pid
-        Process.pid
-      end
-
-      def sleep(time)
-        if Rails.env.test?
-          @trace << [:sleep, time]
-        else
-          Kernel.sleep(time)
-        end
-      end
-
-      def kill(signal, pid)
-        if Rails.env.test?
-          @trace << [:kill, signal, pid]
-        else
-          Process.kill(signal, pid)
-        end
-      end
-    end
-  end
-end
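Both the deleted Shutdown middleware and the restored MemoryKiller read the same environment variables at load time, so existing deployments keep their thresholds across this revert. A short sketch of how the values resolve (the 2_000_000 figure is only an example; `ps -o rss=` reports kilobytes, so it corresponds to roughly 2 GB):

    ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] = '2000000'   # example value, not a recommendation

    max_rss       = (ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] || 0).to_s.to_i          # => 2000000
    grace_time    = (ENV['SIDEKIQ_MEMORY_KILLER_GRACE_TIME'] || 15 * 60).to_s.to_i # => 900, the default
    shutdown_wait = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT'] || 30).to_s.to_i   # => 30, the default

    # Leaving SIDEKIQ_MEMORY_KILLER_MAX_RSS unset (or 0) disables the killer entirely.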