diff options
Diffstat (limited to 'sidekiq_cluster/cli.rb')
-rw-r--r-- | sidekiq_cluster/cli.rb | 92 |
1 files changed, 83 insertions, 9 deletions
diff --git a/sidekiq_cluster/cli.rb b/sidekiq_cluster/cli.rb index 55b4521d37d..57649ec74c8 100644 --- a/sidekiq_cluster/cli.rb +++ b/sidekiq_cluster/cli.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative '../config/bundler_setup' + require 'optparse' require 'logger' require 'time' @@ -11,11 +13,21 @@ require_relative '../lib/gitlab/utils' require_relative '../lib/gitlab/sidekiq_config/cli_methods' require_relative '../lib/gitlab/sidekiq_config/worker_matcher' require_relative '../lib/gitlab/sidekiq_logging/json_formatter' +require_relative '../lib/gitlab/process_management' +require_relative '../metrics_server/metrics_server' require_relative 'sidekiq_cluster' module Gitlab module SidekiqCluster class CLI + THREAD_NAME = 'supervisor' + + # The signals that should terminate both the master and workers. + TERMINATE_SIGNALS = %i(INT TERM).freeze + + # The signals that should simply be forwarded to the workers. + FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze + CommandError = Class.new(StandardError) def initialize(log_output = $stderr) @@ -23,6 +35,7 @@ module Gitlab @max_concurrency = 50 @min_concurrency = 0 @environment = ENV['RAILS_ENV'] || 'development' + @metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/sidekiq") @pid = nil @interval = 5 @alive = true @@ -35,6 +48,8 @@ module Gitlab end def run(argv = ARGV) + Thread.current.name = THREAD_NAME + if argv.empty? raise CommandError, 'You must specify at least one queue to start a worker for' @@ -88,6 +103,8 @@ module Gitlab @logger.info("Starting cluster with #{queue_groups.length} processes") end + start_metrics_server(wipe_metrics_dir: true) + @processes = SidekiqCluster.start( queue_groups, env: @environment, @@ -106,7 +123,7 @@ module Gitlab end def write_pid - SidekiqCluster.write_pid(@pid) if @pid + ProcessManagement.write_pid(@pid) if @pid end def soft_timeout_seconds @@ -123,11 +140,11 @@ module Gitlab end def continue_waiting?(deadline) - SidekiqCluster.any_alive?(@processes) && monotonic_time < deadline + ProcessManagement.any_alive?(@processes) && monotonic_time < deadline end def hard_stop_stuck_pids - SidekiqCluster.signal_processes(SidekiqCluster.pids_alive(@processes), "-KILL") + ProcessManagement.signal_processes(ProcessManagement.pids_alive(@processes), "-KILL") end def wait_for_termination @@ -138,14 +155,14 @@ module Gitlab end def trap_signals - SidekiqCluster.trap_terminate do |signal| + ProcessManagement.trap_signals(TERMINATE_SIGNALS) do |signal| @alive = false - SidekiqCluster.signal_processes(@processes, signal) + ProcessManagement.signal_processes(@processes, signal) wait_for_termination end - SidekiqCluster.trap_forward do |signal| - SidekiqCluster.signal_processes(@processes, signal) + ProcessManagement.trap_signals(FORWARD_SIGNALS) do |signal| + ProcessManagement.signal_processes(@processes, signal) end end @@ -153,17 +170,74 @@ module Gitlab while @alive sleep(@interval) - unless SidekiqCluster.all_alive?(@processes) + if metrics_server_enabled? && ProcessManagement.process_died?(@metrics_server_pid) + @logger.warn('Metrics server went away') + start_metrics_server(wipe_metrics_dir: false) + end + + unless ProcessManagement.all_alive?(@processes) # If a child process died we'll just terminate the whole cluster. It's up to # runit and such to then restart the cluster. @logger.info('A worker terminated, shutting down the cluster') - SidekiqCluster.signal_processes(@processes, :TERM) + stop_metrics_server + ProcessManagement.signal_processes(@processes, :TERM) break end end end + def start_metrics_server(wipe_metrics_dir: false) + return unless metrics_server_enabled? + + @logger.info("Starting metrics server on port #{sidekiq_exporter_port}") + @metrics_server_pid = MetricsServer.spawn( + 'sidekiq', + metrics_dir: @metrics_dir, + wipe_metrics_dir: wipe_metrics_dir, + trapped_signals: TERMINATE_SIGNALS + FORWARD_SIGNALS + ) + end + + def sidekiq_exporter_enabled? + ::Settings.monitoring.sidekiq_exporter.enabled + rescue Settingslogic::MissingSetting + nil + end + + def exporter_has_a_unique_port? + # In https://gitlab.com/gitlab-org/gitlab/-/issues/345802 we added settings for sidekiq_health_checks. + # These settings default to the same values as sidekiq_exporter for backwards compatibility. + # If a different port for sidekiq_health_checks has been set up, we know that the + # user wants to serve health checks and metrics from different servers. + return false if sidekiq_health_check_port.nil? || sidekiq_exporter_port.nil? + + sidekiq_exporter_port != sidekiq_health_check_port + end + + def sidekiq_exporter_port + ::Settings.monitoring.sidekiq_exporter.port + rescue Settingslogic::MissingSetting + nil + end + + def sidekiq_health_check_port + ::Settings.monitoring.sidekiq_health_checks.port + rescue Settingslogic::MissingSetting + nil + end + + def metrics_server_enabled? + !@dryrun && sidekiq_exporter_enabled? && exporter_has_a_unique_port? + end + + def stop_metrics_server + return unless @metrics_server_pid + + @logger.info("Stopping metrics server (PID #{@metrics_server_pid})") + ProcessManagement.signal(@metrics_server_pid, :TERM) + end + def option_parser OptionParser.new do |opt| opt.banner = "#{File.basename(__FILE__)} [QUEUE,QUEUE] [QUEUE] ... [OPTIONS]" |