diff options
Diffstat (limited to 'lib/gitlab/database/load_balancing')
8 files changed, 199 insertions, 38 deletions
diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb index 6156515bd73..da313361073 100644 --- a/lib/gitlab/database/load_balancing/configuration.rb +++ b/lib/gitlab/database/load_balancing/configuration.rb @@ -7,7 +7,7 @@ module Gitlab class Configuration attr_accessor :hosts, :max_replication_difference, :max_replication_lag_time, :replica_check_interval, - :service_discovery, :model + :service_discovery # Creates a configuration object for the given ActiveRecord model. def self.for_model(model) @@ -41,6 +41,8 @@ module Gitlab end end + config.reuse_primary_connection! + config end @@ -59,6 +61,28 @@ module Gitlab disconnect_timeout: 120, use_tcp: false } + + # Temporary model for GITLAB_LOAD_BALANCING_REUSE_PRIMARY_ + # To be removed with FF + @primary_model = nil + end + + def db_config_name + @model.connection_db_config.name.to_sym + end + + # With connection re-use the primary connection can be overwritten + # to be used from different model + def primary_connection_specification_name + (@primary_model || @model).connection_specification_name + end + + def primary_db_config + (@primary_model || @model).connection_db_config + end + + def replica_db_config + @model.connection_db_config end def pool_size @@ -86,6 +110,30 @@ module Gitlab def service_discovery_enabled? service_discovery[:record].present? end + + # TODO: This is temporary code to allow re-use of primary connection + # if the two connections are pointing to the same host. This is needed + # to properly support transaction visibility. + # + # This behavior is required to support [Phase 3](https://gitlab.com/groups/gitlab-org/-/epics/6160#progress). + # This method is meant to be removed as soon as it is finished. + # + # The remapping is done as-is: + # export GITLAB_LOAD_BALANCING_REUSE_PRIMARY_<name-of-connection>=<new-name-of-connection> + # + # Ex.: + # export GITLAB_LOAD_BALANCING_REUSE_PRIMARY_ci=main + # + def reuse_primary_connection! + new_connection = ENV["GITLAB_LOAD_BALANCING_REUSE_PRIMARY_#{db_config_name}"] + return unless new_connection.present? + + @primary_model = Gitlab::Database.database_base_models[new_connection.to_sym] + + unless @primary_model + raise "Invalid value for 'GITLAB_LOAD_BALANCING_REUSE_PRIMARY_#{db_config_name}=#{new_connection}'" + end + end end end end diff --git a/lib/gitlab/database/load_balancing/connection_proxy.rb b/lib/gitlab/database/load_balancing/connection_proxy.rb index 1be63da8896..a91df2eccdd 100644 --- a/lib/gitlab/database/load_balancing/connection_proxy.rb +++ b/lib/gitlab/database/load_balancing/connection_proxy.rb @@ -13,6 +13,13 @@ module Gitlab WriteInsideReadOnlyTransactionError = Class.new(StandardError) READ_ONLY_TRANSACTION_KEY = :load_balacing_read_only_transaction + # The load balancer returned by connection might be different + # between `model.connection.load_balancer` vs `model.load_balancer` + # + # The used `model.connection` is dependent on `use_model_load_balancing`. + # See more in: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73949. + # + # Always use `model.load_balancer` or `model.sticking`. attr_reader :load_balancer # These methods perform writes after which we need to stick to the diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb index 2be7f0baa60..1e27bcfc55d 100644 --- a/lib/gitlab/database/load_balancing/load_balancer.rb +++ b/lib/gitlab/database/load_balancing/load_balancer.rb @@ -12,7 +12,7 @@ module Gitlab REPLICA_SUFFIX = '_replica' - attr_reader :name, :host_list, :configuration + attr_reader :host_list, :configuration # configuration - An instance of `LoadBalancing::Configuration` that # contains the configuration details (such as the hosts) @@ -26,8 +26,10 @@ module Gitlab else HostList.new(configuration.hosts.map { |addr| Host.new(addr, self) }) end + end - @name = @configuration.model.connection_db_config.name.to_sym + def name + @configuration.db_config_name end def primary_only? @@ -64,7 +66,7 @@ module Gitlab # times before using the primary instead. will_retry = conflict_retried < @host_list.length * 3 - LoadBalancing::Logger.warn( + ::Gitlab::Database::LoadBalancing::Logger.warn( event: :host_query_conflict, message: 'Query conflict on host', conflict_retried: conflict_retried, @@ -89,7 +91,7 @@ module Gitlab end end - LoadBalancing::Logger.warn( + ::Gitlab::Database::LoadBalancing::Logger.warn( event: :no_secondaries_available, message: 'No secondaries were available, using primary instead', conflict_retried: conflict_retried, @@ -136,7 +138,7 @@ module Gitlab # Returns the transaction write location of the primary. def primary_write_location location = read_write do |connection| - ::Gitlab::Database.main.get_write_location(connection) + get_write_location(connection) end return location if location @@ -230,7 +232,7 @@ module Gitlab # host - An optional host name to use instead of the default one. # port - An optional port to connect to. def create_replica_connection_pool(pool_size, host = nil, port = nil) - db_config = pool.db_config + db_config = @configuration.replica_db_config env_config = db_config.configuration_hash.dup env_config[:pool] = pool_size @@ -255,22 +257,67 @@ module Gitlab # leverage that. def pool ActiveRecord::Base.connection_handler.retrieve_connection_pool( - @configuration.model.connection_specification_name, + @configuration.primary_connection_specification_name, role: ActiveRecord::Base.writing_role, shard: ActiveRecord::Base.default_shard ) || raise(::ActiveRecord::ConnectionNotEstablished) end + def wal_diff(location1, location2) + read_write do |connection| + lsn1 = connection.quote(location1) + lsn2 = connection.quote(location2) + + query = <<-SQL.squish + SELECT pg_wal_lsn_diff(#{lsn1}, #{lsn2}) + AS result + SQL + + row = connection.select_all(query).first + row['result'] if row + end + end + private def ensure_caching! - host.enable_query_cache! unless host.query_cache_enabled + return unless Rails.application.executor.active? + return if host.query_cache_enabled + + host.enable_query_cache! end def request_cache base = SafeRequestStore[:gitlab_load_balancer] ||= {} base[self] ||= {} end + + # @param [ActiveRecord::Connection] ar_connection + # @return [String] + def get_write_location(ar_connection) + use_new_load_balancer_query = Gitlab::Utils + .to_boolean(ENV['USE_NEW_LOAD_BALANCER_QUERY'], default: true) + + sql = + if use_new_load_balancer_query + <<~NEWSQL + SELECT CASE + WHEN pg_is_in_recovery() = true AND EXISTS (SELECT 1 FROM pg_stat_get_wal_senders()) + THEN pg_last_wal_replay_lsn()::text + WHEN pg_is_in_recovery() = false + THEN pg_current_wal_insert_lsn()::text + ELSE NULL + END AS location; + NEWSQL + else + <<~SQL + SELECT pg_current_wal_insert_lsn()::text AS location + SQL + end + + row = ar_connection.select_all(sql).first + row['location'] if row + end end end end diff --git a/lib/gitlab/database/load_balancing/primary_host.rb b/lib/gitlab/database/load_balancing/primary_host.rb index 7070cc54d4b..fb52b384ddb 100644 --- a/lib/gitlab/database/load_balancing/primary_host.rb +++ b/lib/gitlab/database/load_balancing/primary_host.rb @@ -49,6 +49,11 @@ module Gitlab end def offline! + ::Gitlab::Database::LoadBalancing::Logger.warn( + event: :host_offline, + message: 'Marking primary host as offline' + ) + nil end diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb index 7ce7649cc22..99b1c31b04b 100644 --- a/lib/gitlab/database/load_balancing/rack_middleware.rb +++ b/lib/gitlab/database/load_balancing/rack_middleware.rb @@ -38,8 +38,8 @@ module Gitlab def unstick_or_continue_sticking(env) namespaces_and_ids = sticking_namespaces(env) - namespaces_and_ids.each do |(model, namespace, id)| - model.sticking.unstick_or_continue_sticking(namespace, id) + namespaces_and_ids.each do |(sticking, namespace, id)| + sticking.unstick_or_continue_sticking(namespace, id) end end @@ -47,8 +47,8 @@ module Gitlab def stick_if_necessary(env) namespaces_and_ids = sticking_namespaces(env) - namespaces_and_ids.each do |model, namespace, id| - model.sticking.stick_if_necessary(namespace, id) + namespaces_and_ids.each do |sticking, namespace, id| + sticking.stick_if_necessary(namespace, id) end end @@ -74,7 +74,7 @@ module Gitlab # models that support load balancing. In the future (if we # determined this to be OK) we may be able to relax this. ::Gitlab::Database::LoadBalancing.base_models.map do |model| - [model, :user, warden.user.id] + [model.sticking, :user, warden.user.id] end elsif env[STICK_OBJECT].present? env[STICK_OBJECT].to_a diff --git a/lib/gitlab/database/load_balancing/setup.rb b/lib/gitlab/database/load_balancing/setup.rb index 3cce839a960..ef38f42f50b 100644 --- a/lib/gitlab/database/load_balancing/setup.rb +++ b/lib/gitlab/database/load_balancing/setup.rb @@ -5,7 +5,7 @@ module Gitlab module LoadBalancing # Class for setting up load balancing of a specific model. class Setup - attr_reader :configuration + attr_reader :model, :configuration def initialize(model, start_service_discovery: false) @model = model @@ -14,47 +14,102 @@ module Gitlab end def setup - disable_prepared_statements - setup_load_balancer + configure_connection + setup_connection_proxy setup_service_discovery + setup_feature_flag_to_model_load_balancing end - def disable_prepared_statements + def configure_connection db_config_object = @model.connection_db_config - config = - db_config_object.configuration_hash.merge(prepared_statements: false) + + hash = db_config_object.configuration_hash.merge( + prepared_statements: false, + pool: Gitlab::Database.default_pool_size + ) hash_config = ActiveRecord::DatabaseConfigurations::HashConfig.new( db_config_object.env_name, db_config_object.name, - config + hash ) @model.establish_connection(hash_config) end - def setup_load_balancer - lb = LoadBalancer.new(configuration) - + def setup_connection_proxy # We just use a simple `class_attribute` here so we don't need to # inject any modules and/or expose unnecessary methods. - @model.class_attribute(:connection) - @model.class_attribute(:sticking) + setup_class_attribute(:load_balancer, load_balancer) + setup_class_attribute(:connection, ConnectionProxy.new(load_balancer)) + setup_class_attribute(:sticking, Sticking.new(load_balancer)) + end + + # TODO: This is temporary code to gradually redirect traffic to use + # a dedicated DB replicas, or DB primaries (depending on configuration) + # This implements a sticky behavior for the current request if enabled. + # + # This is needed for Phase 3 and Phase 4 of application rollout + # https://gitlab.com/groups/gitlab-org/-/epics/6160#progress + # + # If `GITLAB_USE_MODEL_LOAD_BALANCING` is set, its value is preferred + # Otherwise, a `use_model_load_balancing` FF value is used + def setup_feature_flag_to_model_load_balancing + return if active_record_base? - @model.connection = ConnectionProxy.new(lb) - @model.sticking = Sticking.new(lb) + @model.singleton_class.prepend(ModelLoadBalancingFeatureFlagMixin) end def setup_service_discovery return unless configuration.service_discovery_enabled? - lb = @model.connection.load_balancer - sv = ServiceDiscovery.new(lb, **configuration.service_discovery) + sv = ServiceDiscovery.new(load_balancer, **configuration.service_discovery) sv.perform_service_discovery sv.start if @start_service_discovery end + + def load_balancer + @load_balancer ||= LoadBalancer.new(configuration) + end + + private + + def setup_class_attribute(attribute, value) + @model.class_attribute(attribute) + @model.public_send("#{attribute}=", value) # rubocop:disable GitlabSecurity/PublicSend + end + + def active_record_base? + @model == ActiveRecord::Base + end + + module ModelLoadBalancingFeatureFlagMixin + extend ActiveSupport::Concern + + def use_model_load_balancing? + # Cache environment variable and return env variable first if defined + use_model_load_balancing_env = Gitlab::Utils.to_boolean(ENV["GITLAB_USE_MODEL_LOAD_BALANCING"]) + + unless use_model_load_balancing_env.nil? + return use_model_load_balancing_env + end + + # Check a feature flag using RequestStore (if active) + return false unless Gitlab::SafeRequestStore.active? + + Gitlab::SafeRequestStore.fetch(:use_model_load_balancing) do + Feature.enabled?(:use_model_load_balancing, default_enabled: :yaml) + end + end + + # rubocop:disable Database/MultipleDatabases + def connection + use_model_load_balancing? ? super : ActiveRecord::Base.connection + end + # rubocop:enable Database/MultipleDatabases + end end end end diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb index f0c7016032b..b9acc36b4cc 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb @@ -13,7 +13,7 @@ module Gitlab job['load_balancing_strategy'] = strategy.to_s if use_primary?(strategy) - Session.current.use_primary! + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! elsif strategy == :retry raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\ " Replica was not up to date." @@ -29,8 +29,8 @@ module Gitlab private def clear - LoadBalancing.release_hosts - Session.clear_session + ::Gitlab::Database::LoadBalancing.release_hosts + ::Gitlab::Database::LoadBalancing::Session.clear_session end def use_primary?(strategy) @@ -66,7 +66,7 @@ module Gitlab def legacy_wal_location(job) wal_location = job['database_write_location'] || job['database_replica_location'] - { Gitlab::Database::MAIN_DATABASE_NAME.to_sym => wal_location } if wal_location + { ::Gitlab::Database::MAIN_DATABASE_NAME.to_sym => wal_location } if wal_location end def load_balancing_available?(worker_class) @@ -90,7 +90,7 @@ module Gitlab end def databases_in_sync?(wal_locations) - LoadBalancing.each_load_balancer.all? do |lb| + ::Gitlab::Database::LoadBalancing.each_load_balancer.all? do |lb| if (location = wal_locations[lb.name]) lb.select_up_to_date_host(location) else diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb index df4ad18581f..834e9c6d3c6 100644 --- a/lib/gitlab/database/load_balancing/sticking.rb +++ b/lib/gitlab/database/load_balancing/sticking.rb @@ -12,7 +12,6 @@ module Gitlab def initialize(load_balancer) @load_balancer = load_balancer - @model = load_balancer.configuration.model end # Unsticks or continues sticking the current request. @@ -27,8 +26,8 @@ module Gitlab def stick_or_unstick_request(env, namespace, id) unstick_or_continue_sticking(namespace, id) - env[RackMiddleware::STICK_OBJECT] ||= Set.new - env[RackMiddleware::STICK_OBJECT] << [@model, namespace, id] + env[::Gitlab::Database::LoadBalancing::RackMiddleware::STICK_OBJECT] ||= Set.new + env[::Gitlab::Database::LoadBalancing::RackMiddleware::STICK_OBJECT] << [self, namespace, id] end # Sticks to the primary if a write was performed. |