diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-10-20 11:43:02 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-10-20 11:43:02 +0300 |
commit | d9ab72d6080f594d0b3cae15f14b3ef2c6c638cb (patch) | |
tree | 2341ef426af70ad1e289c38036737e04b0aa5007 /lib/gitlab/database | |
parent | d6e514dd13db8947884cd58fe2a9c2a063400a9b (diff) |
Add latest changes from gitlab-org/gitlab@14-4-stable-eev14.4.0-rc42
Diffstat (limited to 'lib/gitlab/database')
23 files changed, 412 insertions, 260 deletions
diff --git a/lib/gitlab/database/count.rb b/lib/gitlab/database/count.rb index eac61254bdf..ce61c1ba9ad 100644 --- a/lib/gitlab/database/count.rb +++ b/lib/gitlab/database/count.rb @@ -35,7 +35,17 @@ module Gitlab # # @param [Array] # @return [Hash] of Model -> count mapping - def self.approximate_counts(models, strategies: [TablesampleCountStrategy, ReltuplesCountStrategy, ExactCountStrategy]) + def self.approximate_counts(models, strategies: []) + if strategies.empty? + # ExactCountStrategy is the only strategy working on read-only DBs, as others make + # use of tuple stats which use the primary DB to estimate tables size in a transaction. + strategies = if ::Gitlab::Database.read_write? + [TablesampleCountStrategy, ReltuplesCountStrategy, ExactCountStrategy] + else + [ExactCountStrategy] + end + end + strategies.each_with_object({}) do |strategy, counts_by_model| models_with_missing_counts = models - counts_by_model.keys diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb index bbfbf83222f..3e322e752b7 100644 --- a/lib/gitlab/database/load_balancing.rb +++ b/lib/gitlab/database/load_balancing.rb @@ -4,72 +4,34 @@ module Gitlab module Database module LoadBalancing # The exceptions raised for connection errors. - CONNECTION_ERRORS = if defined?(PG) - [ - PG::ConnectionBad, - PG::ConnectionDoesNotExist, - PG::ConnectionException, - PG::ConnectionFailure, - PG::UnableToSend, - # During a failover this error may be raised when - # writing to a primary. - PG::ReadOnlySqlTransaction - ].freeze - else - [].freeze - end - - ProxyNotConfiguredError = Class.new(StandardError) - - # The connection proxy to use for load balancing (if enabled). - def self.proxy - unless load_balancing_proxy = ActiveRecord::Base.load_balancing_proxy - Gitlab::ErrorTracking.track_exception( - ProxyNotConfiguredError.new( - "Attempting to access the database load balancing proxy, but it wasn't configured.\n" \ - "Did you forget to call '#{self.name}.configure_proxy'?" - )) - end - - load_balancing_proxy - end - - # Returns a Hash containing the load balancing configuration. - def self.configuration - @configuration ||= Configuration.for_model(ActiveRecord::Base) - end - - # Returns true if load balancing is to be enabled. - def self.enable? - return false if Gitlab::Runtime.rake? - - configured? - end + CONNECTION_ERRORS = [ + PG::ConnectionBad, + PG::ConnectionDoesNotExist, + PG::ConnectionException, + PG::ConnectionFailure, + PG::UnableToSend, + # During a failover this error may be raised when + # writing to a primary. + PG::ReadOnlySqlTransaction, + # This error is raised when we can't connect to the database in the + # first place (e.g. it's offline or the hostname is incorrect). + ActiveRecord::ConnectionNotEstablished + ].freeze - def self.configured? - configuration.load_balancing_enabled? || - configuration.service_discovery_enabled? + def self.base_models + @base_models ||= ::Gitlab::Database.database_base_models.values.freeze end - def self.start_service_discovery - return unless configuration.service_discovery_enabled? + def self.each_load_balancer + return to_enum(__method__) unless block_given? - ServiceDiscovery - .new(proxy.load_balancer, **configuration.service_discovery) - .start + base_models.each do |model| + yield model.connection.load_balancer + end end - # Configures proxying of requests. - def self.configure_proxy - lb = LoadBalancer.new(configuration, primary_only: !enable?) - ActiveRecord::Base.load_balancing_proxy = ConnectionProxy.new(lb) - - # Populate service discovery immediately if it is configured - if configuration.service_discovery_enabled? - ServiceDiscovery - .new(lb, **configuration.service_discovery) - .perform_service_discovery - end + def self.release_hosts + each_load_balancer(&:release_host) end DB_ROLES = [ diff --git a/lib/gitlab/database/load_balancing/action_cable_callbacks.rb b/lib/gitlab/database/load_balancing/action_cable_callbacks.rb index 4feba989a0a..7164976ff73 100644 --- a/lib/gitlab/database/load_balancing/action_cable_callbacks.rb +++ b/lib/gitlab/database/load_balancing/action_cable_callbacks.rb @@ -16,7 +16,7 @@ module Gitlab inner.call ensure - ::Gitlab::Database::LoadBalancing.proxy.load_balancer.release_host + ::Gitlab::Database::LoadBalancing.release_hosts ::Gitlab::Database::LoadBalancing::Session.clear_session end end diff --git a/lib/gitlab/database/load_balancing/active_record_proxy.rb b/lib/gitlab/database/load_balancing/active_record_proxy.rb deleted file mode 100644 index deaea62d774..00000000000 --- a/lib/gitlab/database/load_balancing/active_record_proxy.rb +++ /dev/null @@ -1,15 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - module LoadBalancing - # Module injected into ActiveRecord::Base to allow hijacking of the - # "connection" method. - module ActiveRecordProxy - def connection - ::Gitlab::Database::LoadBalancing.proxy - end - end - end - end -end diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb index 238f55fd98e..6156515bd73 100644 --- a/lib/gitlab/database/load_balancing/configuration.rb +++ b/lib/gitlab/database/load_balancing/configuration.rb @@ -72,7 +72,14 @@ module Gitlab Database.default_pool_size end + # Returns `true` if the use of load balancing replicas should be + # enabled. + # + # This is disabled for Rake tasks to ensure e.g. database migrations + # always produce consistent results. def load_balancing_enabled? + return false if Gitlab::Runtime.rake? + hosts.any? || service_discovery_enabled? end diff --git a/lib/gitlab/database/load_balancing/host.rb b/lib/gitlab/database/load_balancing/host.rb index acd7df0a263..bdbb80d6f31 100644 --- a/lib/gitlab/database/load_balancing/host.rb +++ b/lib/gitlab/database/load_balancing/host.rb @@ -9,19 +9,12 @@ module Gitlab delegate :connection, :release_connection, :enable_query_cache!, :disable_query_cache!, :query_cache_enabled, to: :pool - CONNECTION_ERRORS = - if defined?(PG) - [ - ActionView::Template::Error, - ActiveRecord::StatementInvalid, - PG::Error - ].freeze - else - [ - ActionView::Template::Error, - ActiveRecord::StatementInvalid - ].freeze - end + CONNECTION_ERRORS = [ + ActionView::Template::Error, + ActiveRecord::StatementInvalid, + ActiveRecord::ConnectionNotEstablished, + PG::Error + ].freeze # host - The address of the database. # load_balancer - The LoadBalancer that manages this Host. diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb index 9b00b323301..cc9ca325337 100644 --- a/lib/gitlab/database/load_balancing/load_balancer.rb +++ b/lib/gitlab/database/load_balancing/load_balancer.rb @@ -12,22 +12,26 @@ module Gitlab REPLICA_SUFFIX = '_replica' - attr_reader :host_list, :configuration + attr_reader :name, :host_list, :configuration # configuration - An instance of `LoadBalancing::Configuration` that # contains the configuration details (such as the hosts) # for this load balancer. - # primary_only - If set, the replicas are ignored and the primary is - # always used. - def initialize(configuration, primary_only: false) + def initialize(configuration) @configuration = configuration - @primary_only = primary_only + @primary_only = !configuration.load_balancing_enabled? @host_list = - if primary_only + if @primary_only HostList.new([PrimaryHost.new(self)]) else HostList.new(configuration.hosts.map { |addr| Host.new(addr, self) }) end + + @name = @configuration.model.connection_db_config.name.to_sym + end + + def primary_only? + @primary_only end def disconnect!(timeout: 120) @@ -151,6 +155,17 @@ module Gitlab # Yields a block, retrying it upon error using an exponential backoff. def retry_with_backoff(retries = 3, time = 2) + # In CI we only use the primary, but databases may not always be + # available (or take a few seconds to become available). Retrying in + # this case can slow down CI jobs. In addition, retrying with _only_ + # a primary being present isn't all that helpful. + # + # To prevent this from happening, we don't make any attempt at + # retrying unless one or more replicas are used. This matches the + # behaviour from before we enabled load balancing code even if no + # replicas were configured. + return yield if primary_only? + retried = 0 last_error = nil @@ -176,6 +191,11 @@ module Gitlab def connection_error?(error) case error + when ActiveRecord::NoDatabaseError + # Retrying this error isn't going to magically make the database + # appear. It also slows down CI jobs that are meant to create the + # database in the first place. + false when ActiveRecord::StatementInvalid, ActionView::Template::Error # After connecting to the DB Rails will wrap query errors using this # class. @@ -235,7 +255,7 @@ module Gitlab @configuration.model.connection_specification_name, role: ActiveRecord::Base.writing_role, shard: ActiveRecord::Base.default_shard - ) + ) || raise(::ActiveRecord::ConnectionNotEstablished) end private diff --git a/lib/gitlab/database/load_balancing/primary_host.rb b/lib/gitlab/database/load_balancing/primary_host.rb index e379652c260..7070cc54d4b 100644 --- a/lib/gitlab/database/load_balancing/primary_host.rb +++ b/lib/gitlab/database/load_balancing/primary_host.rb @@ -11,6 +11,12 @@ module Gitlab # balancing is enabled, but no replicas have been configured (= the # default case). class PrimaryHost + WAL_ERROR_MESSAGE = <<~MSG.strip + Obtaining WAL information when not using any replicas results in + redundant queries, and may break installations that don't support + streaming replication (e.g. AWS' Aurora database). + MSG + def initialize(load_balancer) @load_balancer = load_balancer end @@ -51,30 +57,16 @@ module Gitlab end def primary_write_location - @load_balancer.primary_write_location + raise NotImplementedError, WAL_ERROR_MESSAGE end def database_replica_location - row = query_and_release(<<-SQL.squish) - SELECT pg_last_wal_replay_lsn()::text AS location - SQL - - row['location'] if row.any? - rescue *Host::CONNECTION_ERRORS - nil + raise NotImplementedError, WAL_ERROR_MESSAGE end def caught_up?(_location) true end - - def query_and_release(sql) - connection.select_all(sql).first || {} - rescue StandardError - {} - ensure - release_connection - end end end end diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb index f8a31622b7d..7ce7649cc22 100644 --- a/lib/gitlab/database/load_balancing/rack_middleware.rb +++ b/lib/gitlab/database/load_balancing/rack_middleware.rb @@ -9,23 +9,6 @@ module Gitlab class RackMiddleware STICK_OBJECT = 'load_balancing.stick_object' - # Unsticks or continues sticking the current request. - # - # This method also updates the Rack environment so #call can later - # determine if we still need to stick or not. - # - # env - The Rack environment. - # namespace - The namespace to use for sticking. - # id - The identifier to use for sticking. - def self.stick_or_unstick(env, namespace, id) - return unless ::Gitlab::Database::LoadBalancing.enable? - - ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id) - - env[STICK_OBJECT] ||= Set.new - env[STICK_OBJECT] << [namespace, id] - end - def initialize(app) @app = app end @@ -53,41 +36,46 @@ module Gitlab # Typically this code will only be reachable for Rails requests as # Grape data is not yet available at this point. def unstick_or_continue_sticking(env) - namespaces_and_ids = sticking_namespaces_and_ids(env) + namespaces_and_ids = sticking_namespaces(env) - namespaces_and_ids.each do |namespace, id| - ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id) + namespaces_and_ids.each do |(model, namespace, id)| + model.sticking.unstick_or_continue_sticking(namespace, id) end end # Determine if we need to stick after handling a request. def stick_if_necessary(env) - namespaces_and_ids = sticking_namespaces_and_ids(env) + namespaces_and_ids = sticking_namespaces(env) - namespaces_and_ids.each do |namespace, id| - ::Gitlab::Database::LoadBalancing::Sticking.stick_if_necessary(namespace, id) + namespaces_and_ids.each do |model, namespace, id| + model.sticking.stick_if_necessary(namespace, id) end end def clear - load_balancer.release_host + ::Gitlab::Database::LoadBalancing.release_hosts ::Gitlab::Database::LoadBalancing::Session.clear_session end - def load_balancer - ::Gitlab::Database::LoadBalancing.proxy.load_balancer - end - # Determines the sticking namespace and identifier based on the Rack # environment. # # For Rails requests this uses warden, but Grape and others have to # manually set the right environment variable. - def sticking_namespaces_and_ids(env) + def sticking_namespaces(env) warden = env['warden'] if warden && warden.user - [[:user, warden.user.id]] + # When sticking per user, _only_ sticking the main connection could + # result in the application trying to read data from a different + # connection, while that data isn't available yet. + # + # To prevent this from happening, we scope sticking to all the + # models that support load balancing. In the future (if we + # determined this to be OK) we may be able to relax this. + ::Gitlab::Database::LoadBalancing.base_models.map do |model| + [model, :user, warden.user.id] + end elsif env[STICK_OBJECT].present? env[STICK_OBJECT].to_a else diff --git a/lib/gitlab/database/load_balancing/setup.rb b/lib/gitlab/database/load_balancing/setup.rb new file mode 100644 index 00000000000..3cce839a960 --- /dev/null +++ b/lib/gitlab/database/load_balancing/setup.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # Class for setting up load balancing of a specific model. + class Setup + attr_reader :configuration + + def initialize(model, start_service_discovery: false) + @model = model + @configuration = Configuration.for_model(model) + @start_service_discovery = start_service_discovery + end + + def setup + disable_prepared_statements + setup_load_balancer + setup_service_discovery + end + + def disable_prepared_statements + db_config_object = @model.connection_db_config + config = + db_config_object.configuration_hash.merge(prepared_statements: false) + + hash_config = ActiveRecord::DatabaseConfigurations::HashConfig.new( + db_config_object.env_name, + db_config_object.name, + config + ) + + @model.establish_connection(hash_config) + end + + def setup_load_balancer + lb = LoadBalancer.new(configuration) + + # We just use a simple `class_attribute` here so we don't need to + # inject any modules and/or expose unnecessary methods. + @model.class_attribute(:connection) + @model.class_attribute(:sticking) + + @model.connection = ConnectionProxy.new(lb) + @model.sticking = Sticking.new(lb) + end + + def setup_service_discovery + return unless configuration.service_discovery_enabled? + + lb = @model.connection.load_balancer + sv = ServiceDiscovery.new(lb, **configuration.service_discovery) + + sv.perform_service_discovery + + sv.start if @start_service_discovery + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb index 518a812b406..62dfe75a851 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb @@ -30,26 +30,26 @@ module Gitlab end def set_data_consistency_locations!(job) - # Once we add support for multiple databases to our load balancer, we would use something like this: - # job['wal_locations'] = Gitlab::Database::DATABASES.transform_values do |connection| - # connection.load_balancer.primary_write_location - # end - # - job['wal_locations'] = { Gitlab::Database::MAIN_DATABASE_NAME.to_sym => wal_location } if wal_location - end + locations = {} - def wal_location - strong_memoize(:wal_location) do - if Session.current.use_primary? - load_balancer.primary_write_location - else - load_balancer.host.database_replica_location + ::Gitlab::Database::LoadBalancing.each_load_balancer do |lb| + if (location = wal_location_for(lb)) + locations[lb.name] = location end end + + job['wal_locations'] = locations end - def load_balancer - LoadBalancing.proxy.load_balancer + def wal_location_for(load_balancer) + # When only using the primary there's no need for any WAL queries. + return if load_balancer.primary_only? + + if ::Gitlab::Database::LoadBalancing::Session.current.use_primary? + load_balancer.primary_write_location + else + load_balancer.host.database_replica_location + end end end end diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb index 15f8f0fb240..f0c7016032b 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb @@ -29,7 +29,7 @@ module Gitlab private def clear - release_hosts + LoadBalancing.release_hosts Session.clear_session end @@ -44,7 +44,7 @@ module Gitlab return :primary_no_wal unless wal_locations - if all_databases_has_replica_caught_up?(wal_locations) + if databases_in_sync?(wal_locations) # Happy case: we can read from a replica. retried_before?(worker_class, job) ? :replica_retried : :replica elsif can_retry?(worker_class, job) @@ -89,27 +89,18 @@ module Gitlab job['retry_count'].nil? end - def all_databases_has_replica_caught_up?(wal_locations) - wal_locations.all? do |_config_name, location| - # Once we add support for multiple databases to our load balancer, we would use something like this: - # Gitlab::Database::DATABASES[config_name].load_balancer.select_up_to_date_host(location) - load_balancer.select_up_to_date_host(location) + def databases_in_sync?(wal_locations) + LoadBalancing.each_load_balancer.all? do |lb| + if (location = wal_locations[lb.name]) + lb.select_up_to_date_host(location) + else + # If there's no entry for a load balancer it means the Sidekiq + # job doesn't care for it. In this case we'll treat the load + # balancer as being in sync. + true + end end end - - def release_hosts - # Once we add support for multiple databases to our load balancer, we would use something like this: - # connection.load_balancer.primary_write_location - # - # Gitlab::Database::DATABASES.values.each do |connection| - # connection.load_balancer.release_host - # end - load_balancer.release_host - end - - def load_balancer - LoadBalancing.proxy.load_balancer - end end end end diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb index 20d42b9a694..df4ad18581f 100644 --- a/lib/gitlab/database/load_balancing/sticking.rb +++ b/lib/gitlab/database/load_balancing/sticking.rb @@ -5,36 +5,47 @@ module Gitlab module LoadBalancing # Module used for handling sticking connections to a primary, if # necessary. - # - # ## Examples - # - # Sticking a user to the primary: - # - # Sticking.stick_if_necessary(:user, current_user.id) - # - # To unstick if possible, or continue using the primary otherwise: - # - # Sticking.unstick_or_continue_sticking(:user, current_user.id) - module Sticking + class Sticking # The number of seconds after which a session should stop reading from # the primary. EXPIRATION = 30 - # Sticks to the primary if a write was performed. - def self.stick_if_necessary(namespace, id) - return unless LoadBalancing.enable? + def initialize(load_balancer) + @load_balancer = load_balancer + @model = load_balancer.configuration.model + end - stick(namespace, id) if Session.current.performed_write? + # Unsticks or continues sticking the current request. + # + # This method also updates the Rack environment so #call can later + # determine if we still need to stick or not. + # + # env - The Rack environment. + # namespace - The namespace to use for sticking. + # id - The identifier to use for sticking. + # model - The ActiveRecord model to scope sticking to. + def stick_or_unstick_request(env, namespace, id) + unstick_or_continue_sticking(namespace, id) + + env[RackMiddleware::STICK_OBJECT] ||= Set.new + env[RackMiddleware::STICK_OBJECT] << [@model, namespace, id] + end + + # Sticks to the primary if a write was performed. + def stick_if_necessary(namespace, id) + stick(namespace, id) if ::Gitlab::Database::LoadBalancing::Session.current.performed_write? end - # Checks if we are caught-up with all the work - def self.all_caught_up?(namespace, id) + def all_caught_up?(namespace, id) location = last_write_location_for(namespace, id) return true unless location - load_balancer.select_up_to_date_host(location).tap do |found| - ActiveSupport::Notifications.instrument('caught_up_replica_pick.load_balancing', { result: found } ) + @load_balancer.select_up_to_date_host(location).tap do |found| + ActiveSupport::Notifications.instrument( + 'caught_up_replica_pick.load_balancing', + { result: found } + ) unstick(namespace, id) if found end @@ -45,7 +56,7 @@ module Gitlab # in another thread. # # Returns true if one host was selected. - def self.select_caught_up_replicas(namespace, id) + def select_caught_up_replicas(namespace, id) location = last_write_location_for(namespace, id) # Unlike all_caught_up?, we return false if no write location exists. @@ -53,95 +64,92 @@ module Gitlab # write location. If no such location exists, err on the side of caution. return false unless location - load_balancer.select_up_to_date_host(location).tap do |selected| + @load_balancer.select_up_to_date_host(location).tap do |selected| unstick(namespace, id) if selected end end # Sticks to the primary if necessary, otherwise unsticks an object (if # it was previously stuck to the primary). - def self.unstick_or_continue_sticking(namespace, id) - Session.current.use_primary! unless all_caught_up?(namespace, id) + def unstick_or_continue_sticking(namespace, id) + return if all_caught_up?(namespace, id) + + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! end # Select a replica that has caught up with the primary. If one has not been # found, stick to the primary. - def self.select_valid_host(namespace, id) - replica_selected = select_caught_up_replicas(namespace, id) + def select_valid_host(namespace, id) + replica_selected = + select_caught_up_replicas(namespace, id) - Session.current.use_primary! unless replica_selected + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! unless replica_selected end # Starts sticking to the primary for the given namespace and id, using # the latest WAL pointer from the primary. - def self.stick(namespace, id) - return unless LoadBalancing.enable? - + def stick(namespace, id) mark_primary_write_location(namespace, id) - Session.current.use_primary! + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! end - def self.bulk_stick(namespace, ids) - return unless LoadBalancing.enable? - + def bulk_stick(namespace, ids) with_primary_write_location do |location| ids.each do |id| set_write_location_for(namespace, id, location) end end - Session.current.use_primary! + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! end - def self.with_primary_write_location - return unless LoadBalancing.configured? + def with_primary_write_location + # When only using the primary, there's no point in getting write + # locations, as the primary is always in sync with itself. + return if @load_balancer.primary_only? - # Load balancing could be enabled for the Web application server, - # but it's not activated for Sidekiq. We should update Redis with - # the write location just in case load balancing is being used. - location = - if LoadBalancing.enable? - load_balancer.primary_write_location - else - Gitlab::Database.main.get_write_location(ActiveRecord::Base.connection) - end + location = @load_balancer.primary_write_location return if location.blank? yield(location) end - def self.mark_primary_write_location(namespace, id) + def mark_primary_write_location(namespace, id) with_primary_write_location do |location| set_write_location_for(namespace, id, location) end end - # Stops sticking to the primary. - def self.unstick(namespace, id) + def unstick(namespace, id) Gitlab::Redis::SharedState.with do |redis| redis.del(redis_key_for(namespace, id)) + redis.del(old_redis_key_for(namespace, id)) end end - def self.set_write_location_for(namespace, id, location) + def set_write_location_for(namespace, id, location) Gitlab::Redis::SharedState.with do |redis| redis.set(redis_key_for(namespace, id), location, ex: EXPIRATION) + redis.set(old_redis_key_for(namespace, id), location, ex: EXPIRATION) end end - def self.last_write_location_for(namespace, id) + def last_write_location_for(namespace, id) Gitlab::Redis::SharedState.with do |redis| - redis.get(redis_key_for(namespace, id)) + redis.get(redis_key_for(namespace, id)) || + redis.get(old_redis_key_for(namespace, id)) end end - def self.redis_key_for(namespace, id) - "database-load-balancing/write-location/#{namespace}/#{id}" + def redis_key_for(namespace, id) + name = @load_balancer.name + + "database-load-balancing/write-location/#{name}/#{namespace}/#{id}" end - def self.load_balancer - LoadBalancing.proxy.load_balancer + def old_redis_key_for(namespace, id) + "database-load-balancing/write-location/#{namespace}/#{id}" end end end diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb index 19d80ba1d64..bdaf0d35a83 100644 --- a/lib/gitlab/database/migrations/background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/background_migration_helpers.rb @@ -106,7 +106,7 @@ module Gitlab final_delay = 0 batch_counter = 0 - model_class.each_batch(of: batch_size) do |relation, index| + model_class.each_batch(of: batch_size, column: primary_column_name) do |relation, index| max = relation.arel_table[primary_column_name].maximum min = relation.arel_table[primary_column_name].minimum diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb index d1e55eb825c..6e5ffb74411 100644 --- a/lib/gitlab/database/migrations/instrumentation.rb +++ b/lib/gitlab/database/migrations/instrumentation.rb @@ -4,21 +4,21 @@ module Gitlab module Database module Migrations class Instrumentation - RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze STATS_FILENAME = 'migration-stats.json' attr_reader :observations - def initialize(observer_classes = ::Gitlab::Database::Migrations::Observers.all_observers) + def initialize(result_dir:, observer_classes: ::Gitlab::Database::Migrations::Observers.all_observers) @observer_classes = observer_classes @observations = [] + @result_dir = result_dir end def observe(version:, name:, &block) observation = Observation.new(version, name) observation.success = true - observers = observer_classes.map { |c| c.new(observation) } + observers = observer_classes.map { |c| c.new(observation, @result_dir) } exception = nil diff --git a/lib/gitlab/database/migrations/observers/migration_observer.rb b/lib/gitlab/database/migrations/observers/migration_observer.rb index 85d18abb9ef..106f8f1f829 100644 --- a/lib/gitlab/database/migrations/observers/migration_observer.rb +++ b/lib/gitlab/database/migrations/observers/migration_observer.rb @@ -5,11 +5,12 @@ module Gitlab module Migrations module Observers class MigrationObserver - attr_reader :connection, :observation + attr_reader :connection, :observation, :output_dir - def initialize(observation) + def initialize(observation, output_dir) @connection = ActiveRecord::Base.connection @observation = observation + @output_dir = output_dir end def before diff --git a/lib/gitlab/database/migrations/observers/query_details.rb b/lib/gitlab/database/migrations/observers/query_details.rb index dadacd2d2fc..8f4406e79a5 100644 --- a/lib/gitlab/database/migrations/observers/query_details.rb +++ b/lib/gitlab/database/migrations/observers/query_details.rb @@ -6,7 +6,7 @@ module Gitlab module Observers class QueryDetails < MigrationObserver def before - file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}-query-details.json") + file_path = File.join(output_dir, "#{observation.version}_#{observation.name}-query-details.json") @file = File.open(file_path, 'wb') @writer = Oj::StreamWriter.new(@file, {}) @writer.push_array diff --git a/lib/gitlab/database/migrations/observers/query_log.rb b/lib/gitlab/database/migrations/observers/query_log.rb index e15d733d2a2..c42fd8bd23d 100644 --- a/lib/gitlab/database/migrations/observers/query_log.rb +++ b/lib/gitlab/database/migrations/observers/query_log.rb @@ -7,7 +7,7 @@ module Gitlab class QueryLog < MigrationObserver def before @logger_was = ActiveRecord::Base.logger - file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}.log") + file_path = File.join(output_dir, "#{observation.version}_#{observation.name}.log") @logger = Logger.new(file_path) ActiveRecord::Base.logger = @logger end diff --git a/lib/gitlab/database/migrations/runner.rb b/lib/gitlab/database/migrations/runner.rb new file mode 100644 index 00000000000..b267a64256b --- /dev/null +++ b/lib/gitlab/database/migrations/runner.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + class Runner + BASE_RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze + + class << self + def up + Runner.new(direction: :up, migrations: migrations_for_up, result_dir: BASE_RESULT_DIR.join('up')) + end + + def down + Runner.new(direction: :down, migrations: migrations_for_down, result_dir: BASE_RESULT_DIR.join('down')) + end + + def migration_context + @migration_context ||= ApplicationRecord.connection.migration_context + end + + private + + def migrations_for_up + existing_versions = migration_context.get_all_versions.to_set + + migration_context.migrations.reject do |migration| + existing_versions.include?(migration.version) + end + end + + def migration_file_names_this_branch + `git diff --name-only origin/HEAD...HEAD db/post_migrate db/migrate`.split("\n") + end + + def migrations_for_down + versions_this_branch = migration_file_names_this_branch.map do |m_name| + m_name.match(%r{^db/(post_)?migrate/(\d+)}) { |m| m.captures[1]&.to_i } + end.to_set + + existing_versions = migration_context.get_all_versions.to_set + migration_context.migrations.select do |migration| + existing_versions.include?(migration.version) && versions_this_branch.include?(migration.version) + end + end + end + + attr_reader :direction, :result_dir, :migrations + + delegate :migration_context, to: :class + + def initialize(direction:, migrations:, result_dir:) + raise "Direction must be up or down" unless %i[up down].include?(direction) + + @direction = direction + @migrations = migrations + @result_dir = result_dir + end + + def run + FileUtils.mkdir_p(result_dir) + + verbose_was = ActiveRecord::Migration.verbose + ActiveRecord::Migration.verbose = true + + sorted_migrations = migrations.sort_by(&:version) + sorted_migrations.reverse! if direction == :down + + instrumentation = Instrumentation.new(result_dir: result_dir) + + sorted_migrations.each do |migration| + instrumentation.observe(version: migration.version, name: migration.name) do + ActiveRecord::Migrator.new(direction, migration_context.migrations, migration_context.schema_migration, migration.version).run + end + end + ensure + if instrumentation + File.open(File.join(result_dir, Gitlab::Database::Migrations::Instrumentation::STATS_FILENAME), 'wb+') do |io| + io << instrumentation.observations.to_json + end + end + + # We clear the cache here to mirror the cache clearing that happens at the end of `db:migrate` tasks + # This clearing makes subsequent rake tasks in the same execution pick up database schema changes caused by + # the migrations that were just executed + ApplicationRecord.clear_cache! + ActiveRecord::Migration.verbose = verbose_was + end + end + end + end +end diff --git a/lib/gitlab/database/partitioning.rb b/lib/gitlab/database/partitioning.rb index bbde2063c41..71fb995577a 100644 --- a/lib/gitlab/database/partitioning.rb +++ b/lib/gitlab/database/partitioning.rb @@ -14,6 +14,10 @@ module Gitlab def self.sync_partitions(models_to_sync = registered_models) MultiDatabasePartitionManager.new(models_to_sync).sync_partitions end + + def self.drop_detached_partitions + MultiDatabasePartitionDropper.new.drop_detached_partitions + end end end end diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb index dc63d93fd07..3e7ddece20b 100644 --- a/lib/gitlab/database/partitioning/detached_partition_dropper.rb +++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb @@ -7,18 +7,15 @@ module Gitlab return unless Feature.enabled?(:drop_detached_partitions, default_enabled: :yaml) Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop") + Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition| - conn.transaction do + connection.transaction do # Another process may have already dropped the table and deleted this entry next unless (detached_partition = Postgresql::DetachedPartition.lock.find_by(id: detached_partition.id)) - unless check_partition_detached?(detached_partition) - Gitlab::AppLogger.error(message: "Attempt to drop attached database partition", partition_name: detached_partition.table_name) - detached_partition.destroy! - next - end + drop_detached_partition(detached_partition.table_name) - drop_one(detached_partition) + detached_partition.destroy! end rescue StandardError => e Gitlab::AppLogger.error(message: "Failed to drop previously detached partition", @@ -30,25 +27,30 @@ module Gitlab private - def drop_one(detached_partition) - conn.transaction do - conn.execute(<<~SQL) - DROP TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{conn.quote_table_name(detached_partition.table_name)} - SQL + def drop_detached_partition(partition_name) + partition_identifier = qualify_partition_name(partition_name) + + if partition_detached?(partition_identifier) + connection.drop_table(partition_identifier, if_exists: true) - detached_partition.destroy! + Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: partition_name) + else + Gitlab::AppLogger.error(message: "Attempt to drop attached database partition", partition_name: partition_name) end - Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: detached_partition.table_name) end - def check_partition_detached?(detached_partition) + def qualify_partition_name(table_name) + "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{table_name}" + end + + def partition_detached?(partition_identifier) # PostgresPartition checks the pg_inherits view, so our partition will only show here if it's still attached # and thus should not be dropped - !PostgresPartition.for_identifier("#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{detached_partition.table_name}").exists? + !Gitlab::Database::PostgresPartition.for_identifier(partition_identifier).exists? end - def conn - @conn ||= ApplicationRecord.connection + def connection + Postgresql::DetachedPartition.connection end end end diff --git a/lib/gitlab/database/partitioning/multi_database_partition_dropper.rb b/lib/gitlab/database/partitioning/multi_database_partition_dropper.rb new file mode 100644 index 00000000000..769b658bae4 --- /dev/null +++ b/lib/gitlab/database/partitioning/multi_database_partition_dropper.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Partitioning + class MultiDatabasePartitionDropper + def drop_detached_partitions + Gitlab::AppLogger.info(message: "Dropping detached postgres partitions") + + each_database_connection do |name, connection| + Gitlab::Database::SharedModel.using_connection(connection) do + Gitlab::AppLogger.debug(message: "Switched database connection", connection_name: name) + + DetachedPartitionDropper.new.perform + end + end + + Gitlab::AppLogger.info(message: "Finished dropping detached postgres partitions") + end + + private + + def each_database_connection + databases.each_pair do |name, connection_wrapper| + yield name, connection_wrapper.scope.connection + end + end + + def databases + Gitlab::Database.databases + end + end + end + end +end diff --git a/lib/gitlab/database/shared_model.rb b/lib/gitlab/database/shared_model.rb index 8f256758961..f304c32d731 100644 --- a/lib/gitlab/database/shared_model.rb +++ b/lib/gitlab/database/shared_model.rb @@ -2,6 +2,7 @@ module Gitlab module Database + # This abstract class is used for models which need to exist in multiple de-composed databases. class SharedModel < ActiveRecord::Base self.abstract_class = true |