Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2021-10-20 11:43:02 +0300
committerGitLab Bot <gitlab-bot@gitlab.com>2021-10-20 11:43:02 +0300
commitd9ab72d6080f594d0b3cae15f14b3ef2c6c638cb (patch)
tree2341ef426af70ad1e289c38036737e04b0aa5007 /lib/gitlab/database
parentd6e514dd13db8947884cd58fe2a9c2a063400a9b (diff)
Add latest changes from gitlab-org/gitlab@14-4-stable-eev14.4.0-rc42
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--lib/gitlab/database/count.rb12
-rw-r--r--lib/gitlab/database/load_balancing.rb82
-rw-r--r--lib/gitlab/database/load_balancing/action_cable_callbacks.rb2
-rw-r--r--lib/gitlab/database/load_balancing/active_record_proxy.rb15
-rw-r--r--lib/gitlab/database/load_balancing/configuration.rb7
-rw-r--r--lib/gitlab/database/load_balancing/host.rb19
-rw-r--r--lib/gitlab/database/load_balancing/load_balancer.rb34
-rw-r--r--lib/gitlab/database/load_balancing/primary_host.rb24
-rw-r--r--lib/gitlab/database/load_balancing/rack_middleware.rb48
-rw-r--r--lib/gitlab/database/load_balancing/setup.rb61
-rw-r--r--lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb30
-rw-r--r--lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb33
-rw-r--r--lib/gitlab/database/load_balancing/sticking.rb118
-rw-r--r--lib/gitlab/database/migrations/background_migration_helpers.rb2
-rw-r--r--lib/gitlab/database/migrations/instrumentation.rb6
-rw-r--r--lib/gitlab/database/migrations/observers/migration_observer.rb5
-rw-r--r--lib/gitlab/database/migrations/observers/query_details.rb2
-rw-r--r--lib/gitlab/database/migrations/observers/query_log.rb2
-rw-r--r--lib/gitlab/database/migrations/runner.rb92
-rw-r--r--lib/gitlab/database/partitioning.rb4
-rw-r--r--lib/gitlab/database/partitioning/detached_partition_dropper.rb38
-rw-r--r--lib/gitlab/database/partitioning/multi_database_partition_dropper.rb35
-rw-r--r--lib/gitlab/database/shared_model.rb1
23 files changed, 412 insertions, 260 deletions
diff --git a/lib/gitlab/database/count.rb b/lib/gitlab/database/count.rb
index eac61254bdf..ce61c1ba9ad 100644
--- a/lib/gitlab/database/count.rb
+++ b/lib/gitlab/database/count.rb
@@ -35,7 +35,17 @@ module Gitlab
#
# @param [Array]
# @return [Hash] of Model -> count mapping
- def self.approximate_counts(models, strategies: [TablesampleCountStrategy, ReltuplesCountStrategy, ExactCountStrategy])
+ def self.approximate_counts(models, strategies: [])
+ if strategies.empty?
+ # ExactCountStrategy is the only strategy working on read-only DBs, as others make
+ # use of tuple stats which use the primary DB to estimate tables size in a transaction.
+ strategies = if ::Gitlab::Database.read_write?
+ [TablesampleCountStrategy, ReltuplesCountStrategy, ExactCountStrategy]
+ else
+ [ExactCountStrategy]
+ end
+ end
+
strategies.each_with_object({}) do |strategy, counts_by_model|
models_with_missing_counts = models - counts_by_model.keys
diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb
index bbfbf83222f..3e322e752b7 100644
--- a/lib/gitlab/database/load_balancing.rb
+++ b/lib/gitlab/database/load_balancing.rb
@@ -4,72 +4,34 @@ module Gitlab
module Database
module LoadBalancing
# The exceptions raised for connection errors.
- CONNECTION_ERRORS = if defined?(PG)
- [
- PG::ConnectionBad,
- PG::ConnectionDoesNotExist,
- PG::ConnectionException,
- PG::ConnectionFailure,
- PG::UnableToSend,
- # During a failover this error may be raised when
- # writing to a primary.
- PG::ReadOnlySqlTransaction
- ].freeze
- else
- [].freeze
- end
-
- ProxyNotConfiguredError = Class.new(StandardError)
-
- # The connection proxy to use for load balancing (if enabled).
- def self.proxy
- unless load_balancing_proxy = ActiveRecord::Base.load_balancing_proxy
- Gitlab::ErrorTracking.track_exception(
- ProxyNotConfiguredError.new(
- "Attempting to access the database load balancing proxy, but it wasn't configured.\n" \
- "Did you forget to call '#{self.name}.configure_proxy'?"
- ))
- end
-
- load_balancing_proxy
- end
-
- # Returns a Hash containing the load balancing configuration.
- def self.configuration
- @configuration ||= Configuration.for_model(ActiveRecord::Base)
- end
-
- # Returns true if load balancing is to be enabled.
- def self.enable?
- return false if Gitlab::Runtime.rake?
-
- configured?
- end
+ CONNECTION_ERRORS = [
+ PG::ConnectionBad,
+ PG::ConnectionDoesNotExist,
+ PG::ConnectionException,
+ PG::ConnectionFailure,
+ PG::UnableToSend,
+ # During a failover this error may be raised when
+ # writing to a primary.
+ PG::ReadOnlySqlTransaction,
+ # This error is raised when we can't connect to the database in the
+ # first place (e.g. it's offline or the hostname is incorrect).
+ ActiveRecord::ConnectionNotEstablished
+ ].freeze
- def self.configured?
- configuration.load_balancing_enabled? ||
- configuration.service_discovery_enabled?
+ def self.base_models
+ @base_models ||= ::Gitlab::Database.database_base_models.values.freeze
end
- def self.start_service_discovery
- return unless configuration.service_discovery_enabled?
+ def self.each_load_balancer
+ return to_enum(__method__) unless block_given?
- ServiceDiscovery
- .new(proxy.load_balancer, **configuration.service_discovery)
- .start
+ base_models.each do |model|
+ yield model.connection.load_balancer
+ end
end
- # Configures proxying of requests.
- def self.configure_proxy
- lb = LoadBalancer.new(configuration, primary_only: !enable?)
- ActiveRecord::Base.load_balancing_proxy = ConnectionProxy.new(lb)
-
- # Populate service discovery immediately if it is configured
- if configuration.service_discovery_enabled?
- ServiceDiscovery
- .new(lb, **configuration.service_discovery)
- .perform_service_discovery
- end
+ def self.release_hosts
+ each_load_balancer(&:release_host)
end
DB_ROLES = [
diff --git a/lib/gitlab/database/load_balancing/action_cable_callbacks.rb b/lib/gitlab/database/load_balancing/action_cable_callbacks.rb
index 4feba989a0a..7164976ff73 100644
--- a/lib/gitlab/database/load_balancing/action_cable_callbacks.rb
+++ b/lib/gitlab/database/load_balancing/action_cable_callbacks.rb
@@ -16,7 +16,7 @@ module Gitlab
inner.call
ensure
- ::Gitlab::Database::LoadBalancing.proxy.load_balancer.release_host
+ ::Gitlab::Database::LoadBalancing.release_hosts
::Gitlab::Database::LoadBalancing::Session.clear_session
end
end
diff --git a/lib/gitlab/database/load_balancing/active_record_proxy.rb b/lib/gitlab/database/load_balancing/active_record_proxy.rb
deleted file mode 100644
index deaea62d774..00000000000
--- a/lib/gitlab/database/load_balancing/active_record_proxy.rb
+++ /dev/null
@@ -1,15 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- module LoadBalancing
- # Module injected into ActiveRecord::Base to allow hijacking of the
- # "connection" method.
- module ActiveRecordProxy
- def connection
- ::Gitlab::Database::LoadBalancing.proxy
- end
- end
- end
- end
-end
diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb
index 238f55fd98e..6156515bd73 100644
--- a/lib/gitlab/database/load_balancing/configuration.rb
+++ b/lib/gitlab/database/load_balancing/configuration.rb
@@ -72,7 +72,14 @@ module Gitlab
Database.default_pool_size
end
+ # Returns `true` if the use of load balancing replicas should be
+ # enabled.
+ #
+ # This is disabled for Rake tasks to ensure e.g. database migrations
+ # always produce consistent results.
def load_balancing_enabled?
+ return false if Gitlab::Runtime.rake?
+
hosts.any? || service_discovery_enabled?
end
diff --git a/lib/gitlab/database/load_balancing/host.rb b/lib/gitlab/database/load_balancing/host.rb
index acd7df0a263..bdbb80d6f31 100644
--- a/lib/gitlab/database/load_balancing/host.rb
+++ b/lib/gitlab/database/load_balancing/host.rb
@@ -9,19 +9,12 @@ module Gitlab
delegate :connection, :release_connection, :enable_query_cache!, :disable_query_cache!, :query_cache_enabled, to: :pool
- CONNECTION_ERRORS =
- if defined?(PG)
- [
- ActionView::Template::Error,
- ActiveRecord::StatementInvalid,
- PG::Error
- ].freeze
- else
- [
- ActionView::Template::Error,
- ActiveRecord::StatementInvalid
- ].freeze
- end
+ CONNECTION_ERRORS = [
+ ActionView::Template::Error,
+ ActiveRecord::StatementInvalid,
+ ActiveRecord::ConnectionNotEstablished,
+ PG::Error
+ ].freeze
# host - The address of the database.
# load_balancer - The LoadBalancer that manages this Host.
diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb
index 9b00b323301..cc9ca325337 100644
--- a/lib/gitlab/database/load_balancing/load_balancer.rb
+++ b/lib/gitlab/database/load_balancing/load_balancer.rb
@@ -12,22 +12,26 @@ module Gitlab
REPLICA_SUFFIX = '_replica'
- attr_reader :host_list, :configuration
+ attr_reader :name, :host_list, :configuration
# configuration - An instance of `LoadBalancing::Configuration` that
# contains the configuration details (such as the hosts)
# for this load balancer.
- # primary_only - If set, the replicas are ignored and the primary is
- # always used.
- def initialize(configuration, primary_only: false)
+ def initialize(configuration)
@configuration = configuration
- @primary_only = primary_only
+ @primary_only = !configuration.load_balancing_enabled?
@host_list =
- if primary_only
+ if @primary_only
HostList.new([PrimaryHost.new(self)])
else
HostList.new(configuration.hosts.map { |addr| Host.new(addr, self) })
end
+
+ @name = @configuration.model.connection_db_config.name.to_sym
+ end
+
+ def primary_only?
+ @primary_only
end
def disconnect!(timeout: 120)
@@ -151,6 +155,17 @@ module Gitlab
# Yields a block, retrying it upon error using an exponential backoff.
def retry_with_backoff(retries = 3, time = 2)
+ # In CI we only use the primary, but databases may not always be
+ # available (or take a few seconds to become available). Retrying in
+ # this case can slow down CI jobs. In addition, retrying with _only_
+ # a primary being present isn't all that helpful.
+ #
+ # To prevent this from happening, we don't make any attempt at
+ # retrying unless one or more replicas are used. This matches the
+ # behaviour from before we enabled load balancing code even if no
+ # replicas were configured.
+ return yield if primary_only?
+
retried = 0
last_error = nil
@@ -176,6 +191,11 @@ module Gitlab
def connection_error?(error)
case error
+ when ActiveRecord::NoDatabaseError
+ # Retrying this error isn't going to magically make the database
+ # appear. It also slows down CI jobs that are meant to create the
+ # database in the first place.
+ false
when ActiveRecord::StatementInvalid, ActionView::Template::Error
# After connecting to the DB Rails will wrap query errors using this
# class.
@@ -235,7 +255,7 @@ module Gitlab
@configuration.model.connection_specification_name,
role: ActiveRecord::Base.writing_role,
shard: ActiveRecord::Base.default_shard
- )
+ ) || raise(::ActiveRecord::ConnectionNotEstablished)
end
private
diff --git a/lib/gitlab/database/load_balancing/primary_host.rb b/lib/gitlab/database/load_balancing/primary_host.rb
index e379652c260..7070cc54d4b 100644
--- a/lib/gitlab/database/load_balancing/primary_host.rb
+++ b/lib/gitlab/database/load_balancing/primary_host.rb
@@ -11,6 +11,12 @@ module Gitlab
# balancing is enabled, but no replicas have been configured (= the
# default case).
class PrimaryHost
+ WAL_ERROR_MESSAGE = <<~MSG.strip
+ Obtaining WAL information when not using any replicas results in
+ redundant queries, and may break installations that don't support
+ streaming replication (e.g. AWS' Aurora database).
+ MSG
+
def initialize(load_balancer)
@load_balancer = load_balancer
end
@@ -51,30 +57,16 @@ module Gitlab
end
def primary_write_location
- @load_balancer.primary_write_location
+ raise NotImplementedError, WAL_ERROR_MESSAGE
end
def database_replica_location
- row = query_and_release(<<-SQL.squish)
- SELECT pg_last_wal_replay_lsn()::text AS location
- SQL
-
- row['location'] if row.any?
- rescue *Host::CONNECTION_ERRORS
- nil
+ raise NotImplementedError, WAL_ERROR_MESSAGE
end
def caught_up?(_location)
true
end
-
- def query_and_release(sql)
- connection.select_all(sql).first || {}
- rescue StandardError
- {}
- ensure
- release_connection
- end
end
end
end
diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb
index f8a31622b7d..7ce7649cc22 100644
--- a/lib/gitlab/database/load_balancing/rack_middleware.rb
+++ b/lib/gitlab/database/load_balancing/rack_middleware.rb
@@ -9,23 +9,6 @@ module Gitlab
class RackMiddleware
STICK_OBJECT = 'load_balancing.stick_object'
- # Unsticks or continues sticking the current request.
- #
- # This method also updates the Rack environment so #call can later
- # determine if we still need to stick or not.
- #
- # env - The Rack environment.
- # namespace - The namespace to use for sticking.
- # id - The identifier to use for sticking.
- def self.stick_or_unstick(env, namespace, id)
- return unless ::Gitlab::Database::LoadBalancing.enable?
-
- ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id)
-
- env[STICK_OBJECT] ||= Set.new
- env[STICK_OBJECT] << [namespace, id]
- end
-
def initialize(app)
@app = app
end
@@ -53,41 +36,46 @@ module Gitlab
# Typically this code will only be reachable for Rails requests as
# Grape data is not yet available at this point.
def unstick_or_continue_sticking(env)
- namespaces_and_ids = sticking_namespaces_and_ids(env)
+ namespaces_and_ids = sticking_namespaces(env)
- namespaces_and_ids.each do |namespace, id|
- ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id)
+ namespaces_and_ids.each do |(model, namespace, id)|
+ model.sticking.unstick_or_continue_sticking(namespace, id)
end
end
# Determine if we need to stick after handling a request.
def stick_if_necessary(env)
- namespaces_and_ids = sticking_namespaces_and_ids(env)
+ namespaces_and_ids = sticking_namespaces(env)
- namespaces_and_ids.each do |namespace, id|
- ::Gitlab::Database::LoadBalancing::Sticking.stick_if_necessary(namespace, id)
+ namespaces_and_ids.each do |model, namespace, id|
+ model.sticking.stick_if_necessary(namespace, id)
end
end
def clear
- load_balancer.release_host
+ ::Gitlab::Database::LoadBalancing.release_hosts
::Gitlab::Database::LoadBalancing::Session.clear_session
end
- def load_balancer
- ::Gitlab::Database::LoadBalancing.proxy.load_balancer
- end
-
# Determines the sticking namespace and identifier based on the Rack
# environment.
#
# For Rails requests this uses warden, but Grape and others have to
# manually set the right environment variable.
- def sticking_namespaces_and_ids(env)
+ def sticking_namespaces(env)
warden = env['warden']
if warden && warden.user
- [[:user, warden.user.id]]
+ # When sticking per user, _only_ sticking the main connection could
+ # result in the application trying to read data from a different
+ # connection, while that data isn't available yet.
+ #
+ # To prevent this from happening, we scope sticking to all the
+ # models that support load balancing. In the future (if we
+ # determined this to be OK) we may be able to relax this.
+ ::Gitlab::Database::LoadBalancing.base_models.map do |model|
+ [model, :user, warden.user.id]
+ end
elsif env[STICK_OBJECT].present?
env[STICK_OBJECT].to_a
else
diff --git a/lib/gitlab/database/load_balancing/setup.rb b/lib/gitlab/database/load_balancing/setup.rb
new file mode 100644
index 00000000000..3cce839a960
--- /dev/null
+++ b/lib/gitlab/database/load_balancing/setup.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module LoadBalancing
+ # Class for setting up load balancing of a specific model.
+ class Setup
+ attr_reader :configuration
+
+ def initialize(model, start_service_discovery: false)
+ @model = model
+ @configuration = Configuration.for_model(model)
+ @start_service_discovery = start_service_discovery
+ end
+
+ def setup
+ disable_prepared_statements
+ setup_load_balancer
+ setup_service_discovery
+ end
+
+ def disable_prepared_statements
+ db_config_object = @model.connection_db_config
+ config =
+ db_config_object.configuration_hash.merge(prepared_statements: false)
+
+ hash_config = ActiveRecord::DatabaseConfigurations::HashConfig.new(
+ db_config_object.env_name,
+ db_config_object.name,
+ config
+ )
+
+ @model.establish_connection(hash_config)
+ end
+
+ def setup_load_balancer
+ lb = LoadBalancer.new(configuration)
+
+ # We just use a simple `class_attribute` here so we don't need to
+ # inject any modules and/or expose unnecessary methods.
+ @model.class_attribute(:connection)
+ @model.class_attribute(:sticking)
+
+ @model.connection = ConnectionProxy.new(lb)
+ @model.sticking = Sticking.new(lb)
+ end
+
+ def setup_service_discovery
+ return unless configuration.service_discovery_enabled?
+
+ lb = @model.connection.load_balancer
+ sv = ServiceDiscovery.new(lb, **configuration.service_discovery)
+
+ sv.perform_service_discovery
+
+ sv.start if @start_service_discovery
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
index 518a812b406..62dfe75a851 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
@@ -30,26 +30,26 @@ module Gitlab
end
def set_data_consistency_locations!(job)
- # Once we add support for multiple databases to our load balancer, we would use something like this:
- # job['wal_locations'] = Gitlab::Database::DATABASES.transform_values do |connection|
- # connection.load_balancer.primary_write_location
- # end
- #
- job['wal_locations'] = { Gitlab::Database::MAIN_DATABASE_NAME.to_sym => wal_location } if wal_location
- end
+ locations = {}
- def wal_location
- strong_memoize(:wal_location) do
- if Session.current.use_primary?
- load_balancer.primary_write_location
- else
- load_balancer.host.database_replica_location
+ ::Gitlab::Database::LoadBalancing.each_load_balancer do |lb|
+ if (location = wal_location_for(lb))
+ locations[lb.name] = location
end
end
+
+ job['wal_locations'] = locations
end
- def load_balancer
- LoadBalancing.proxy.load_balancer
+ def wal_location_for(load_balancer)
+ # When only using the primary there's no need for any WAL queries.
+ return if load_balancer.primary_only?
+
+ if ::Gitlab::Database::LoadBalancing::Session.current.use_primary?
+ load_balancer.primary_write_location
+ else
+ load_balancer.host.database_replica_location
+ end
end
end
end
diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
index 15f8f0fb240..f0c7016032b 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
@@ -29,7 +29,7 @@ module Gitlab
private
def clear
- release_hosts
+ LoadBalancing.release_hosts
Session.clear_session
end
@@ -44,7 +44,7 @@ module Gitlab
return :primary_no_wal unless wal_locations
- if all_databases_has_replica_caught_up?(wal_locations)
+ if databases_in_sync?(wal_locations)
# Happy case: we can read from a replica.
retried_before?(worker_class, job) ? :replica_retried : :replica
elsif can_retry?(worker_class, job)
@@ -89,27 +89,18 @@ module Gitlab
job['retry_count'].nil?
end
- def all_databases_has_replica_caught_up?(wal_locations)
- wal_locations.all? do |_config_name, location|
- # Once we add support for multiple databases to our load balancer, we would use something like this:
- # Gitlab::Database::DATABASES[config_name].load_balancer.select_up_to_date_host(location)
- load_balancer.select_up_to_date_host(location)
+ def databases_in_sync?(wal_locations)
+ LoadBalancing.each_load_balancer.all? do |lb|
+ if (location = wal_locations[lb.name])
+ lb.select_up_to_date_host(location)
+ else
+ # If there's no entry for a load balancer it means the Sidekiq
+ # job doesn't care for it. In this case we'll treat the load
+ # balancer as being in sync.
+ true
+ end
end
end
-
- def release_hosts
- # Once we add support for multiple databases to our load balancer, we would use something like this:
- # connection.load_balancer.primary_write_location
- #
- # Gitlab::Database::DATABASES.values.each do |connection|
- # connection.load_balancer.release_host
- # end
- load_balancer.release_host
- end
-
- def load_balancer
- LoadBalancing.proxy.load_balancer
- end
end
end
end
diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb
index 20d42b9a694..df4ad18581f 100644
--- a/lib/gitlab/database/load_balancing/sticking.rb
+++ b/lib/gitlab/database/load_balancing/sticking.rb
@@ -5,36 +5,47 @@ module Gitlab
module LoadBalancing
# Module used for handling sticking connections to a primary, if
# necessary.
- #
- # ## Examples
- #
- # Sticking a user to the primary:
- #
- # Sticking.stick_if_necessary(:user, current_user.id)
- #
- # To unstick if possible, or continue using the primary otherwise:
- #
- # Sticking.unstick_or_continue_sticking(:user, current_user.id)
- module Sticking
+ class Sticking
# The number of seconds after which a session should stop reading from
# the primary.
EXPIRATION = 30
- # Sticks to the primary if a write was performed.
- def self.stick_if_necessary(namespace, id)
- return unless LoadBalancing.enable?
+ def initialize(load_balancer)
+ @load_balancer = load_balancer
+ @model = load_balancer.configuration.model
+ end
- stick(namespace, id) if Session.current.performed_write?
+ # Unsticks or continues sticking the current request.
+ #
+ # This method also updates the Rack environment so #call can later
+ # determine if we still need to stick or not.
+ #
+ # env - The Rack environment.
+ # namespace - The namespace to use for sticking.
+ # id - The identifier to use for sticking.
+ # model - The ActiveRecord model to scope sticking to.
+ def stick_or_unstick_request(env, namespace, id)
+ unstick_or_continue_sticking(namespace, id)
+
+ env[RackMiddleware::STICK_OBJECT] ||= Set.new
+ env[RackMiddleware::STICK_OBJECT] << [@model, namespace, id]
+ end
+
+ # Sticks to the primary if a write was performed.
+ def stick_if_necessary(namespace, id)
+ stick(namespace, id) if ::Gitlab::Database::LoadBalancing::Session.current.performed_write?
end
- # Checks if we are caught-up with all the work
- def self.all_caught_up?(namespace, id)
+ def all_caught_up?(namespace, id)
location = last_write_location_for(namespace, id)
return true unless location
- load_balancer.select_up_to_date_host(location).tap do |found|
- ActiveSupport::Notifications.instrument('caught_up_replica_pick.load_balancing', { result: found } )
+ @load_balancer.select_up_to_date_host(location).tap do |found|
+ ActiveSupport::Notifications.instrument(
+ 'caught_up_replica_pick.load_balancing',
+ { result: found }
+ )
unstick(namespace, id) if found
end
@@ -45,7 +56,7 @@ module Gitlab
# in another thread.
#
# Returns true if one host was selected.
- def self.select_caught_up_replicas(namespace, id)
+ def select_caught_up_replicas(namespace, id)
location = last_write_location_for(namespace, id)
# Unlike all_caught_up?, we return false if no write location exists.
@@ -53,95 +64,92 @@ module Gitlab
# write location. If no such location exists, err on the side of caution.
return false unless location
- load_balancer.select_up_to_date_host(location).tap do |selected|
+ @load_balancer.select_up_to_date_host(location).tap do |selected|
unstick(namespace, id) if selected
end
end
# Sticks to the primary if necessary, otherwise unsticks an object (if
# it was previously stuck to the primary).
- def self.unstick_or_continue_sticking(namespace, id)
- Session.current.use_primary! unless all_caught_up?(namespace, id)
+ def unstick_or_continue_sticking(namespace, id)
+ return if all_caught_up?(namespace, id)
+
+ ::Gitlab::Database::LoadBalancing::Session.current.use_primary!
end
# Select a replica that has caught up with the primary. If one has not been
# found, stick to the primary.
- def self.select_valid_host(namespace, id)
- replica_selected = select_caught_up_replicas(namespace, id)
+ def select_valid_host(namespace, id)
+ replica_selected =
+ select_caught_up_replicas(namespace, id)
- Session.current.use_primary! unless replica_selected
+ ::Gitlab::Database::LoadBalancing::Session.current.use_primary! unless replica_selected
end
# Starts sticking to the primary for the given namespace and id, using
# the latest WAL pointer from the primary.
- def self.stick(namespace, id)
- return unless LoadBalancing.enable?
-
+ def stick(namespace, id)
mark_primary_write_location(namespace, id)
- Session.current.use_primary!
+ ::Gitlab::Database::LoadBalancing::Session.current.use_primary!
end
- def self.bulk_stick(namespace, ids)
- return unless LoadBalancing.enable?
-
+ def bulk_stick(namespace, ids)
with_primary_write_location do |location|
ids.each do |id|
set_write_location_for(namespace, id, location)
end
end
- Session.current.use_primary!
+ ::Gitlab::Database::LoadBalancing::Session.current.use_primary!
end
- def self.with_primary_write_location
- return unless LoadBalancing.configured?
+ def with_primary_write_location
+ # When only using the primary, there's no point in getting write
+ # locations, as the primary is always in sync with itself.
+ return if @load_balancer.primary_only?
- # Load balancing could be enabled for the Web application server,
- # but it's not activated for Sidekiq. We should update Redis with
- # the write location just in case load balancing is being used.
- location =
- if LoadBalancing.enable?
- load_balancer.primary_write_location
- else
- Gitlab::Database.main.get_write_location(ActiveRecord::Base.connection)
- end
+ location = @load_balancer.primary_write_location
return if location.blank?
yield(location)
end
- def self.mark_primary_write_location(namespace, id)
+ def mark_primary_write_location(namespace, id)
with_primary_write_location do |location|
set_write_location_for(namespace, id, location)
end
end
- # Stops sticking to the primary.
- def self.unstick(namespace, id)
+ def unstick(namespace, id)
Gitlab::Redis::SharedState.with do |redis|
redis.del(redis_key_for(namespace, id))
+ redis.del(old_redis_key_for(namespace, id))
end
end
- def self.set_write_location_for(namespace, id, location)
+ def set_write_location_for(namespace, id, location)
Gitlab::Redis::SharedState.with do |redis|
redis.set(redis_key_for(namespace, id), location, ex: EXPIRATION)
+ redis.set(old_redis_key_for(namespace, id), location, ex: EXPIRATION)
end
end
- def self.last_write_location_for(namespace, id)
+ def last_write_location_for(namespace, id)
Gitlab::Redis::SharedState.with do |redis|
- redis.get(redis_key_for(namespace, id))
+ redis.get(redis_key_for(namespace, id)) ||
+ redis.get(old_redis_key_for(namespace, id))
end
end
- def self.redis_key_for(namespace, id)
- "database-load-balancing/write-location/#{namespace}/#{id}"
+ def redis_key_for(namespace, id)
+ name = @load_balancer.name
+
+ "database-load-balancing/write-location/#{name}/#{namespace}/#{id}"
end
- def self.load_balancer
- LoadBalancing.proxy.load_balancer
+ def old_redis_key_for(namespace, id)
+ "database-load-balancing/write-location/#{namespace}/#{id}"
end
end
end
diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb
index 19d80ba1d64..bdaf0d35a83 100644
--- a/lib/gitlab/database/migrations/background_migration_helpers.rb
+++ b/lib/gitlab/database/migrations/background_migration_helpers.rb
@@ -106,7 +106,7 @@ module Gitlab
final_delay = 0
batch_counter = 0
- model_class.each_batch(of: batch_size) do |relation, index|
+ model_class.each_batch(of: batch_size, column: primary_column_name) do |relation, index|
max = relation.arel_table[primary_column_name].maximum
min = relation.arel_table[primary_column_name].minimum
diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb
index d1e55eb825c..6e5ffb74411 100644
--- a/lib/gitlab/database/migrations/instrumentation.rb
+++ b/lib/gitlab/database/migrations/instrumentation.rb
@@ -4,21 +4,21 @@ module Gitlab
module Database
module Migrations
class Instrumentation
- RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze
STATS_FILENAME = 'migration-stats.json'
attr_reader :observations
- def initialize(observer_classes = ::Gitlab::Database::Migrations::Observers.all_observers)
+ def initialize(result_dir:, observer_classes: ::Gitlab::Database::Migrations::Observers.all_observers)
@observer_classes = observer_classes
@observations = []
+ @result_dir = result_dir
end
def observe(version:, name:, &block)
observation = Observation.new(version, name)
observation.success = true
- observers = observer_classes.map { |c| c.new(observation) }
+ observers = observer_classes.map { |c| c.new(observation, @result_dir) }
exception = nil
diff --git a/lib/gitlab/database/migrations/observers/migration_observer.rb b/lib/gitlab/database/migrations/observers/migration_observer.rb
index 85d18abb9ef..106f8f1f829 100644
--- a/lib/gitlab/database/migrations/observers/migration_observer.rb
+++ b/lib/gitlab/database/migrations/observers/migration_observer.rb
@@ -5,11 +5,12 @@ module Gitlab
module Migrations
module Observers
class MigrationObserver
- attr_reader :connection, :observation
+ attr_reader :connection, :observation, :output_dir
- def initialize(observation)
+ def initialize(observation, output_dir)
@connection = ActiveRecord::Base.connection
@observation = observation
+ @output_dir = output_dir
end
def before
diff --git a/lib/gitlab/database/migrations/observers/query_details.rb b/lib/gitlab/database/migrations/observers/query_details.rb
index dadacd2d2fc..8f4406e79a5 100644
--- a/lib/gitlab/database/migrations/observers/query_details.rb
+++ b/lib/gitlab/database/migrations/observers/query_details.rb
@@ -6,7 +6,7 @@ module Gitlab
module Observers
class QueryDetails < MigrationObserver
def before
- file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}-query-details.json")
+ file_path = File.join(output_dir, "#{observation.version}_#{observation.name}-query-details.json")
@file = File.open(file_path, 'wb')
@writer = Oj::StreamWriter.new(@file, {})
@writer.push_array
diff --git a/lib/gitlab/database/migrations/observers/query_log.rb b/lib/gitlab/database/migrations/observers/query_log.rb
index e15d733d2a2..c42fd8bd23d 100644
--- a/lib/gitlab/database/migrations/observers/query_log.rb
+++ b/lib/gitlab/database/migrations/observers/query_log.rb
@@ -7,7 +7,7 @@ module Gitlab
class QueryLog < MigrationObserver
def before
@logger_was = ActiveRecord::Base.logger
- file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}.log")
+ file_path = File.join(output_dir, "#{observation.version}_#{observation.name}.log")
@logger = Logger.new(file_path)
ActiveRecord::Base.logger = @logger
end
diff --git a/lib/gitlab/database/migrations/runner.rb b/lib/gitlab/database/migrations/runner.rb
new file mode 100644
index 00000000000..b267a64256b
--- /dev/null
+++ b/lib/gitlab/database/migrations/runner.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ class Runner
+ BASE_RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze
+
+ class << self
+ def up
+ Runner.new(direction: :up, migrations: migrations_for_up, result_dir: BASE_RESULT_DIR.join('up'))
+ end
+
+ def down
+ Runner.new(direction: :down, migrations: migrations_for_down, result_dir: BASE_RESULT_DIR.join('down'))
+ end
+
+ def migration_context
+ @migration_context ||= ApplicationRecord.connection.migration_context
+ end
+
+ private
+
+ def migrations_for_up
+ existing_versions = migration_context.get_all_versions.to_set
+
+ migration_context.migrations.reject do |migration|
+ existing_versions.include?(migration.version)
+ end
+ end
+
+ def migration_file_names_this_branch
+ `git diff --name-only origin/HEAD...HEAD db/post_migrate db/migrate`.split("\n")
+ end
+
+ def migrations_for_down
+ versions_this_branch = migration_file_names_this_branch.map do |m_name|
+ m_name.match(%r{^db/(post_)?migrate/(\d+)}) { |m| m.captures[1]&.to_i }
+ end.to_set
+
+ existing_versions = migration_context.get_all_versions.to_set
+ migration_context.migrations.select do |migration|
+ existing_versions.include?(migration.version) && versions_this_branch.include?(migration.version)
+ end
+ end
+ end
+
+ attr_reader :direction, :result_dir, :migrations
+
+ delegate :migration_context, to: :class
+
+ def initialize(direction:, migrations:, result_dir:)
+ raise "Direction must be up or down" unless %i[up down].include?(direction)
+
+ @direction = direction
+ @migrations = migrations
+ @result_dir = result_dir
+ end
+
+ def run
+ FileUtils.mkdir_p(result_dir)
+
+ verbose_was = ActiveRecord::Migration.verbose
+ ActiveRecord::Migration.verbose = true
+
+ sorted_migrations = migrations.sort_by(&:version)
+ sorted_migrations.reverse! if direction == :down
+
+ instrumentation = Instrumentation.new(result_dir: result_dir)
+
+ sorted_migrations.each do |migration|
+ instrumentation.observe(version: migration.version, name: migration.name) do
+ ActiveRecord::Migrator.new(direction, migration_context.migrations, migration_context.schema_migration, migration.version).run
+ end
+ end
+ ensure
+ if instrumentation
+ File.open(File.join(result_dir, Gitlab::Database::Migrations::Instrumentation::STATS_FILENAME), 'wb+') do |io|
+ io << instrumentation.observations.to_json
+ end
+ end
+
+ # We clear the cache here to mirror the cache clearing that happens at the end of `db:migrate` tasks
+ # This clearing makes subsequent rake tasks in the same execution pick up database schema changes caused by
+ # the migrations that were just executed
+ ApplicationRecord.clear_cache!
+ ActiveRecord::Migration.verbose = verbose_was
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/partitioning.rb b/lib/gitlab/database/partitioning.rb
index bbde2063c41..71fb995577a 100644
--- a/lib/gitlab/database/partitioning.rb
+++ b/lib/gitlab/database/partitioning.rb
@@ -14,6 +14,10 @@ module Gitlab
def self.sync_partitions(models_to_sync = registered_models)
MultiDatabasePartitionManager.new(models_to_sync).sync_partitions
end
+
+ def self.drop_detached_partitions
+ MultiDatabasePartitionDropper.new.drop_detached_partitions
+ end
end
end
end
diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
index dc63d93fd07..3e7ddece20b 100644
--- a/lib/gitlab/database/partitioning/detached_partition_dropper.rb
+++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
@@ -7,18 +7,15 @@ module Gitlab
return unless Feature.enabled?(:drop_detached_partitions, default_enabled: :yaml)
Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop")
+
Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition|
- conn.transaction do
+ connection.transaction do
# Another process may have already dropped the table and deleted this entry
next unless (detached_partition = Postgresql::DetachedPartition.lock.find_by(id: detached_partition.id))
- unless check_partition_detached?(detached_partition)
- Gitlab::AppLogger.error(message: "Attempt to drop attached database partition", partition_name: detached_partition.table_name)
- detached_partition.destroy!
- next
- end
+ drop_detached_partition(detached_partition.table_name)
- drop_one(detached_partition)
+ detached_partition.destroy!
end
rescue StandardError => e
Gitlab::AppLogger.error(message: "Failed to drop previously detached partition",
@@ -30,25 +27,30 @@ module Gitlab
private
- def drop_one(detached_partition)
- conn.transaction do
- conn.execute(<<~SQL)
- DROP TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{conn.quote_table_name(detached_partition.table_name)}
- SQL
+ def drop_detached_partition(partition_name)
+ partition_identifier = qualify_partition_name(partition_name)
+
+ if partition_detached?(partition_identifier)
+ connection.drop_table(partition_identifier, if_exists: true)
- detached_partition.destroy!
+ Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: partition_name)
+ else
+ Gitlab::AppLogger.error(message: "Attempt to drop attached database partition", partition_name: partition_name)
end
- Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: detached_partition.table_name)
end
- def check_partition_detached?(detached_partition)
+ def qualify_partition_name(table_name)
+ "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{table_name}"
+ end
+
+ def partition_detached?(partition_identifier)
# PostgresPartition checks the pg_inherits view, so our partition will only show here if it's still attached
# and thus should not be dropped
- !PostgresPartition.for_identifier("#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{detached_partition.table_name}").exists?
+ !Gitlab::Database::PostgresPartition.for_identifier(partition_identifier).exists?
end
- def conn
- @conn ||= ApplicationRecord.connection
+ def connection
+ Postgresql::DetachedPartition.connection
end
end
end
diff --git a/lib/gitlab/database/partitioning/multi_database_partition_dropper.rb b/lib/gitlab/database/partitioning/multi_database_partition_dropper.rb
new file mode 100644
index 00000000000..769b658bae4
--- /dev/null
+++ b/lib/gitlab/database/partitioning/multi_database_partition_dropper.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Partitioning
+ class MultiDatabasePartitionDropper
+ def drop_detached_partitions
+ Gitlab::AppLogger.info(message: "Dropping detached postgres partitions")
+
+ each_database_connection do |name, connection|
+ Gitlab::Database::SharedModel.using_connection(connection) do
+ Gitlab::AppLogger.debug(message: "Switched database connection", connection_name: name)
+
+ DetachedPartitionDropper.new.perform
+ end
+ end
+
+ Gitlab::AppLogger.info(message: "Finished dropping detached postgres partitions")
+ end
+
+ private
+
+ def each_database_connection
+ databases.each_pair do |name, connection_wrapper|
+ yield name, connection_wrapper.scope.connection
+ end
+ end
+
+ def databases
+ Gitlab::Database.databases
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/shared_model.rb b/lib/gitlab/database/shared_model.rb
index 8f256758961..f304c32d731 100644
--- a/lib/gitlab/database/shared_model.rb
+++ b/lib/gitlab/database/shared_model.rb
@@ -2,6 +2,7 @@
module Gitlab
module Database
+ # This abstract class is used for models which need to exist in multiple de-composed databases.
class SharedModel < ActiveRecord::Base
self.abstract_class = true