gitlab.com/gitlab-org/gitlab-foss.git

Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--  lib/gitlab/database/as_with_materialized.rb | 2
-rw-r--r--  lib/gitlab/database/async_indexes.rb | 15
-rw-r--r--  lib/gitlab/database/async_indexes/index_creator.rb | 63
-rw-r--r--  lib/gitlab/database/async_indexes/migration_helpers.rb | 80
-rw-r--r--  lib/gitlab/database/async_indexes/postgres_async_index.rb | 22
-rw-r--r--  lib/gitlab/database/batch_counter.rb | 2
-rw-r--r--  lib/gitlab/database/connection.rb | 249
-rw-r--r--  lib/gitlab/database/count/reltuples_count_strategy.rb | 2
-rw-r--r--  lib/gitlab/database/count/tablesample_count_strategy.rb | 2
-rw-r--r--  lib/gitlab/database/grant.rb | 2
-rw-r--r--  lib/gitlab/database/load_balancing.rb | 35
-rw-r--r--  lib/gitlab/database/load_balancing/active_record_proxy.rb | 2
-rw-r--r--  lib/gitlab/database/load_balancing/connection_proxy.rb | 34
-rw-r--r--  lib/gitlab/database/load_balancing/host.rb | 20
-rw-r--r--  lib/gitlab/database/load_balancing/host_list.rb | 17
-rw-r--r--  lib/gitlab/database/load_balancing/load_balancer.rb | 142
-rw-r--r--  lib/gitlab/database/load_balancing/rack_middleware.rb | 12
-rw-r--r--  lib/gitlab/database/load_balancing/service_discovery.rb | 53
-rw-r--r--  lib/gitlab/database/load_balancing/sticking.rb | 12
-rw-r--r--  lib/gitlab/database/metrics.rb | 26
-rw-r--r--  lib/gitlab/database/migration_helpers.rb | 37
-rw-r--r--  lib/gitlab/database/migrations/background_migration_helpers.rb | 34
-rw-r--r--  lib/gitlab/database/migrations/instrumentation.rb | 20
-rw-r--r--  lib/gitlab/database/migrations/observation.rb | 3
-rw-r--r--  lib/gitlab/database/migrations/observers.rb | 8
-rw-r--r--  lib/gitlab/database/migrations/observers/migration_observer.rb | 7
-rw-r--r--  lib/gitlab/database/migrations/observers/query_details.rb | 8
-rw-r--r--  lib/gitlab/database/migrations/observers/query_log.rb | 8
-rw-r--r--  lib/gitlab/database/migrations/observers/query_statistics.rb | 2
-rw-r--r--  lib/gitlab/database/migrations/observers/total_database_size_change.rb | 2
-rw-r--r--  lib/gitlab/database/multi_threaded_migration.rb | 52
-rw-r--r--  lib/gitlab/database/partitioning/detached_partition_dropper.rb | 56
-rw-r--r--  lib/gitlab/database/partitioning/monthly_strategy.rb | 2
-rw-r--r--  lib/gitlab/database/partitioning/partition_manager.rb | 34
-rw-r--r--  lib/gitlab/database/partitioning/partition_monitoring.rb | 5
-rw-r--r--  lib/gitlab/database/partitioning/time_partition.rb | 7
-rw-r--r--  lib/gitlab/database/postgres_foreign_key.rb | 15
-rw-r--r--  lib/gitlab/database/postgres_hll/batch_distinct_counter.rb | 2
-rw-r--r--  lib/gitlab/database/postgres_index.rb | 7
-rw-r--r--  lib/gitlab/database/postgres_partition.rb | 8
-rw-r--r--  lib/gitlab/database/reindexing.rb | 27
-rw-r--r--  lib/gitlab/database/reindexing/reindex_concurrently.rb | 7
-rw-r--r--  lib/gitlab/database/schema_migrations/context.rb | 13
-rw-r--r--  lib/gitlab/database/similarity_score.rb | 4
-rw-r--r--  lib/gitlab/database/transaction/context.rb | 125
-rw-r--r--  lib/gitlab/database/transaction/observer.rb | 66
46 files changed, 1049 insertions(+), 302 deletions(-)
diff --git a/lib/gitlab/database/as_with_materialized.rb b/lib/gitlab/database/as_with_materialized.rb
index eda991efbd5..07809c5b592 100644
--- a/lib/gitlab/database/as_with_materialized.rb
+++ b/lib/gitlab/database/as_with_materialized.rb
@@ -19,7 +19,7 @@ module Gitlab
# Note: to be deleted after the minimum PG version is set to 12.0
def self.materialized_supported?
strong_memoize(:materialized_supported) do
- Gitlab::Database.version.match?(/^1[2-9]\./) # version 12.x and above
+ Gitlab::Database.main.version.match?(/^1[2-9]\./) # version 12.x and above
end
end
diff --git a/lib/gitlab/database/async_indexes.rb b/lib/gitlab/database/async_indexes.rb
new file mode 100644
index 00000000000..d89d5238356
--- /dev/null
+++ b/lib/gitlab/database/async_indexes.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module AsyncIndexes
+ DEFAULT_INDEXES_PER_INVOCATION = 2
+
+ def self.create_pending_indexes!(how_many: DEFAULT_INDEXES_PER_INVOCATION)
+ PostgresAsyncIndex.order(:id).limit(how_many).each do |async_index|
+ IndexCreator.new(async_index).perform
+ end
+ end
+ end
+ end
+end
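
A minimal caller sketch for the entry point introduced above (a cron or rake task caller is hypothetical here; the Rails environment is assumed to be loaded):

    # Processes the default of two queued index definitions per invocation.
    Gitlab::Database::AsyncIndexes.create_pending_indexes!

    # Or work through a larger backlog in one pass:
    Gitlab::Database::AsyncIndexes.create_pending_indexes!(how_many: 10)
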
diff --git a/lib/gitlab/database/async_indexes/index_creator.rb b/lib/gitlab/database/async_indexes/index_creator.rb
new file mode 100644
index 00000000000..00de79ec970
--- /dev/null
+++ b/lib/gitlab/database/async_indexes/index_creator.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module AsyncIndexes
+ class IndexCreator
+ include ExclusiveLeaseGuard
+
+ TIMEOUT_PER_ACTION = 1.day
+ STATEMENT_TIMEOUT = 9.hours
+
+ def initialize(async_index)
+ @async_index = async_index
+ end
+
+ def perform
+ try_obtain_lease do
+ if index_exists?
+ log_index_info('Skipping index creation as the index exists')
+ else
+ log_index_info('Creating async index')
+
+ set_statement_timeout do
+ connection.execute(async_index.definition)
+ end
+
+ log_index_info('Finished creating async index')
+ end
+
+ async_index.destroy
+ end
+ end
+
+ private
+
+ attr_reader :async_index
+
+ def index_exists?
+ connection.indexes(async_index.table_name).any? { |index| index.name == async_index.name }
+ end
+
+ def connection
+ @connection ||= ApplicationRecord.connection
+ end
+
+ def lease_timeout
+ TIMEOUT_PER_ACTION
+ end
+
+ def set_statement_timeout
+ connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
+ yield
+ ensure
+ connection.execute('RESET statement_timeout')
+ end
+
+ def log_index_info(message)
+ Gitlab::AppLogger.info(message: message, table_name: async_index.table_name, index_name: async_index.name)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/async_indexes/migration_helpers.rb b/lib/gitlab/database/async_indexes/migration_helpers.rb
new file mode 100644
index 00000000000..dff6376270a
--- /dev/null
+++ b/lib/gitlab/database/async_indexes/migration_helpers.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module AsyncIndexes
+ module MigrationHelpers
+ def unprepare_async_index(table_name, column_name, **options)
+ return unless async_index_creation_available?
+
+ index_name = options[:name] || index_name(table_name, column_name)
+
+ raise 'Specifying index name is mandatory - specify name: argument' unless index_name
+
+ unprepare_async_index_by_name(table_name, index_name)
+ end
+
+ def unprepare_async_index_by_name(table_name, index_name, **options)
+ return unless async_index_creation_available?
+
+ PostgresAsyncIndex.find_by(name: index_name).try do |async_index|
+ async_index.destroy
+ end
+ end
+
+ # Prepares an index for asynchronous creation.
+ #
+ # Stores the index information in the postgres_async_indexes table to be created later. The
+        # index will always be created CONCURRENTLY, so that option does not need to be given.
+ # If an existing asynchronous definition exists with the same name, the existing entry will be
+ # updated with the new definition.
+ #
+ # If the requested index has already been created, it is not stored in the table for
+ # asynchronous creation.
+ def prepare_async_index(table_name, column_name, **options)
+ return unless async_index_creation_available?
+
+ index_name = options[:name] || index_name(table_name, column_name)
+
+ raise 'Specifying index name is mandatory - specify name: argument' unless index_name
+
+ options = options.merge({ algorithm: :concurrently })
+
+ if index_exists?(table_name, column_name, **options)
+ Gitlab::AppLogger.warn(
+ message: 'Index not prepared because it already exists',
+ table_name: table_name,
+ index_name: index_name)
+
+ return
+ end
+
+ index, algorithm, if_not_exists = add_index_options(table_name, column_name, **options)
+
+ create_index = ActiveRecord::ConnectionAdapters::CreateIndexDefinition.new(index, algorithm, if_not_exists)
+ schema_creation = ActiveRecord::ConnectionAdapters::PostgreSQL::SchemaCreation.new(ApplicationRecord.connection)
+ definition = schema_creation.accept(create_index)
+
+ async_index = PostgresAsyncIndex.safe_find_or_create_by!(name: index_name) do |rec|
+ rec.table_name = table_name
+ rec.definition = definition
+ end
+
+ Gitlab::AppLogger.info(
+ message: 'Prepared index for async creation',
+ table_name: async_index.table_name,
+ index_name: async_index.name)
+
+ async_index
+ end
+
+ private
+
+ def async_index_creation_available?
+ ApplicationRecord.connection.table_exists?(:postgres_async_indexes) &&
+ Feature.enabled?(:database_async_index_creation, type: :ops)
+ end
+ end
+ end
+ end
+end
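
A sketch of how a (hypothetical) post-deployment migration might use the new helpers; the table, column and index names below are illustrative only:

    class PrepareExampleIndexAsync < ActiveRecord::Migration[6.1]
      include Gitlab::Database::MigrationHelpers

      INDEX_NAME = 'index_ci_builds_on_example_column'

      def up
        # Queues the index definition in postgres_async_indexes; the index
        # itself is built later by AsyncIndexes.create_pending_indexes!.
        prepare_async_index :ci_builds, :example_column, name: INDEX_NAME
      end

      def down
        # Removes the queued definition again.
        unprepare_async_index :ci_builds, :example_column, name: INDEX_NAME
      end
    end
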
diff --git a/lib/gitlab/database/async_indexes/postgres_async_index.rb b/lib/gitlab/database/async_indexes/postgres_async_index.rb
new file mode 100644
index 00000000000..236459e6216
--- /dev/null
+++ b/lib/gitlab/database/async_indexes/postgres_async_index.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module AsyncIndexes
+ class PostgresAsyncIndex < ApplicationRecord
+ self.table_name = 'postgres_async_indexes'
+
+ MAX_IDENTIFIER_LENGTH = Gitlab::Database::MigrationHelpers::MAX_IDENTIFIER_NAME_LENGTH
+ MAX_DEFINITION_LENGTH = 2048
+
+ validates :name, presence: true, length: { maximum: MAX_IDENTIFIER_LENGTH }
+ validates :table_name, presence: true, length: { maximum: MAX_IDENTIFIER_LENGTH }
+ validates :definition, presence: true, length: { maximum: MAX_DEFINITION_LENGTH }
+
+ def to_s
+ definition
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/batch_counter.rb b/lib/gitlab/database/batch_counter.rb
index 5f2e404c9da..7efa5b46ecb 100644
--- a/lib/gitlab/database/batch_counter.rb
+++ b/lib/gitlab/database/batch_counter.rb
@@ -31,7 +31,7 @@ module Gitlab
end
def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
- raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
+ raise 'BatchCount can not be run inside a transaction' if @relation.connection.transaction_open?
check_mode!(mode)
diff --git a/lib/gitlab/database/connection.rb b/lib/gitlab/database/connection.rb
new file mode 100644
index 00000000000..21861e4fba8
--- /dev/null
+++ b/lib/gitlab/database/connection.rb
@@ -0,0 +1,249 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ # Configuration settings and methods for interacting with a PostgreSQL
+ # database, with support for multiple databases.
+ class Connection
+ DEFAULT_POOL_HEADROOM = 10
+
+ attr_reader :scope
+
+ # Initializes a new `Database`.
+ #
+ # The `scope` argument must be an object (such as `ActiveRecord::Base`)
+ # that supports retrieving connections and connection pools.
+ def initialize(scope = ActiveRecord::Base)
+ @config = nil
+ @scope = scope
+ @version = nil
+ @open_transactions_baseline = 0
+ end
+
+ # We configure the database connection pool size automatically based on
+ # the configured concurrency. We also add some headroom, to make sure we
+ # don't run out of connections when more threads besides the 'user-facing'
+ # ones are running.
+ #
+ # Read more about this in
+ # doc/development/database/client_side_connection_pool.md
+ def default_pool_size
+ headroom =
+ (ENV["DB_POOL_HEADROOM"].presence || DEFAULT_POOL_HEADROOM).to_i
+
+ Gitlab::Runtime.max_threads + headroom
+ end
+
+ def config
+ # The result of this method must not be cached, as other methods may use
+ # it after making configuration changes and expect those changes to be
+ # present. For example, `disable_prepared_statements` expects the
+ # configuration settings to always be up to date.
+ #
+ # See the following for more information:
+ #
+ # - https://gitlab.com/gitlab-org/release/retrospectives/-/issues/39
+ # - https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5238
+ scope.connection_db_config.configuration_hash.with_indifferent_access
+ end
+
+ def pool_size
+ config[:pool] || default_pool_size
+ end
+
+ def username
+ config[:username] || ENV['USER']
+ end
+
+ def database_name
+ config[:database]
+ end
+
+ def adapter_name
+ config[:adapter]
+ end
+
+ def human_adapter_name
+ if postgresql?
+ 'PostgreSQL'
+ else
+ 'Unknown'
+ end
+ end
+
+ def postgresql?
+ adapter_name.casecmp('postgresql') == 0
+ end
+
+ def db_config_with_default_pool_size
+ db_config_object = scope.connection_db_config
+ config = db_config_object.configuration_hash.merge(pool: default_pool_size)
+
+ ActiveRecord::DatabaseConfigurations::HashConfig.new(
+ db_config_object.env_name,
+ db_config_object.name,
+ config
+ )
+ end
+
+ # Disables prepared statements for the current database connection.
+ def disable_prepared_statements
+ scope.establish_connection(config.merge(prepared_statements: false))
+ end
+
+ # Check whether the underlying database is in read-only mode
+ def db_read_only?
+ pg_is_in_recovery =
+ scope
+ .connection
+ .execute('SELECT pg_is_in_recovery()')
+ .first
+ .fetch('pg_is_in_recovery')
+
+ Gitlab::Utils.to_boolean(pg_is_in_recovery)
+ end
+
+ def db_read_write?
+ !db_read_only?
+ end
+
+ def version
+ @version ||= database_version.match(/\A(?:PostgreSQL |)([^\s]+).*\z/)[1]
+ end
+
+ def database_version
+ connection.execute("SELECT VERSION()").first['version']
+ end
+
+ def postgresql_minimum_supported_version?
+ version.to_f >= MINIMUM_POSTGRES_VERSION
+ end
+
+ # Bulk inserts a number of rows into a table, optionally returning their
+ # IDs.
+ #
+ # table - The name of the table to insert the rows into.
+ # rows - An Array of Hash instances, each mapping the columns to their
+ # values.
+ # return_ids - When set to true the return value will be an Array of IDs of
+ # the inserted rows
+ # disable_quote - A key or an Array of keys to exclude from quoting (You
+ # become responsible for protection from SQL injection for
+ # these keys!)
+ # on_conflict - Defines an upsert. Values can be: :disabled (default) or
+ # :do_nothing
+ def bulk_insert(table, rows, return_ids: false, disable_quote: [], on_conflict: nil)
+ return if rows.empty?
+
+ keys = rows.first.keys
+ columns = keys.map { |key| connection.quote_column_name(key) }
+
+ disable_quote = Array(disable_quote).to_set
+ tuples = rows.map do |row|
+ keys.map do |k|
+ disable_quote.include?(k) ? row[k] : connection.quote(row[k])
+ end
+ end
+
+ sql = <<-EOF
+ INSERT INTO #{table} (#{columns.join(', ')})
+ VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')}
+ EOF
+
+ sql = "#{sql} ON CONFLICT DO NOTHING" if on_conflict == :do_nothing
+
+ sql = "#{sql} RETURNING id" if return_ids
+
+ result = connection.execute(sql)
+
+ if return_ids
+ result.values.map { |tuple| tuple[0].to_i }
+ else
+ []
+ end
+ end
+
+ def cached_column_exists?(table_name, column_name)
+ connection
+ .schema_cache.columns_hash(table_name)
+ .has_key?(column_name.to_s)
+ end
+
+ def cached_table_exists?(table_name)
+ exists? && connection.schema_cache.data_source_exists?(table_name)
+ end
+
+ def exists?
+ connection
+
+ true
+ rescue StandardError
+ false
+ end
+
+ def system_id
+ row = connection
+ .execute('SELECT system_identifier FROM pg_control_system()')
+ .first
+
+ row['system_identifier']
+ end
+
+ # @param [ActiveRecord::Connection] ar_connection
+ # @return [String]
+ def get_write_location(ar_connection)
+ use_new_load_balancer_query = Gitlab::Utils
+ .to_boolean(ENV['USE_NEW_LOAD_BALANCER_QUERY'], default: true)
+
+ sql =
+ if use_new_load_balancer_query
+ <<~NEWSQL
+ SELECT CASE
+ WHEN pg_is_in_recovery() = true AND EXISTS (SELECT 1 FROM pg_stat_get_wal_senders())
+ THEN pg_last_wal_replay_lsn()::text
+ WHEN pg_is_in_recovery() = false
+ THEN pg_current_wal_insert_lsn()::text
+ ELSE NULL
+ END AS location;
+ NEWSQL
+ else
+ <<~SQL
+ SELECT pg_current_wal_insert_lsn()::text AS location
+ SQL
+ end
+
+ row = ar_connection.select_all(sql).first
+ row['location'] if row
+ end
+
+ # inside_transaction? will return true if the caller is running within a
+ # transaction. Handles special cases when running inside a test
+ # environment, where tests may be wrapped in transactions
+ def inside_transaction?
+ base = Rails.env.test? ? @open_transactions_baseline : 0
+
+ scope.connection.open_transactions > base
+ end
+
+ # These methods that access @open_transactions_baseline are not
+ # thread-safe. These are fine though because we only call these in
+ # RSpec's main thread. If we decide to run specs multi-threaded, we would
+ # need to use something like ThreadGroup to keep track of this value
+ def set_open_transactions_baseline
+ @open_transactions_baseline = scope.connection.open_transactions
+ end
+
+ def reset_open_transactions_baseline
+ @open_transactions_baseline = 0
+ end
+
+ private
+
+ def connection
+ scope.connection
+ end
+ end
+ end
+end
+
+Gitlab::Database::Connection.prepend_mod_with('Gitlab::Database::Connection')
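
A usage sketch of the wrapper, assuming the `Gitlab::Database.main` calls seen elsewhere in this diff return an instance of this class; the table name and row values are illustrative only:

    db = Gitlab::Database::Connection.new   # scope defaults to ActiveRecord::Base

    db.postgresql?                # true when the adapter is PostgreSQL
    db.pool_size                  # configured pool size, or default_pool_size
    db.version                    # e.g. "12.7"

    # Bulk-insert two rows and return their IDs.
    ids = db.bulk_insert(
      'example_events',           # hypothetical table name
      [{ author_id: 1, entity_id: 2 }, { author_id: 3, entity_id: 4 }],
      return_ids: true
    )
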
diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb
index a7bfafe2815..870cf25984b 100644
--- a/lib/gitlab/database/count/reltuples_count_strategy.rb
+++ b/lib/gitlab/database/count/reltuples_count_strategy.rb
@@ -54,7 +54,7 @@ module Gitlab
# Querying tuple stats only works on the primary. Due to load balancing, the
# easiest way to do this is to start a transaction.
- ActiveRecord::Base.transaction do
+ ActiveRecord::Base.transaction do # rubocop: disable Database/MultipleDatabases
get_statistics(non_sti_table_names, check_statistics: check_statistics).each_with_object({}) do |row, data|
model = table_to_model[row.table_name]
data[model] = row.estimate
diff --git a/lib/gitlab/database/count/tablesample_count_strategy.rb b/lib/gitlab/database/count/tablesample_count_strategy.rb
index e9387a91a14..489bc0aacea 100644
--- a/lib/gitlab/database/count/tablesample_count_strategy.rb
+++ b/lib/gitlab/database/count/tablesample_count_strategy.rb
@@ -61,7 +61,7 @@ module Gitlab
#{where_clause(model)}
SQL
- rows = ActiveRecord::Base.connection.select_all(query)
+ rows = ActiveRecord::Base.connection.select_all(query) # rubocop: disable Database/MultipleDatabases
Integer(rows.first['count'])
end
diff --git a/lib/gitlab/database/grant.rb b/lib/gitlab/database/grant.rb
index 7774dd9fffe..c8a30c68bc6 100644
--- a/lib/gitlab/database/grant.rb
+++ b/lib/gitlab/database/grant.rb
@@ -10,7 +10,7 @@ module Gitlab
# We _must not_ use quote_table_name as this will produce double
# quotes on PostgreSQL and for "has_table_privilege" we need single
# quotes.
- connection = ActiveRecord::Base.connection
+ connection = ActiveRecord::Base.connection # rubocop: disable Database/MultipleDatabases
quoted_table = connection.quote(table)
begin
diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb
index 31d41a6d6c0..08f108eb8e4 100644
--- a/lib/gitlab/database/load_balancing.rb
+++ b/lib/gitlab/database/load_balancing.rb
@@ -23,7 +23,7 @@ module Gitlab
# The connection proxy to use for load balancing (if enabled).
def self.proxy
- unless @proxy
+ unless load_balancing_proxy = ActiveRecord::Base.load_balancing_proxy
Gitlab::ErrorTracking.track_exception(
ProxyNotConfiguredError.new(
"Attempting to access the database load balancing proxy, but it wasn't configured.\n" \
@@ -31,12 +31,12 @@ module Gitlab
))
end
- @proxy
+ load_balancing_proxy
end
# Returns a Hash containing the load balancing configuration.
def self.configuration
- Gitlab::Database.config[:load_balancing] || {}
+ Gitlab::Database.main.config[:load_balancing] || {}
end
# Returns the maximum replica lag size in bytes.
@@ -79,7 +79,7 @@ module Gitlab
end
def self.pool_size
- Gitlab::Database.config[:pool]
+ Gitlab::Database.main.pool_size
end
# Returns true if load balancing is to be enabled.
@@ -107,12 +107,12 @@ module Gitlab
# Configures proxying of requests.
def self.configure_proxy(proxy = ConnectionProxy.new(hosts))
- @proxy = proxy
+ ActiveRecord::Base.load_balancing_proxy = proxy
- # This hijacks the "connection" method to ensure both
- # `ActiveRecord::Base.connection` and all models use the same load
- # balancing proxy.
- ActiveRecord::Base.singleton_class.prepend(ActiveRecordProxy)
+ # Populate service discovery immediately if it is configured
+ if service_discovery_enabled?
+ ServiceDiscovery.new(service_discovery_configuration).perform_service_discovery
+ end
end
def self.active_record_models
@@ -132,9 +132,22 @@ module Gitlab
# recognize the connection, this method returns the primary role
# directly. In future, we may need to check for other sources.
def self.db_role_for_connection(connection)
- return ROLE_PRIMARY if !enable? || @proxy.blank?
+ return ROLE_UNKNOWN unless connection
+
+ # The connection proxy does not have a role assigned
+        # as this is dependent on an execution context
+ return ROLE_UNKNOWN if connection.is_a?(ConnectionProxy)
- proxy.load_balancer.db_role_for_connection(connection)
+ # During application init we might receive `NullPool`
+ return ROLE_UNKNOWN unless connection.respond_to?(:pool) &&
+ connection.pool.respond_to?(:db_config) &&
+ connection.pool.db_config.respond_to?(:name)
+
+ if connection.pool.db_config.name.ends_with?(LoadBalancer::REPLICA_SUFFIX)
+ ROLE_REPLICA
+ else
+ ROLE_PRIMARY
+ end
end
end
end
diff --git a/lib/gitlab/database/load_balancing/active_record_proxy.rb b/lib/gitlab/database/load_balancing/active_record_proxy.rb
index 7763497e770..deaea62d774 100644
--- a/lib/gitlab/database/load_balancing/active_record_proxy.rb
+++ b/lib/gitlab/database/load_balancing/active_record_proxy.rb
@@ -7,7 +7,7 @@ module Gitlab
# "connection" method.
module ActiveRecordProxy
def connection
- LoadBalancing.proxy
+ ::Gitlab::Database::LoadBalancing.proxy
end
end
end
diff --git a/lib/gitlab/database/load_balancing/connection_proxy.rb b/lib/gitlab/database/load_balancing/connection_proxy.rb
index 3a09689a724..938f4951532 100644
--- a/lib/gitlab/database/load_balancing/connection_proxy.rb
+++ b/lib/gitlab/database/load_balancing/connection_proxy.rb
@@ -41,31 +41,31 @@ module Gitlab
def select_all(arel, name = nil, binds = [], preparable: nil)
if arel.respond_to?(:locked) && arel.locked
# SELECT ... FOR UPDATE queries should be sent to the primary.
- write_using_load_balancer(:select_all, [arel, name, binds],
+ write_using_load_balancer(:select_all, arel, name, binds,
sticky: true)
else
- read_using_load_balancer(:select_all, [arel, name, binds])
+ read_using_load_balancer(:select_all, arel, name, binds)
end
end
NON_STICKY_READS.each do |name|
- define_method(name) do |*args, &block|
- read_using_load_balancer(name, args, &block)
+ define_method(name) do |*args, **kwargs, &block|
+ read_using_load_balancer(name, *args, **kwargs, &block)
end
end
STICKY_WRITES.each do |name|
- define_method(name) do |*args, &block|
- write_using_load_balancer(name, args, sticky: true, &block)
+ define_method(name) do |*args, **kwargs, &block|
+ write_using_load_balancer(name, *args, sticky: true, **kwargs, &block)
end
end
- def transaction(*args, &block)
+ def transaction(*args, **kwargs, &block)
if current_session.fallback_to_replicas_for_ambiguous_queries?
track_read_only_transaction!
- read_using_load_balancer(:transaction, args, &block)
+ read_using_load_balancer(:transaction, *args, **kwargs, &block)
else
- write_using_load_balancer(:transaction, args, sticky: true, &block)
+ write_using_load_balancer(:transaction, *args, sticky: true, **kwargs, &block)
end
ensure
@@ -73,26 +73,26 @@ module Gitlab
end
# Delegates all unknown messages to a read-write connection.
- def method_missing(name, *args, &block)
+ def method_missing(...)
if current_session.fallback_to_replicas_for_ambiguous_queries?
- read_using_load_balancer(name, args, &block)
+ read_using_load_balancer(...)
else
- write_using_load_balancer(name, args, &block)
+ write_using_load_balancer(...)
end
end
# Performs a read using the load balancer.
#
# name - The name of the method to call on a connection object.
- def read_using_load_balancer(name, args, &block)
+ def read_using_load_balancer(...)
if current_session.use_primary? &&
!current_session.use_replicas_for_read_queries?
@load_balancer.read_write do |connection|
- connection.send(name, *args, &block)
+ connection.send(...)
end
else
@load_balancer.read do |connection|
- connection.send(name, *args, &block)
+ connection.send(...)
end
end
end
@@ -102,7 +102,7 @@ module Gitlab
# name - The name of the method to call on a connection object.
# sticky - If set to true the session will stick to the master after
# the write.
- def write_using_load_balancer(name, args, sticky: false, &block)
+ def write_using_load_balancer(name, *args, sticky: false, **kwargs, &block)
if read_only_transaction?
raise WriteInsideReadOnlyTransactionError, 'A write query is performed inside a read-only transaction'
end
@@ -113,7 +113,7 @@ module Gitlab
# secondary instead of on a primary (when necessary).
current_session.write! if sticky
- connection.send(name, *args, &block)
+ connection.send(name, *args, **kwargs, &block)
end
end
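
The switch above from collecting `args` arrays to forwarding `*args, **kwargs` (and `...`) matters on Ruby 3, where keyword arguments are no longer extracted from a trailing positional Hash. A standalone sketch (not GitLab code) of the difference:

    class Target
      def insert(sql, name = nil, returning: nil)
        { sql: sql, name: name, returning: returning }
      end
    end

    class ArrayProxy
      def initialize(target)
        @target = target
      end

      def method_missing(name, *args, &block)
        @target.send(name, *args, &block)            # keywords degrade to a Hash
      end
    end

    class KwargsProxy
      def initialize(target)
        @target = target
      end

      def method_missing(name, *args, **kwargs, &block)
        @target.send(name, *args, **kwargs, &block)  # keywords are preserved
      end
    end

    ArrayProxy.new(Target.new).insert('SQL', returning: 'id')
    # => { sql: "SQL", name: { returning: "id" }, returning: nil }   (on Ruby 3)
    KwargsProxy.new(Target.new).insert('SQL', returning: 'id')
    # => { sql: "SQL", name: nil, returning: "id" }
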
diff --git a/lib/gitlab/database/load_balancing/host.rb b/lib/gitlab/database/load_balancing/host.rb
index 3e74b5ea727..4c5357ae8e3 100644
--- a/lib/gitlab/database/load_balancing/host.rb
+++ b/lib/gitlab/database/load_balancing/host.rb
@@ -29,11 +29,11 @@ module Gitlab
@host = host
@port = port
@load_balancer = load_balancer
- @pool = Database.create_connection_pool(LoadBalancing.pool_size, host, port)
+ @pool = load_balancer.create_replica_connection_pool(::Gitlab::Database::LoadBalancing.pool_size, host, port)
@online = true
@last_checked_at = Time.zone.now
- interval = LoadBalancing.replica_check_interval
+ interval = ::Gitlab::Database::LoadBalancing.replica_check_interval
@intervals = (interval..(interval * 2)).step(0.5).to_a
end
@@ -41,10 +41,10 @@ module Gitlab
#
# timeout - The time after which the pool should be forcefully
# disconnected.
- def disconnect!(timeout = 120)
- start_time = Metrics::System.monotonic_time
+ def disconnect!(timeout: 120)
+ start_time = ::Gitlab::Metrics::System.monotonic_time
- while (Metrics::System.monotonic_time - start_time) <= timeout
+ while (::Gitlab::Metrics::System.monotonic_time - start_time) <= timeout
break if pool.connections.none?(&:in_use?)
sleep(2)
@@ -54,7 +54,7 @@ module Gitlab
end
def offline!
- LoadBalancing::Logger.warn(
+ ::Gitlab::Database::LoadBalancing::Logger.warn(
event: :host_offline,
message: 'Marking host as offline',
db_host: @host,
@@ -72,14 +72,14 @@ module Gitlab
refresh_status
if @online
- LoadBalancing::Logger.info(
+ ::Gitlab::Database::LoadBalancing::Logger.info(
event: :host_online,
message: 'Host is online after replica status check',
db_host: @host,
db_port: @port
)
else
- LoadBalancing::Logger.warn(
+ ::Gitlab::Database::LoadBalancing::Logger.warn(
event: :host_offline,
message: 'Host is offline after replica status check',
db_host: @host,
@@ -108,7 +108,7 @@ module Gitlab
def replication_lag_below_threshold?
if (lag_time = replication_lag_time)
- lag_time <= LoadBalancing.max_replication_lag_time
+ lag_time <= ::Gitlab::Database::LoadBalancing.max_replication_lag_time
else
false
end
@@ -125,7 +125,7 @@ module Gitlab
# only do this if we haven't replicated in a while so we only need
# to connect to the primary when truly necessary.
if (lag_size = replication_lag_size)
- lag_size <= LoadBalancing.max_replication_difference
+ lag_size <= ::Gitlab::Database::LoadBalancing.max_replication_difference
else
false
end
diff --git a/lib/gitlab/database/load_balancing/host_list.rb b/lib/gitlab/database/load_balancing/host_list.rb
index 24800012947..aa731521732 100644
--- a/lib/gitlab/database/load_balancing/host_list.rb
+++ b/lib/gitlab/database/load_balancing/host_list.rb
@@ -8,13 +8,11 @@ module Gitlab
# hosts - The list of secondary hosts to add.
def initialize(hosts = [])
@hosts = hosts.shuffle
- @pools = Set.new
@index = 0
@mutex = Mutex.new
@hosts_gauge = Gitlab::Metrics.gauge(:db_load_balancing_hosts, 'Current number of load balancing hosts')
set_metrics!
- update_pools
end
def hosts
@@ -35,15 +33,16 @@ module Gitlab
@mutex.synchronize { @hosts.map { |host| [host.host, host.port] } }
end
- def manage_pool?(pool)
- @pools.include?(pool)
- end
-
def hosts=(hosts)
@mutex.synchronize do
+ ::Gitlab::Database::LoadBalancing::Logger.info(
+ event: :host_list_update,
+ message: "Updating the host list for service discovery",
+ host_list_length: hosts.length,
+ old_host_list_length: @hosts.length
+ )
@hosts = hosts
unsafe_shuffle
- update_pools
end
set_metrics!
@@ -89,10 +88,6 @@ module Gitlab
def set_metrics!
@hosts_gauge.set({}, @hosts.length)
end
-
- def update_pools
- @pools = Set.new(@hosts.map(&:pool))
- end
end
end
end
diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb
index a5d67ebc050..e3f5d0ac470 100644
--- a/lib/gitlab/database/load_balancing/load_balancer.rb
+++ b/lib/gitlab/database/load_balancing/load_balancer.rb
@@ -7,20 +7,21 @@ module Gitlab
#
# Each host in the load balancer uses the same credentials as the primary
# database.
- #
- # This class *requires* that `ActiveRecord::Base.retrieve_connection`
- # always returns a connection to the primary.
class LoadBalancer
CACHE_KEY = :gitlab_load_balancer_host
- VALID_HOSTS_CACHE_KEY = :gitlab_load_balancer_valid_hosts
+
+ REPLICA_SUFFIX = '_replica'
attr_reader :host_list
# hosts - The hostnames/addresses of the additional databases.
- def initialize(hosts = [])
+ def initialize(hosts = [], model = ActiveRecord::Base)
+ @model = model
@host_list = HostList.new(hosts.map { |addr| Host.new(addr, self) })
- @connection_db_roles = {}.compare_by_identity
- @connection_db_roles_count = {}.compare_by_identity
+ end
+
+ def disconnect!(timeout: 120)
+ host_list.hosts.each { |host| host.disconnect!(timeout: timeout) }
end
# Yields a connection that can be used for reads.
@@ -28,7 +29,6 @@ module Gitlab
# If no secondaries were available this method will use the primary
# instead.
def read(&block)
- connection = nil
conflict_retried = 0
while host
@@ -36,12 +36,8 @@ module Gitlab
begin
connection = host.connection
- track_connection_role(connection, ROLE_REPLICA)
-
return yield connection
rescue StandardError => error
- untrack_connection_role(connection)
-
if serialization_failure?(error)
# This error can occur when a query conflicts. See
# https://www.postgresql.org/docs/current/static/hot-standby.html#HOT-STANDBY-CONFLICT
@@ -84,8 +80,6 @@ module Gitlab
)
read_write(&block)
- ensure
- untrack_connection_role(connection)
end
# Yields a connection that can be used for both reads and writes.
@@ -95,22 +89,9 @@ module Gitlab
# Instead of immediately grinding to a halt we'll retry the operation
# a few times.
retry_with_backoff do
- connection = ActiveRecord::Base.retrieve_connection
- track_connection_role(connection, ROLE_PRIMARY)
-
+ connection = pool.connection
yield connection
end
- ensure
- untrack_connection_role(connection)
- end
-
- # Recognize the role (primary/replica) of the database this connection
- # is connecting to. If the connection is not issued by this load
- # balancer, return nil
- def db_role_for_connection(connection)
- return @connection_db_roles[connection] if @connection_db_roles[connection]
- return ROLE_REPLICA if @host_list.manage_pool?(connection.pool)
- return ROLE_PRIMARY if connection.pool == ActiveRecord::Base.connection_pool
end
# Returns a host to use for queries.
@@ -118,28 +99,27 @@ module Gitlab
# Hosts are scoped per thread so that multiple threads don't
# accidentally re-use the same host + connection.
def host
- RequestStore[CACHE_KEY] ||= current_host_list.next
+ request_cache[CACHE_KEY] ||= @host_list.next
end
# Releases the host and connection for the current thread.
def release_host
- if host = RequestStore[CACHE_KEY]
+ if host = request_cache[CACHE_KEY]
host.disable_query_cache!
host.release_connection
end
- RequestStore.delete(CACHE_KEY)
- RequestStore.delete(VALID_HOSTS_CACHE_KEY)
+ request_cache.delete(CACHE_KEY)
end
def release_primary_connection
- ActiveRecord::Base.connection_pool.release_connection
+ pool.release_connection
end
# Returns the transaction write location of the primary.
def primary_write_location
location = read_write do |connection|
- ::Gitlab::Database.get_write_location(connection)
+ ::Gitlab::Database.main.get_write_location(connection)
end
return location if location
@@ -148,55 +128,17 @@ module Gitlab
end
# Returns true if there was at least one host that has caught up with the given transaction.
- #
- # In case of a retry, this method also stores the set of hosts that have caught up.
- #
- # UPD: `select_caught_up_hosts` seems to have redundant logic managing host list (`:gitlab_load_balancer_valid_hosts`),
- # while we only need a single host: https://gitlab.com/gitlab-org/gitlab/-/issues/326125#note_615271604
- # Also, shuffling the list afterwards doesn't seem to be necessary.
- # This may be improved by merging this method with `select_up_to_date_host`.
- # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out
- def select_caught_up_hosts(location)
- all_hosts = @host_list.hosts
- valid_hosts = all_hosts.select { |host| host.caught_up?(location) }
-
- return false if valid_hosts.empty?
-
- # Hosts can come online after the time when this scan was done,
- # so we need to remember the ones that can be used. If the host went
- # offline, we'll just rely on the retry mechanism to use the primary.
- set_consistent_hosts_for_request(HostList.new(valid_hosts))
-
- # Since we will be using a subset from the original list, let's just
- # pick a random host and mix up the original list to ensure we don't
- # only end up using one replica.
- RequestStore[CACHE_KEY] = valid_hosts.sample
- @host_list.shuffle
-
- true
- end
-
- # Returns true if there was at least one host that has caught up with the given transaction.
- # Similar to `#select_caught_up_hosts`, picks a random host, to rotate replicas we use.
- # Unlike `#select_caught_up_hosts`, does not iterate over all hosts if finds any.
- #
- # It is going to be merged with `select_caught_up_hosts`, because they intend to do the same.
def select_up_to_date_host(location)
all_hosts = @host_list.hosts.shuffle
host = all_hosts.find { |host| host.caught_up?(location) }
return false unless host
- RequestStore[CACHE_KEY] = host
+ request_cache[CACHE_KEY] = host
true
end
- # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out
- def set_consistent_hosts_for_request(hosts)
- RequestStore[VALID_HOSTS_CACHE_KEY] = hosts
- end
-
# Yields a block, retrying it upon error using an exponential backoff.
def retry_with_backoff(retries = 3, time = 2)
retried = 0
@@ -247,30 +189,50 @@ module Gitlab
end
end
- private
+ # pool_size - The size of the DB pool.
+ # host - An optional host name to use instead of the default one.
+ # port - An optional port to connect to.
+ def create_replica_connection_pool(pool_size, host = nil, port = nil)
+ db_config = pool.db_config
- def ensure_caching!
- host.enable_query_cache! unless host.query_cache_enabled
- end
+ env_config = db_config.configuration_hash.dup
+ env_config[:pool] = pool_size
+ env_config[:host] = host if host
+ env_config[:port] = port if port
+
+ replica_db_config = ActiveRecord::DatabaseConfigurations::HashConfig.new(
+ db_config.env_name,
+ db_config.name + REPLICA_SUFFIX,
+ env_config
+ )
- def track_connection_role(connection, role)
- @connection_db_roles[connection] = role
- @connection_db_roles_count[connection] ||= 0
- @connection_db_roles_count[connection] += 1
+ # We cannot use ActiveRecord::Base.connection_handler.establish_connection
+ # as it will rewrite ActiveRecord::Base.connection
+ ActiveRecord::ConnectionAdapters::ConnectionHandler
+ .new
+ .establish_connection(replica_db_config)
end
- def untrack_connection_role(connection)
- return if connection.blank? || @connection_db_roles_count[connection].blank?
+ private
- @connection_db_roles_count[connection] -= 1
- if @connection_db_roles_count[connection] <= 0
- @connection_db_roles.delete(connection)
- @connection_db_roles_count.delete(connection)
- end
+        # ActiveRecord::ConnectionAdapters::ConnectionHandler handles fetching
+        # and caching of connection pools for each "connection", so we
+ # leverage that.
+ def pool
+ ActiveRecord::Base.connection_handler.retrieve_connection_pool(
+ @model.connection_specification_name,
+ role: ActiveRecord::Base.writing_role,
+ shard: ActiveRecord::Base.default_shard
+ )
+ end
+
+ def ensure_caching!
+ host.enable_query_cache! unless host.query_cache_enabled
end
- def current_host_list
- RequestStore[VALID_HOSTS_CACHE_KEY] || @host_list
+ def request_cache
+ base = RequestStore[:gitlab_load_balancer] ||= {}
+ base[pool] ||= {}
end
end
end
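
A sketch of the reworked per-model load balancer API; the replica host names are illustrative:

    lb = Gitlab::Database::LoadBalancing::LoadBalancer.new(
      %w[replica-1.example.com replica-2.example.com],
      ActiveRecord::Base
    )

    # Reads use a replica when one is available, otherwise fall back to the primary.
    lb.read { |connection| connection.select_all('SELECT 1') }

    # Writes resolve the primary pool through the connection handler rather than
    # ActiveRecord::Base.retrieve_connection.
    lb.read_write { |connection| connection.execute('SELECT 1') }

    # Replica pools are named "<primary pool name>_replica" (REPLICA_SUFFIX), which
    # is what db_role_for_connection now uses to classify a connection's role.
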
diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb
index 8e7e6865402..f8a31622b7d 100644
--- a/lib/gitlab/database/load_balancing/rack_middleware.rb
+++ b/lib/gitlab/database/load_balancing/rack_middleware.rb
@@ -18,9 +18,9 @@ module Gitlab
# namespace - The namespace to use for sticking.
# id - The identifier to use for sticking.
def self.stick_or_unstick(env, namespace, id)
- return unless LoadBalancing.enable?
+ return unless ::Gitlab::Database::LoadBalancing.enable?
- Sticking.unstick_or_continue_sticking(namespace, id)
+ ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id)
env[STICK_OBJECT] ||= Set.new
env[STICK_OBJECT] << [namespace, id]
@@ -56,7 +56,7 @@ module Gitlab
namespaces_and_ids = sticking_namespaces_and_ids(env)
namespaces_and_ids.each do |namespace, id|
- Sticking.unstick_or_continue_sticking(namespace, id)
+ ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id)
end
end
@@ -65,17 +65,17 @@ module Gitlab
namespaces_and_ids = sticking_namespaces_and_ids(env)
namespaces_and_ids.each do |namespace, id|
- Sticking.stick_if_necessary(namespace, id)
+ ::Gitlab::Database::LoadBalancing::Sticking.stick_if_necessary(namespace, id)
end
end
def clear
load_balancer.release_host
- Session.clear_session
+ ::Gitlab::Database::LoadBalancing::Session.clear_session
end
def load_balancer
- LoadBalancing.proxy.load_balancer
+ ::Gitlab::Database::LoadBalancing.proxy.load_balancer
end
# Determines the sticking namespace and identifier based on the Rack
diff --git a/lib/gitlab/database/load_balancing/service_discovery.rb b/lib/gitlab/database/load_balancing/service_discovery.rb
index 9b42b25be1c..251961c8246 100644
--- a/lib/gitlab/database/load_balancing/service_discovery.rb
+++ b/lib/gitlab/database/load_balancing/service_discovery.rb
@@ -13,7 +13,8 @@ module Gitlab
# balancer with said hosts. Requests may continue to use the old hosts
# until they complete.
class ServiceDiscovery
- attr_reader :interval, :record, :record_type, :disconnect_timeout
+ attr_reader :interval, :record, :record_type, :disconnect_timeout,
+ :load_balancer
MAX_SLEEP_ADJUSTMENT = 10
@@ -40,7 +41,17 @@ module Gitlab
# disconnect_timeout - The time after which an old host should be
# forcefully disconnected.
# use_tcp - Use TCP instead of UDP to look up resources
- def initialize(nameserver:, port:, record:, record_type: 'A', interval: 60, disconnect_timeout: 120, use_tcp: false)
+ # load_balancer - The load balancer instance to use
+ def initialize(
+ nameserver:,
+ port:,
+ record:,
+ record_type: 'A',
+ interval: 60,
+ disconnect_timeout: 120,
+ use_tcp: false,
+ load_balancer: LoadBalancing.proxy.load_balancer
+ )
@nameserver = nameserver
@port = port
@record = record
@@ -48,34 +59,36 @@ module Gitlab
@interval = interval
@disconnect_timeout = disconnect_timeout
@use_tcp = use_tcp
+ @load_balancer = load_balancer
end
def start
Thread.new do
loop do
- interval =
- begin
- refresh_if_necessary
- rescue StandardError => error
- # Any exceptions that might occur should be reported to
- # Sentry, instead of silently terminating this thread.
- Gitlab::ErrorTracking.track_exception(error)
-
- Gitlab::AppLogger.error(
- "Service discovery encountered an error: #{error.message}"
- )
-
- self.interval
- end
+ next_sleep_duration = perform_service_discovery
# We slightly randomize the sleep() interval. This should reduce
# the likelihood of _all_ processes refreshing at the same time,
# possibly putting unnecessary pressure on the DNS server.
- sleep(interval + rand(MAX_SLEEP_ADJUSTMENT))
+ sleep(next_sleep_duration + rand(MAX_SLEEP_ADJUSTMENT))
end
end
end
+ def perform_service_discovery
+ refresh_if_necessary
+ rescue StandardError => error
+ # Any exceptions that might occur should be reported to
+ # Sentry, instead of silently terminating this thread.
+ Gitlab::ErrorTracking.track_exception(error)
+
+ Gitlab::AppLogger.error(
+ "Service discovery encountered an error: #{error.message}"
+ )
+
+ interval
+ end
+
# Refreshes the hosts, but only if the DNS record returned a new list of
# addresses.
#
@@ -108,7 +121,7 @@ module Gitlab
# host/connection. While this connection will be checked in and out,
# it won't be explicitly disconnected.
old_hosts.each do |host|
- host.disconnect!(disconnect_timeout)
+ host.disconnect!(timeout: disconnect_timeout)
end
end
@@ -147,10 +160,6 @@ module Gitlab
end.sort
end
- def load_balancer
- LoadBalancing.proxy.load_balancer
- end
-
def resolver
@resolver ||= Net::DNS::Resolver.new(
nameservers: Resolver.new(@nameserver).resolve,
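
An illustrative setup mirroring `configure_proxy` above: run one discovery pass immediately, then let the background thread keep the host list fresh. The nameserver, port and record values are examples only:

    discovery = Gitlab::Database::LoadBalancing::ServiceDiscovery.new(
      nameserver: 'localhost',
      port: 8600,
      record: 'db-replica.service.consul',
      load_balancer: Gitlab::Database::LoadBalancing.proxy.load_balancer
    )

    discovery.perform_service_discovery  # returns the seconds to sleep before the next check
    discovery.start                      # spawns the refresh loop in its own Thread
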
diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb
index 8e1aa079216..20d42b9a694 100644
--- a/lib/gitlab/database/load_balancing/sticking.rb
+++ b/lib/gitlab/database/load_balancing/sticking.rb
@@ -53,14 +53,8 @@ module Gitlab
# write location. If no such location exists, err on the side of caution.
return false unless location
- if ::Feature.enabled?(:load_balancing_refine_load_balancer_methods)
- load_balancer.select_up_to_date_host(location).tap do |selected|
- unstick(namespace, id) if selected
- end
- else
- load_balancer.select_caught_up_hosts(location).tap do |selected|
- unstick(namespace, id) if selected
- end
+ load_balancer.select_up_to_date_host(location).tap do |selected|
+ unstick(namespace, id) if selected
end
end
@@ -109,7 +103,7 @@ module Gitlab
if LoadBalancing.enable?
load_balancer.primary_write_location
else
- Gitlab::Database.get_write_location(ActiveRecord::Base.connection)
+ Gitlab::Database.main.get_write_location(ActiveRecord::Base.connection)
end
return if location.blank?
diff --git a/lib/gitlab/database/metrics.rb b/lib/gitlab/database/metrics.rb
new file mode 100644
index 00000000000..5dabbc81b9c
--- /dev/null
+++ b/lib/gitlab/database/metrics.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class Metrics
+ extend ::Gitlab::Utils::StrongMemoize
+
+ class << self
+ def subtransactions_increment(model_name)
+ subtransactions_counter.increment(model: model_name)
+ end
+
+ private
+
+ def subtransactions_counter
+ strong_memoize(:subtransactions_counter) do
+ name = :gitlab_active_record_subtransactions_total
+ comment = 'Total amount of subtransactions created by ActiveRecord'
+
+ ::Gitlab::Metrics.counter(name, comment)
+ end
+ end
+ end
+ end
+ end
+end
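
An illustrative call site for the new counter (the model name is an example):

    # Increments gitlab_active_record_subtransactions_total{model="Project"} by one.
    Gitlab::Database::Metrics.subtransactions_increment('Project')
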
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 842ab4f7b80..23d9b16dc09 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -6,6 +6,7 @@ module Gitlab
include Migrations::BackgroundMigrationHelpers
include DynamicModelHelpers
include RenameTableHelpers
+ include AsyncIndexes::MigrationHelpers
# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
MAX_IDENTIFIER_NAME_LENGTH = 63
@@ -152,6 +153,9 @@ module Gitlab
disable_statement_timeout do
add_index(table_name, column_name, **options)
end
+
+ # We created this index. Now let's remove the queuing entry for async creation in case it's still there.
+ unprepare_async_index(table_name, column_name, **options)
end
# Removes an existing index, concurrently
@@ -178,6 +182,9 @@ module Gitlab
disable_statement_timeout do
remove_index(table_name, **options.merge({ column: column_name }))
end
+
+ # We removed this index. Now let's make sure it's not queued for async creation.
+ unprepare_async_index(table_name, column_name, **options)
end
# Removes an existing index, concurrently
@@ -208,6 +215,9 @@ module Gitlab
disable_statement_timeout do
remove_index(table_name, **options.merge({ name: index_name }))
end
+
+ # We removed this index. Now let's make sure it's not queued for async creation.
+ unprepare_async_index_by_name(table_name, index_name, **options)
end
# Adds a foreign key with only minimal locking on the tables involved.
@@ -221,8 +231,13 @@ module Gitlab
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
# name - The name of the foreign key.
+ # validate - Flag that controls whether the new foreign key will be validated after creation.
+ # If the flag is not set, the constraint will only be enforced for new data.
+ # reverse_lock_order - Flag that controls whether we should attempt to acquire locks in the reverse
+ # order of the ALTER TABLE. This can be useful in situations where the foreign
+ # key creation could deadlock with another process.
#
- def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true)
+ def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true, reverse_lock_order: false)
# Transactions would result in ALTER TABLE locks being held for the
# duration of the transaction, defeating the purpose of this method.
if transaction_open?
@@ -250,6 +265,8 @@ module Gitlab
# data.
with_lock_retries do
+ execute("LOCK TABLE #{target}, #{source} IN SHARE ROW EXCLUSIVE MODE") if reverse_lock_order
+
execute <<-EOF.strip_heredoc
ALTER TABLE #{source}
ADD CONSTRAINT #{options[:name]}
@@ -324,9 +341,9 @@ module Gitlab
# - Per connection (requires a cleanup after the execution)
#
# When using a per connection disable statement, code must be inside
- # a block so we can automatically execute `RESET ALL` after block finishes
+ # a block so we can automatically execute `RESET statement_timeout` after block finishes
# otherwise the statement will still be disabled until connection is dropped
- # or `RESET ALL` is executed
+ # or `RESET statement_timeout` is executed
def disable_statement_timeout
if block_given?
if statement_timeout_disabled?
@@ -340,7 +357,7 @@ module Gitlab
yield
ensure
- execute('RESET ALL')
+ execute('RESET statement_timeout')
end
end
else
@@ -1248,8 +1265,8 @@ module Gitlab
def check_trigger_permissions!(table)
unless Grant.create_and_execute_trigger?(table)
- dbname = Database.database_name
- user = Database.username
+ dbname = Database.main.database_name
+ user = Database.main.username
raise <<-EOF
Your database user is not allowed to create, drop, or execute triggers on the
@@ -1569,8 +1586,8 @@ into similar problems in the future (e.g. when new tables are created).
def create_extension(extension)
execute('CREATE EXTENSION IF NOT EXISTS %s' % extension)
rescue ActiveRecord::StatementInvalid => e
- dbname = Database.database_name
- user = Database.username
+ dbname = Database.main.database_name
+ user = Database.main.username
warn(<<~MSG) if e.to_s =~ /permission denied/
GitLab requires the PostgreSQL extension '#{extension}' installed in database '#{dbname}', but
@@ -1597,8 +1614,8 @@ into similar problems in the future (e.g. when new tables are created).
def drop_extension(extension)
execute('DROP EXTENSION IF EXISTS %s' % extension)
rescue ActiveRecord::StatementInvalid => e
- dbname = Database.database_name
- user = Database.username
+ dbname = Database.main.database_name
+ user = Database.main.username
warn(<<~MSG) if e.to_s =~ /permission denied/
This migration attempts to drop the PostgreSQL extension '#{extension}'
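
A sketch of a (hypothetical) migration using the new `reverse_lock_order` flag; the table and column names are illustrative:

    class AddExampleForeignKeyWithReverseLockOrder < ActiveRecord::Migration[6.1]
      include Gitlab::Database::MigrationHelpers

      disable_ddl_transaction!

      def up
        add_concurrent_foreign_key :ci_builds, :projects,
          column: :project_id,
          on_delete: :cascade,
          # Locks target and source (in that order) before the ALTER TABLE, to avoid
          # deadlocking with processes that acquire the locks the other way around.
          reverse_lock_order: true
      end

      def down
        with_lock_retries do
          remove_foreign_key :ci_builds, column: :project_id
        end
      end
    end
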
diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb
index 28491a934a0..19d80ba1d64 100644
--- a/lib/gitlab/database/migrations/background_migration_helpers.rb
+++ b/lib/gitlab/database/migrations/background_migration_helpers.rb
@@ -264,6 +264,34 @@ module Gitlab
migration
end
+ # Force a background migration to complete.
+ #
+ # WARNING: This method will block the caller and move the background migration from an
+ # asynchronous migration to a synchronous migration.
+ #
+ # 1. Steal work from sidekiq and perform immediately (avoid duplicates generated by step 2).
+ # 2. Process any pending tracked jobs.
+ # 3. Steal work from sidekiq and perform immediately (clear anything left from step 2).
+ # 4. Optionally remove job tracking information.
+ #
+        # This method does not guarantee that all jobs completed successfully.
+ def finalize_background_migration(class_name, delete_tracking_jobs: ['succeeded'])
+ # Empty the sidekiq queue.
+ Gitlab::BackgroundMigration.steal(class_name)
+
+ # Process pending tracked jobs.
+ jobs = Gitlab::Database::BackgroundMigrationJob.pending.for_migration_class(class_name)
+ jobs.find_each do |job|
+ BackgroundMigrationWorker.new.perform(job.class_name, job.arguments)
+ end
+
+ # Empty the sidekiq queue.
+ Gitlab::BackgroundMigration.steal(class_name)
+
+ # Delete job tracking rows.
+ delete_job_tracking(class_name, status: delete_tracking_jobs) if delete_tracking_jobs
+ end
+
def perform_background_migration_inline?
Rails.env.test? || Rails.env.development?
end
@@ -304,6 +332,12 @@ module Gitlab
end
end
+ def delete_job_tracking(class_name, status: 'succeeded')
+ status = Array(status).map { |s| Gitlab::Database::BackgroundMigrationJob.statuses[s] }
+ jobs = Gitlab::Database::BackgroundMigrationJob.where(status: status).for_migration_class(class_name)
+ jobs.each_batch { |batch| batch.delete_all }
+ end
+
private
def track_in_database(class_name, arguments)
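
A sketch of a (hypothetical) post-deployment migration that forces a background migration to completion; the job class name is illustrative:

    class FinalizeExampleBackgroundMigration < ActiveRecord::Migration[6.1]
      include Gitlab::Database::MigrationHelpers

      def up
        # Steals queued Sidekiq jobs, replays any still-pending tracked jobs inline,
        # and deletes tracking rows for succeeded jobs (the default).
        finalize_background_migration('ExampleBackgroundMigrationJob')
      end

      def down
        # no-op
      end
    end
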
diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb
index e9ef80d5198..d1e55eb825c 100644
--- a/lib/gitlab/database/migrations/instrumentation.rb
+++ b/lib/gitlab/database/migrations/instrumentation.rb
@@ -9,18 +9,20 @@ module Gitlab
attr_reader :observations
- def initialize(observers = ::Gitlab::Database::Migrations::Observers.all_observers)
- @observers = observers
+ def initialize(observer_classes = ::Gitlab::Database::Migrations::Observers.all_observers)
+ @observer_classes = observer_classes
@observations = []
end
- def observe(migration, &block)
- observation = Observation.new(migration)
+ def observe(version:, name:, &block)
+ observation = Observation.new(version, name)
observation.success = true
+ observers = observer_classes.map { |c| c.new(observation) }
+
exception = nil
- on_each_observer { |observer| observer.before }
+ on_each_observer(observers) { |observer| observer.before }
observation.walltime = Benchmark.realtime do
yield
@@ -29,8 +31,8 @@ module Gitlab
observation.success = false
end
- on_each_observer { |observer| observer.after }
- on_each_observer { |observer| observer.record(observation) }
+ on_each_observer(observers) { |observer| observer.after }
+ on_each_observer(observers) { |observer| observer.record }
record_observation(observation)
@@ -41,13 +43,13 @@ module Gitlab
private
- attr_reader :observers
+ attr_reader :observer_classes
def record_observation(observation)
@observations << observation
end
- def on_each_observer(&block)
+ def on_each_observer(observers, &block)
observers.each do |observer|
yield observer
rescue StandardError => e
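
A sketch of the revised instrumentation API: observers are now passed as classes and instantiated per observation, and the migration version and name are given explicitly (values below are illustrative):

    instrumentation = Gitlab::Database::Migrations::Instrumentation.new

    instrumentation.observe(version: 20210713123456, name: 'add_example_index') do
      # run the migration being measured
    end

    observation = instrumentation.observations.last
    observation.walltime   # seconds spent inside the block
    observation.success    # false if the block raised
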
diff --git a/lib/gitlab/database/migrations/observation.rb b/lib/gitlab/database/migrations/observation.rb
index 046843824a4..54eedec3c7b 100644
--- a/lib/gitlab/database/migrations/observation.rb
+++ b/lib/gitlab/database/migrations/observation.rb
@@ -4,7 +4,8 @@ module Gitlab
module Database
module Migrations
Observation = Struct.new(
- :migration,
+ :version,
+ :name,
:walltime,
:success,
:total_database_size_change,
diff --git a/lib/gitlab/database/migrations/observers.rb b/lib/gitlab/database/migrations/observers.rb
index 979a098d699..140b3feed64 100644
--- a/lib/gitlab/database/migrations/observers.rb
+++ b/lib/gitlab/database/migrations/observers.rb
@@ -6,10 +6,10 @@ module Gitlab
module Observers
def self.all_observers
[
- TotalDatabaseSizeChange.new,
- QueryStatistics.new,
- QueryLog.new,
- QueryDetails.new
+ TotalDatabaseSizeChange,
+ QueryStatistics,
+ QueryLog,
+ QueryDetails
]
end
end
diff --git a/lib/gitlab/database/migrations/observers/migration_observer.rb b/lib/gitlab/database/migrations/observers/migration_observer.rb
index 9bfbf35887d..85d18abb9ef 100644
--- a/lib/gitlab/database/migrations/observers/migration_observer.rb
+++ b/lib/gitlab/database/migrations/observers/migration_observer.rb
@@ -5,10 +5,11 @@ module Gitlab
module Migrations
module Observers
class MigrationObserver
- attr_reader :connection
+ attr_reader :connection, :observation
- def initialize
+ def initialize(observation)
@connection = ActiveRecord::Base.connection
+ @observation = observation
end
def before
@@ -19,7 +20,7 @@ module Gitlab
# implement in subclass
end
- def record(observation)
+ def record
raise NotImplementedError, 'implement in subclass'
end
end
diff --git a/lib/gitlab/database/migrations/observers/query_details.rb b/lib/gitlab/database/migrations/observers/query_details.rb
index 52b6464d449..dadacd2d2fc 100644
--- a/lib/gitlab/database/migrations/observers/query_details.rb
+++ b/lib/gitlab/database/migrations/observers/query_details.rb
@@ -6,8 +6,8 @@ module Gitlab
module Observers
class QueryDetails < MigrationObserver
def before
- @file_path = File.join(Instrumentation::RESULT_DIR, 'current-details.json')
- @file = File.open(@file_path, 'wb')
+ file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}-query-details.json")
+ @file = File.open(file_path, 'wb')
@writer = Oj::StreamWriter.new(@file, {})
@writer.push_array
@subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |*args|
@@ -22,8 +22,8 @@ module Gitlab
@file.close
end
- def record(observation)
- File.rename(@file_path, File.join(Instrumentation::RESULT_DIR, "#{observation.migration}-query-details.json"))
+ def record
+ # no-op
end
def record_sql_event(_name, started, finished, _unique_id, payload)
diff --git a/lib/gitlab/database/migrations/observers/query_log.rb b/lib/gitlab/database/migrations/observers/query_log.rb
index 45df07fe391..e15d733d2a2 100644
--- a/lib/gitlab/database/migrations/observers/query_log.rb
+++ b/lib/gitlab/database/migrations/observers/query_log.rb
@@ -7,8 +7,8 @@ module Gitlab
class QueryLog < MigrationObserver
def before
@logger_was = ActiveRecord::Base.logger
- @log_file_path = File.join(Instrumentation::RESULT_DIR, 'current.log')
- @logger = Logger.new(@log_file_path)
+ file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}.log")
+ @logger = Logger.new(file_path)
ActiveRecord::Base.logger = @logger
end
@@ -17,8 +17,8 @@ module Gitlab
@logger.close
end
- def record(observation)
- File.rename(@log_file_path, File.join(Instrumentation::RESULT_DIR, "#{observation.migration}.log"))
+ def record
+ # no-op
end
end
end
diff --git a/lib/gitlab/database/migrations/observers/query_statistics.rb b/lib/gitlab/database/migrations/observers/query_statistics.rb
index 466f4724256..54504646a79 100644
--- a/lib/gitlab/database/migrations/observers/query_statistics.rb
+++ b/lib/gitlab/database/migrations/observers/query_statistics.rb
@@ -16,7 +16,7 @@ module Gitlab
connection.execute('select pg_stat_statements_reset()')
end
- def record(observation)
+ def record
return unless enabled?
observation.query_statistics = connection.execute(<<~SQL)
diff --git a/lib/gitlab/database/migrations/observers/total_database_size_change.rb b/lib/gitlab/database/migrations/observers/total_database_size_change.rb
index 0b76b0bef5e..2e89498b79f 100644
--- a/lib/gitlab/database/migrations/observers/total_database_size_change.rb
+++ b/lib/gitlab/database/migrations/observers/total_database_size_change.rb
@@ -13,7 +13,7 @@ module Gitlab
@size_after = get_total_database_size
end
- def record(observation)
+ def record
return unless @size_after && @size_before
observation.total_database_size_change = @size_after - @size_before
diff --git a/lib/gitlab/database/multi_threaded_migration.rb b/lib/gitlab/database/multi_threaded_migration.rb
deleted file mode 100644
index 65a6cb8e369..00000000000
--- a/lib/gitlab/database/multi_threaded_migration.rb
+++ /dev/null
@@ -1,52 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- module MultiThreadedMigration
- MULTI_THREAD_AR_CONNECTION = :thread_local_ar_connection
-
- # This overwrites the default connection method so that every thread can
- # use a thread-local connection, while still supporting all of Rails'
- # migration methods.
- def connection
- Thread.current[MULTI_THREAD_AR_CONNECTION] ||
- ActiveRecord::Base.connection
- end
-
- # Starts a thread-pool for N threads, along with N threads each using a
- # single connection. The provided block is yielded from inside each
- # thread.
- #
- # Example:
- #
- # with_multiple_threads(4) do
- # execute('SELECT ...')
- # end
- #
- # thread_count - The number of threads to start.
- #
- # join - When set to true this method will join the threads, blocking the
- # caller until all threads have finished running.
- #
- # Returns an Array containing the started threads.
- def with_multiple_threads(thread_count, join: true)
- pool = Gitlab::Database.create_connection_pool(thread_count)
-
- threads = Array.new(thread_count) do
- Thread.new do
- pool.with_connection do |connection|
- Thread.current[MULTI_THREAD_AR_CONNECTION] = connection
- yield
- ensure
- Thread.current[MULTI_THREAD_AR_CONNECTION] = nil
- end
- end
- end
-
- threads.each(&:join) if join
-
- threads
- end
- end
- end
-end
diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
new file mode 100644
index 00000000000..dc63d93fd07
--- /dev/null
+++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+module Gitlab
+ module Database
+ module Partitioning
+ class DetachedPartitionDropper
+ def perform
+ return unless Feature.enabled?(:drop_detached_partitions, default_enabled: :yaml)
+
+ Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop")
+ Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition|
+ conn.transaction do
+ # Another process may have already dropped the table and deleted this entry
+ next unless (detached_partition = Postgresql::DetachedPartition.lock.find_by(id: detached_partition.id))
+
+ unless check_partition_detached?(detached_partition)
+ Gitlab::AppLogger.error(message: "Attempt to drop attached database partition", partition_name: detached_partition.table_name)
+ detached_partition.destroy!
+ next
+ end
+
+ drop_one(detached_partition)
+ end
+ rescue StandardError => e
+ Gitlab::AppLogger.error(message: "Failed to drop previously detached partition",
+ partition_name: detached_partition.table_name,
+ exception_class: e.class,
+ exception_message: e.message)
+ end
+ end
+
+ private
+
+ def drop_one(detached_partition)
+ conn.transaction do
+ conn.execute(<<~SQL)
+ DROP TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{conn.quote_table_name(detached_partition.table_name)}
+ SQL
+
+ detached_partition.destroy!
+ end
+ Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: detached_partition.table_name)
+ end
+
+ def check_partition_detached?(detached_partition)
+ # PostgresPartition checks the pg_inherits view, so our partition will only show here if it's still attached
+ # and thus should not be dropped
+ !PostgresPartition.for_identifier("#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{detached_partition.table_name}").exists?
+ end
+
+ def conn
+ @conn ||= ApplicationRecord.connection
+ end
+ end
+ end
+ end
+end
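The dropper leans on a Postgresql::DetachedPartition bookkeeping model that is not part of this diff. A plausible sketch of the piece it assumes, reading ready_to_drop as "the retention deadline written at detach time has passed" (hypothetical; the real model lives elsewhere in the tree):

module Postgresql
  class DetachedPartition < ApplicationRecord
    # Partitions whose drop_after deadline has passed and which are therefore
    # eligible for DetachedPartitionDropper#perform.
    scope :ready_to_drop, -> { where('drop_after < ?', Time.current) }
  end
end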
diff --git a/lib/gitlab/database/partitioning/monthly_strategy.rb b/lib/gitlab/database/partitioning/monthly_strategy.rb
index 4c68399cb68..7992c2fdaa7 100644
--- a/lib/gitlab/database/partitioning/monthly_strategy.rb
+++ b/lib/gitlab/database/partitioning/monthly_strategy.rb
@@ -86,7 +86,7 @@ module Gitlab
end
def pruning_old_partitions?
- Feature.enabled?(:partition_pruning_dry_run) && retain_for.present?
+ retain_for.present?
end
def oldest_active_date
diff --git a/lib/gitlab/database/partitioning/partition_manager.rb b/lib/gitlab/database/partitioning/partition_manager.rb
index c2a9422a42a..7e433ecdd39 100644
--- a/lib/gitlab/database/partitioning/partition_manager.rb
+++ b/lib/gitlab/database/partitioning/partition_manager.rb
@@ -4,6 +4,8 @@ module Gitlab
module Database
module Partitioning
class PartitionManager
+ UnsafeToDetachPartitionError = Class.new(StandardError)
+
def self.register(model)
raise ArgumentError, "Only models with a #partitioning_strategy can be registered." unless model.respond_to?(:partitioning_strategy)
@@ -16,6 +18,7 @@ module Gitlab
LEASE_TIMEOUT = 1.minute
MANAGEMENT_LEASE_KEY = 'database_partition_management_%s'
+ RETAIN_DETACHED_PARTITIONS_FOR = 1.week
attr_reader :models
@@ -35,13 +38,16 @@ module Gitlab
partitions_to_create = missing_partitions(model)
create(partitions_to_create) unless partitions_to_create.empty?
- if Feature.enabled?(:partition_pruning_dry_run)
+ if Feature.enabled?(:partition_pruning, default_enabled: :yaml)
partitions_to_detach = extra_partitions(model)
detach(partitions_to_detach) unless partitions_to_detach.empty?
end
end
rescue StandardError => e
- Gitlab::AppLogger.error("Failed to create / detach partition(s) for #{model.table_name}: #{e.class}: #{e.message}")
+ Gitlab::AppLogger.error(message: "Failed to create / detach partition(s)",
+ table_name: model.table_name,
+ exception_class: e.class,
+ exception_message: e.message)
end
end
@@ -54,7 +60,6 @@ module Gitlab
end
def extra_partitions(model)
- return [] unless Feature.enabled?(:partition_pruning_dry_run)
return [] unless connection.table_exists?(model.table_name)
model.partitioning_strategy.extra_partitions
@@ -74,7 +79,9 @@ module Gitlab
partitions.each do |partition|
connection.execute partition.to_sql
- Gitlab::AppLogger.info("Created partition #{partition.partition_name} for table #{partition.table}")
+ Gitlab::AppLogger.info(message: "Created partition",
+ partition_name: partition.partition_name,
+ table_name: partition.table)
end
end
end
@@ -89,7 +96,24 @@ module Gitlab
end
def detach_one_partition(partition)
- Gitlab::AppLogger.info("Planning to detach #{partition.partition_name} for table #{partition.table}")
+ assert_partition_detachable!(partition)
+
+ connection.execute partition.to_detach_sql
+
+ Postgresql::DetachedPartition.create!(table_name: partition.partition_name,
+ drop_after: RETAIN_DETACHED_PARTITIONS_FOR.from_now)
+
+ Gitlab::AppLogger.info(message: "Detached Partition",
+ partition_name: partition.partition_name,
+ table_name: partition.table)
+ end
+
+ def assert_partition_detachable!(partition)
+ parent_table_identifier = "#{connection.current_schema}.#{partition.table}"
+
+ if (example_fk = PostgresForeignKey.by_referenced_table_identifier(parent_table_identifier).first)
+ raise UnsafeToDetachPartitionError, "Cannot detach #{partition.partition_name}, it would block while checking foreign key #{example_fk.name} on #{example_fk.constrained_table_identifier}"
+ end
end
def with_lock_retries(&block)
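Put together with the dropper above, pruning is now a two-stage process: detach immediately, drop a week later. A hypothetical console walk-through, assuming the GitLab Rails environment (table and partition names are illustrative):

parent    = 'web_hook_logs'
partition = 'web_hook_logs_000001'

# 1. Guard: detaching is refused if any foreign key still references the parent
#    table, because PostgreSQL would block while re-checking that constraint.
if Gitlab::Database::PostgresForeignKey.by_referenced_table_identifier("public.#{parent}").exists?
  raise Gitlab::Database::Partitioning::PartitionManager::UnsafeToDetachPartitionError
end

# 2. Detach now and record when the table may be dropped
#    (RETAIN_DETACHED_PARTITIONS_FOR = 1 week).
ActiveRecord::Base.connection.execute(<<~SQL)
  ALTER TABLE #{parent} DETACH PARTITION #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partition}
SQL
Postgresql::DetachedPartition.create!(table_name: partition, drop_after: 1.week.from_now)

# 3. A later DetachedPartitionDropper#perform run finds the row via ready_to_drop,
#    re-checks pg_inherits, and only then issues the DROP TABLE.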
diff --git a/lib/gitlab/database/partitioning/partition_monitoring.rb b/lib/gitlab/database/partitioning/partition_monitoring.rb
index ad122fd47fe..6963ecd2cc1 100644
--- a/lib/gitlab/database/partitioning/partition_monitoring.rb
+++ b/lib/gitlab/database/partitioning/partition_monitoring.rb
@@ -16,6 +16,7 @@ module Gitlab
gauge_present.set({ table: model.table_name }, strategy.current_partitions.size)
gauge_missing.set({ table: model.table_name }, strategy.missing_partitions.size)
+ gauge_extra.set({ table: model.table_name }, strategy.extra_partitions.size)
end
end
@@ -28,6 +29,10 @@ module Gitlab
def gauge_missing
@gauge_missing ||= Gitlab::Metrics.gauge(:db_partitions_missing, 'Number of database partitions currently expected, but not present')
end
+
+ def gauge_extra
+ @gauge_extra ||= Gitlab::Metrics.gauge(:db_partitions_extra, 'Number of database partitions currently attached to tables, but outside of their retention window and scheduled to be dropped')
+ end
end
end
end
diff --git a/lib/gitlab/database/partitioning/time_partition.rb b/lib/gitlab/database/partitioning/time_partition.rb
index 7dca60c0854..1221f042530 100644
--- a/lib/gitlab/database/partitioning/time_partition.rb
+++ b/lib/gitlab/database/partitioning/time_partition.rb
@@ -47,6 +47,13 @@ module Gitlab
SQL
end
+ def to_detach_sql
+ <<~SQL
+ ALTER TABLE #{conn.quote_table_name(table)}
+ DETACH PARTITION #{fully_qualified_partition}
+ SQL
+ end
+
def ==(other)
table == other.table && partition_name == other.partition_name && from == other.from && to == other.to
end
diff --git a/lib/gitlab/database/postgres_foreign_key.rb b/lib/gitlab/database/postgres_foreign_key.rb
new file mode 100644
index 00000000000..94f74724295
--- /dev/null
+++ b/lib/gitlab/database/postgres_foreign_key.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class PostgresForeignKey < ApplicationRecord
+ self.primary_key = :oid
+
+ scope :by_referenced_table_identifier, ->(identifier) do
+ raise ArgumentError, "Referenced table name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
+
+ where(referenced_table_identifier: identifier)
+ end
+ end
+ end
+end
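Hypothetical usage of the new model, assuming the GitLab Rails environment and the foreign-key metadata view backing it (the table and constraint names below are invented):

fk = Gitlab::Database::PostgresForeignKey
       .by_referenced_table_identifier('public.ci_pipelines')
       .first

fk&.name                          # e.g. "fk_87f4cefcda"
fk&.constrained_table_identifier  # e.g. "public.ci_builds"

# Unqualified names are rejected before any query is run:
Gitlab::Database::PostgresForeignKey.by_referenced_table_identifier('ci_pipelines')
# => ArgumentError: Referenced table name is not fully qualified with a schema: ci_pipelines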
diff --git a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
index 580cab5622d..2e3f674cf82 100644
--- a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
+++ b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
@@ -57,7 +57,7 @@ module Gitlab
# @param finish final pkey range
# @return [Gitlab::Database::PostgresHll::Buckets] HyperLogLog data structure instance that can estimate number of unique elements
def execute(batch_size: nil, start: nil, finish: nil)
- raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
+ raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? # rubocop: disable Database/MultipleDatabases
batch_size ||= DEFAULT_BATCH_SIZE
start = actual_start(start)
diff --git a/lib/gitlab/database/postgres_index.rb b/lib/gitlab/database/postgres_index.rb
index 58e4e7e7924..1079bfdeda3 100644
--- a/lib/gitlab/database/postgres_index.rb
+++ b/lib/gitlab/database/postgres_index.rb
@@ -19,7 +19,12 @@ module Gitlab
end
# Indexes with reindexing support
- scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) }
+ scope :reindexing_support, -> do
+ where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES)
+ .not_match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
+ end
+
+ scope :reindexing_leftovers, -> { match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$") }
scope :not_match, ->(regex) { where("name !~ ?", regex) }
diff --git a/lib/gitlab/database/postgres_partition.rb b/lib/gitlab/database/postgres_partition.rb
index 0986372586b..7da60d8375d 100644
--- a/lib/gitlab/database/postgres_partition.rb
+++ b/lib/gitlab/database/postgres_partition.rb
@@ -7,10 +7,14 @@ module Gitlab
belongs_to :postgres_partitioned_table, foreign_key: 'parent_identifier', primary_key: 'identifier'
- scope :by_identifier, ->(identifier) do
+ scope :for_identifier, ->(identifier) do
raise ArgumentError, "Partition name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
- find(identifier)
+ where(primary_key => identifier)
+ end
+
+ scope :by_identifier, ->(identifier) do
+ for_identifier(identifier).first!
end
scope :for_parent_table, ->(name) { where("parent_identifier = concat(current_schema(), '.', ?)", name).order(:name) }
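The split gives callers a composable relation (for_identifier) next to the old fetch-or-raise behaviour (by_identifier). Hypothetical usage, assuming the GitLab Rails environment; the identifier is illustrative:

scope = Gitlab::Database::PostgresPartition
          .for_identifier('gitlab_partitions_dynamic.web_hook_logs_000001')

scope.is_a?(ActiveRecord::Relation)  # => true, so DetachedPartitionDropper can call .exists? on it
scope.exists?                        # => false once the partition no longer appears in pg_inherits

Gitlab::Database::PostgresPartition.by_identifier('gitlab_partitions_dynamic.web_hook_logs_000001')
# => the record, or ActiveRecord::RecordNotFound if no such partition is attached (first!)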
diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb
index 841e04ccbd1..04b409a9306 100644
--- a/lib/gitlab/database/reindexing.rb
+++ b/lib/gitlab/database/reindexing.rb
@@ -8,6 +8,13 @@ module Gitlab
SUPPORTED_TYPES = %w(btree gist).freeze
+ # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
+ # which only conflicts with DDL and vacuum. We therefore execute this with a rather
+ # high lock timeout and a long pause in between retries. This is an alternative to
+ # setting a high statement timeout, which would lead to a long running query with effects
+ # on e.g. vacuum.
+ REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
+
# candidate_indexes: Array of Gitlab::Database::PostgresIndex
def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION)
IndexSelection.new(candidate_indexes).take(how_many).each do |index|
@@ -15,10 +22,22 @@ module Gitlab
end
end
- def self.candidate_indexes
- Gitlab::Database::PostgresIndex
- .not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
- .reindexing_support
+ def self.cleanup_leftovers!
+ PostgresIndex.reindexing_leftovers.each do |index|
+ Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity")
+
+ retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
+ timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
+ klass: self.class,
+ logger: Gitlab::AppLogger
+ )
+
+ retries.run(raise_on_exhaustion: false) do
+ ApplicationRecord.connection.tap do |conn|
+ conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}")
+ end
+ end
+ end
end
end
end
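With the retry configuration moved up here, each of the 30 attempts runs with a 1-minute lock timeout and then pauses for 9 minutes, so a stuck leftover index is retried for roughly five hours before the run gives up quietly (raise_on_exhaustion: false). A hypothetical invocation from a scheduled maintenance job, assuming the GitLab Rails environment:

# Drop temporary index copies (names matching TEMPORARY_INDEX_PATTERN) left
# behind by interrupted reindexing runs, then reindex the usual candidates.
Gitlab::Database::Reindexing.cleanup_leftovers!
Gitlab::Database::Reindexing.perform(Gitlab::Database::PostgresIndex.reindexing_support)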
diff --git a/lib/gitlab/database/reindexing/reindex_concurrently.rb b/lib/gitlab/database/reindexing/reindex_concurrently.rb
index 8d9f9f5abdd..7a720f7c539 100644
--- a/lib/gitlab/database/reindexing/reindex_concurrently.rb
+++ b/lib/gitlab/database/reindexing/reindex_concurrently.rb
@@ -11,13 +11,6 @@ module Gitlab
STATEMENT_TIMEOUT = 9.hours
PG_MAX_INDEX_NAME_LENGTH = 63
- # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
- # which only conflicts with DDL and vacuum. We therefore execute this with a rather
- # high lock timeout and a long pause in between retries. This is an alternative to
- # setting a high statement timeout, which would lead to a long running query with effects
- # on e.g. vacuum.
- REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
-
attr_reader :index, :logger
def initialize(index, logger: Gitlab::AppLogger)
diff --git a/lib/gitlab/database/schema_migrations/context.rb b/lib/gitlab/database/schema_migrations/context.rb
index bd8b9bed2c1..35105121bbd 100644
--- a/lib/gitlab/database/schema_migrations/context.rb
+++ b/lib/gitlab/database/schema_migrations/context.rb
@@ -6,17 +6,14 @@ module Gitlab
class Context
attr_reader :connection
+ DEFAULT_SCHEMA_MIGRATIONS_PATH = "db/schema_migrations"
+
def initialize(connection)
@connection = connection
end
def schema_directory
- @schema_directory ||=
- if ActiveRecord::Base.configurations.primary?(database_name)
- File.join(db_dir, 'schema_migrations')
- else
- File.join(db_dir, "#{database_name}_schema_migrations")
- end
+ @schema_directory ||= Rails.root.join(database_schema_migrations_path).to_s
end
def versions_to_create
@@ -32,8 +29,8 @@ module Gitlab
@database_name ||= @connection.pool.db_config.name
end
- def db_dir
- @db_dir ||= Rails.application.config.paths["db"].first
+ def database_schema_migrations_path
+ @connection.pool.db_config.configuration_hash[:schema_migrations_path] || DEFAULT_SCHEMA_MIGRATIONS_PATH
end
end
end
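The schema-migrations directory is now driven purely by the connection's configuration, falling back to db/schema_migrations. A minimal sketch of the lookup using stand-in Structs so it runs outside Rails (the nested Structs only mimic the connection.pool.db_config chain):

DbConfig = Struct.new(:configuration_hash)
Pool     = Struct.new(:db_config)
Conn     = Struct.new(:pool)

connection = Conn.new(Pool.new(DbConfig.new({ schema_migrations_path: 'db/ci_schema_migrations' })))

path = connection.pool.db_config.configuration_hash[:schema_migrations_path] || 'db/schema_migrations'
# => "db/ci_schema_migrations"; without the key, the default "db/schema_migrations" is used.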
diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb
index 20bf6fa4d30..bb8b9f333fe 100644
--- a/lib/gitlab/database/similarity_score.rb
+++ b/lib/gitlab/database/similarity_score.rb
@@ -67,7 +67,7 @@ module Gitlab
def self.build_expression(search:, rules:)
return EXPRESSION_ON_INVALID_INPUT if search.blank? || rules.empty?
- quoted_search = ActiveRecord::Base.connection.quote(search.to_s)
+ quoted_search = ApplicationRecord.connection.quote(search.to_s)
first_expression, *expressions = rules.map do |rule|
rule_to_arel(quoted_search, rule)
@@ -110,7 +110,7 @@ module Gitlab
# CAST(multiplier AS numeric)
def self.multiplier_expression(rule)
- quoted_multiplier = ActiveRecord::Base.connection.quote(rule.fetch(:multiplier, DEFAULT_MULTIPLIER).to_s)
+ quoted_multiplier = ApplicationRecord.connection.quote(rule.fetch(:multiplier, DEFAULT_MULTIPLIER).to_s)
Arel::Nodes::NamedFunction.new('CAST', [Arel.sql(quoted_multiplier).as('numeric')])
end
diff --git a/lib/gitlab/database/transaction/context.rb b/lib/gitlab/database/transaction/context.rb
new file mode 100644
index 00000000000..a50dd30b75b
--- /dev/null
+++ b/lib/gitlab/database/transaction/context.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Transaction
+ class Context
+ attr_reader :context
+
+ LOG_DEPTH_THRESHOLD = 8
+ LOG_SAVEPOINTS_THRESHOLD = 32
+ LOG_DURATION_S_THRESHOLD = 300
+ LOG_THROTTLE_DURATION = 1
+
+ def initialize
+ @context = {}
+ end
+
+ def set_start_time
+ @context[:start_time] = current_timestamp
+ end
+
+ def increment_savepoints
+ @context[:savepoints] = @context[:savepoints].to_i + 1
+ end
+
+ def increment_rollbacks
+ @context[:rollbacks] = @context[:rollbacks].to_i + 1
+ end
+
+ def increment_releases
+ @context[:releases] = @context[:releases].to_i + 1
+ end
+
+ def set_depth(depth)
+ @context[:depth] = [@context[:depth].to_i, depth].max
+ end
+
+ def track_sql(sql)
+ (@context[:queries] ||= []).push(sql)
+ end
+
+ def duration
+ return unless @context[:start_time].present?
+
+ current_timestamp - @context[:start_time]
+ end
+
+ def depth_threshold_exceeded?
+ @context[:depth].to_i > LOG_DEPTH_THRESHOLD
+ end
+
+ def savepoints_threshold_exceeded?
+ @context[:savepoints].to_i > LOG_SAVEPOINTS_THRESHOLD
+ end
+
+ def duration_threshold_exceeded?
+ duration.to_i > LOG_DURATION_S_THRESHOLD
+ end
+
+ def log_savepoints?
+ depth_threshold_exceeded? || savepoints_threshold_exceeded?
+ end
+
+ def log_duration?
+ duration_threshold_exceeded?
+ end
+
+ def should_log?
+ !logged_already? && (log_savepoints? || log_duration?)
+ end
+
+ def commit
+ log(:commit)
+ end
+
+ def rollback
+ log(:rollback)
+ end
+
+ private
+
+ def queries
+ @context[:queries].to_a.join("\n")
+ end
+
+ def current_timestamp
+ ::Gitlab::Metrics::System.monotonic_time
+ end
+
+ def logged_already?
+ return false if @context[:last_log_timestamp].nil?
+
+ (current_timestamp - @context[:last_log_timestamp].to_i) < LOG_THROTTLE_DURATION
+ end
+
+ def set_last_log_timestamp
+ @context[:last_log_timestamp] = current_timestamp
+ end
+
+ def log(operation)
+ return unless should_log?
+
+ set_last_log_timestamp
+
+ attributes = {
+ class: self.class.name,
+ result: operation,
+ duration_s: duration,
+ depth: @context[:depth].to_i,
+ savepoints_count: @context[:savepoints].to_i,
+ rollbacks_count: @context[:rollbacks].to_i,
+ releases_count: @context[:releases].to_i,
+ sql: queries
+ }
+
+ application_info(attributes)
+ end
+
+ def application_info(attributes)
+ Gitlab::AppJsonLogger.info(attributes)
+ end
+ end
+ end
+ end
+end
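A hypothetical manual drive of the new context, assuming the GitLab Rails environment (in practice the Observer below feeds it from sql.active_record notifications):

context = Gitlab::Database::Transaction::Context.new

context.set_start_time
context.set_depth(1)
context.track_sql('BEGIN')
40.times { context.increment_savepoints }  # crosses LOG_SAVEPOINTS_THRESHOLD (32)

context.should_log?  # => true  (threshold exceeded, nothing logged yet)
context.commit       # emits a single Gitlab::AppJsonLogger.info entry with the counters and SQL
context.should_log?  # => false (throttled for LOG_THROTTLE_DURATION = 1 second)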
diff --git a/lib/gitlab/database/transaction/observer.rb b/lib/gitlab/database/transaction/observer.rb
new file mode 100644
index 00000000000..7888f0916e3
--- /dev/null
+++ b/lib/gitlab/database/transaction/observer.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Transaction
+ class Observer
+ INSTRUMENTED_STATEMENTS = %w[BEGIN SAVEPOINT ROLLBACK RELEASE].freeze
+ LONGEST_COMMAND_LENGTH = 'ROLLBACK TO SAVEPOINT'.length
+ START_COMMENT = '/*'
+ END_COMMENT = '*/'
+
+ def self.instrument_transactions(cmd, event)
+ connection = event.payload[:connection]
+ manager = connection&.transaction_manager
+ return unless manager.respond_to?(:transaction_context)
+
+ context = manager.transaction_context
+ return if context.nil?
+
+ if cmd.start_with?('BEGIN')
+ context.set_start_time
+ context.set_depth(0)
+ context.track_sql(event.payload[:sql])
+ elsif cmd.start_with?('SAVEPOINT ')
+ context.set_depth(manager.open_transactions)
+ context.increment_savepoints
+ elsif cmd.start_with?('ROLLBACK TO SAVEPOINT')
+ context.increment_rollbacks
+ elsif cmd.start_with?('RELEASE SAVEPOINT ')
+ context.increment_releases
+ end
+ end
+
+ def self.register!
+ ActiveSupport::Notifications.subscribe('sql.active_record') do |event|
+ sql = event.payload.dig(:sql).to_s
+ cmd = extract_sql_command(sql)
+
+ if cmd.start_with?(*INSTRUMENTED_STATEMENTS)
+ self.instrument_transactions(cmd, event)
+ end
+ end
+ end
+
+ def self.extract_sql_command(sql)
+ return sql unless sql.start_with?(START_COMMENT)
+
+ index = sql.index(END_COMMENT)
+
+ return sql unless index
+
+ # /* comment */ SELECT
+ #
+ # We offset by the position of the end comment plus one character to
+ # accommodate the space between the Marginalia comment and the SQL statement.
+ offset = index + END_COMMENT.length + 1
+
+ # Avoid duplicating the entire string. For performance reasons this is
+ # not optimized to strip extra spaces; we assume there is exactly one
+ # space after the comment.
+ sql[offset..offset + LONGEST_COMMAND_LENGTH]
+ end
+ end
+ end
+ end
+end
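The command extraction is plain string work, so it can be exercised outside Rails. A standalone illustration that copies the constants above (the Marginalia-style comment in the sample is made up):

LONGEST_COMMAND_LENGTH = 'ROLLBACK TO SAVEPOINT'.length
START_COMMENT = '/*'
END_COMMENT = '*/'

def extract_sql_command(sql)
  return sql unless sql.start_with?(START_COMMENT)

  index = sql.index(END_COMMENT)
  return sql unless index

  # Skip the comment, the closing "*/" and the single space that follows it.
  offset = index + END_COMMENT.length + 1
  sql[offset..offset + LONGEST_COMMAND_LENGTH]
end

extract_sql_command('/*application:web,correlation_id:abc*/ SAVEPOINT active_record_1')
# => "SAVEPOINT active_recor" (a 22-character prefix, enough for start_with? checks)

extract_sql_command('SELECT 1')
# => "SELECT 1" (no leading comment, returned unchanged)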