diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-08-19 12:08:42 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-08-19 12:08:42 +0300 |
commit | b76ae638462ab0f673e5915986070518dd3f9ad3 (patch) | |
tree | bdab0533383b52873be0ec0eb4d3c66598ff8b91 /lib/gitlab/database | |
parent | 434373eabe7b4be9593d18a585fb763f1e5f1a6f (diff) |
Add latest changes from gitlab-org/gitlab@14-2-stable-eev14.2.0-rc42
Diffstat (limited to 'lib/gitlab/database')
46 files changed, 1049 insertions, 302 deletions
diff --git a/lib/gitlab/database/as_with_materialized.rb b/lib/gitlab/database/as_with_materialized.rb index eda991efbd5..07809c5b592 100644 --- a/lib/gitlab/database/as_with_materialized.rb +++ b/lib/gitlab/database/as_with_materialized.rb @@ -19,7 +19,7 @@ module Gitlab # Note: to be deleted after the minimum PG version is set to 12.0 def self.materialized_supported? strong_memoize(:materialized_supported) do - Gitlab::Database.version.match?(/^1[2-9]\./) # version 12.x and above + Gitlab::Database.main.version.match?(/^1[2-9]\./) # version 12.x and above end end diff --git a/lib/gitlab/database/async_indexes.rb b/lib/gitlab/database/async_indexes.rb new file mode 100644 index 00000000000..d89d5238356 --- /dev/null +++ b/lib/gitlab/database/async_indexes.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module AsyncIndexes + DEFAULT_INDEXES_PER_INVOCATION = 2 + + def self.create_pending_indexes!(how_many: DEFAULT_INDEXES_PER_INVOCATION) + PostgresAsyncIndex.order(:id).limit(how_many).each do |async_index| + IndexCreator.new(async_index).perform + end + end + end + end +end diff --git a/lib/gitlab/database/async_indexes/index_creator.rb b/lib/gitlab/database/async_indexes/index_creator.rb new file mode 100644 index 00000000000..00de79ec970 --- /dev/null +++ b/lib/gitlab/database/async_indexes/index_creator.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module AsyncIndexes + class IndexCreator + include ExclusiveLeaseGuard + + TIMEOUT_PER_ACTION = 1.day + STATEMENT_TIMEOUT = 9.hours + + def initialize(async_index) + @async_index = async_index + end + + def perform + try_obtain_lease do + if index_exists? + log_index_info('Skipping index creation as the index exists') + else + log_index_info('Creating async index') + + set_statement_timeout do + connection.execute(async_index.definition) + end + + log_index_info('Finished creating async index') + end + + async_index.destroy + end + end + + private + + attr_reader :async_index + + def index_exists? + connection.indexes(async_index.table_name).any? { |index| index.name == async_index.name } + end + + def connection + @connection ||= ApplicationRecord.connection + end + + def lease_timeout + TIMEOUT_PER_ACTION + end + + def set_statement_timeout + connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT) + yield + ensure + connection.execute('RESET statement_timeout') + end + + def log_index_info(message) + Gitlab::AppLogger.info(message: message, table_name: async_index.table_name, index_name: async_index.name) + end + end + end + end +end diff --git a/lib/gitlab/database/async_indexes/migration_helpers.rb b/lib/gitlab/database/async_indexes/migration_helpers.rb new file mode 100644 index 00000000000..dff6376270a --- /dev/null +++ b/lib/gitlab/database/async_indexes/migration_helpers.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module AsyncIndexes + module MigrationHelpers + def unprepare_async_index(table_name, column_name, **options) + return unless async_index_creation_available? + + index_name = options[:name] || index_name(table_name, column_name) + + raise 'Specifying index name is mandatory - specify name: argument' unless index_name + + unprepare_async_index_by_name(table_name, index_name) + end + + def unprepare_async_index_by_name(table_name, index_name, **options) + return unless async_index_creation_available? + + PostgresAsyncIndex.find_by(name: index_name).try do |async_index| + async_index.destroy + end + end + + # Prepares an index for asynchronous creation. + # + # Stores the index information in the postgres_async_indexes table to be created later. The + # index will be always be created CONCURRENTLY, so that option does not need to be given. + # If an existing asynchronous definition exists with the same name, the existing entry will be + # updated with the new definition. + # + # If the requested index has already been created, it is not stored in the table for + # asynchronous creation. + def prepare_async_index(table_name, column_name, **options) + return unless async_index_creation_available? + + index_name = options[:name] || index_name(table_name, column_name) + + raise 'Specifying index name is mandatory - specify name: argument' unless index_name + + options = options.merge({ algorithm: :concurrently }) + + if index_exists?(table_name, column_name, **options) + Gitlab::AppLogger.warn( + message: 'Index not prepared because it already exists', + table_name: table_name, + index_name: index_name) + + return + end + + index, algorithm, if_not_exists = add_index_options(table_name, column_name, **options) + + create_index = ActiveRecord::ConnectionAdapters::CreateIndexDefinition.new(index, algorithm, if_not_exists) + schema_creation = ActiveRecord::ConnectionAdapters::PostgreSQL::SchemaCreation.new(ApplicationRecord.connection) + definition = schema_creation.accept(create_index) + + async_index = PostgresAsyncIndex.safe_find_or_create_by!(name: index_name) do |rec| + rec.table_name = table_name + rec.definition = definition + end + + Gitlab::AppLogger.info( + message: 'Prepared index for async creation', + table_name: async_index.table_name, + index_name: async_index.name) + + async_index + end + + private + + def async_index_creation_available? + ApplicationRecord.connection.table_exists?(:postgres_async_indexes) && + Feature.enabled?(:database_async_index_creation, type: :ops) + end + end + end + end +end diff --git a/lib/gitlab/database/async_indexes/postgres_async_index.rb b/lib/gitlab/database/async_indexes/postgres_async_index.rb new file mode 100644 index 00000000000..236459e6216 --- /dev/null +++ b/lib/gitlab/database/async_indexes/postgres_async_index.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module AsyncIndexes + class PostgresAsyncIndex < ApplicationRecord + self.table_name = 'postgres_async_indexes' + + MAX_IDENTIFIER_LENGTH = Gitlab::Database::MigrationHelpers::MAX_IDENTIFIER_NAME_LENGTH + MAX_DEFINITION_LENGTH = 2048 + + validates :name, presence: true, length: { maximum: MAX_IDENTIFIER_LENGTH } + validates :table_name, presence: true, length: { maximum: MAX_IDENTIFIER_LENGTH } + validates :definition, presence: true, length: { maximum: MAX_DEFINITION_LENGTH } + + def to_s + definition + end + end + end + end +end diff --git a/lib/gitlab/database/batch_counter.rb b/lib/gitlab/database/batch_counter.rb index 5f2e404c9da..7efa5b46ecb 100644 --- a/lib/gitlab/database/batch_counter.rb +++ b/lib/gitlab/database/batch_counter.rb @@ -31,7 +31,7 @@ module Gitlab end def count(batch_size: nil, mode: :itself, start: nil, finish: nil) - raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? + raise 'BatchCount can not be run inside a transaction' if @relation.connection.transaction_open? check_mode!(mode) diff --git a/lib/gitlab/database/connection.rb b/lib/gitlab/database/connection.rb new file mode 100644 index 00000000000..21861e4fba8 --- /dev/null +++ b/lib/gitlab/database/connection.rb @@ -0,0 +1,249 @@ +# frozen_string_literal: true + +module Gitlab + module Database + # Configuration settings and methods for interacting with a PostgreSQL + # database, with support for multiple databases. + class Connection + DEFAULT_POOL_HEADROOM = 10 + + attr_reader :scope + + # Initializes a new `Database`. + # + # The `scope` argument must be an object (such as `ActiveRecord::Base`) + # that supports retrieving connections and connection pools. + def initialize(scope = ActiveRecord::Base) + @config = nil + @scope = scope + @version = nil + @open_transactions_baseline = 0 + end + + # We configure the database connection pool size automatically based on + # the configured concurrency. We also add some headroom, to make sure we + # don't run out of connections when more threads besides the 'user-facing' + # ones are running. + # + # Read more about this in + # doc/development/database/client_side_connection_pool.md + def default_pool_size + headroom = + (ENV["DB_POOL_HEADROOM"].presence || DEFAULT_POOL_HEADROOM).to_i + + Gitlab::Runtime.max_threads + headroom + end + + def config + # The result of this method must not be cached, as other methods may use + # it after making configuration changes and expect those changes to be + # present. For example, `disable_prepared_statements` expects the + # configuration settings to always be up to date. + # + # See the following for more information: + # + # - https://gitlab.com/gitlab-org/release/retrospectives/-/issues/39 + # - https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5238 + scope.connection_db_config.configuration_hash.with_indifferent_access + end + + def pool_size + config[:pool] || default_pool_size + end + + def username + config[:username] || ENV['USER'] + end + + def database_name + config[:database] + end + + def adapter_name + config[:adapter] + end + + def human_adapter_name + if postgresql? + 'PostgreSQL' + else + 'Unknown' + end + end + + def postgresql? + adapter_name.casecmp('postgresql') == 0 + end + + def db_config_with_default_pool_size + db_config_object = scope.connection_db_config + config = db_config_object.configuration_hash.merge(pool: default_pool_size) + + ActiveRecord::DatabaseConfigurations::HashConfig.new( + db_config_object.env_name, + db_config_object.name, + config + ) + end + + # Disables prepared statements for the current database connection. + def disable_prepared_statements + scope.establish_connection(config.merge(prepared_statements: false)) + end + + # Check whether the underlying database is in read-only mode + def db_read_only? + pg_is_in_recovery = + scope + .connection + .execute('SELECT pg_is_in_recovery()') + .first + .fetch('pg_is_in_recovery') + + Gitlab::Utils.to_boolean(pg_is_in_recovery) + end + + def db_read_write? + !db_read_only? + end + + def version + @version ||= database_version.match(/\A(?:PostgreSQL |)([^\s]+).*\z/)[1] + end + + def database_version + connection.execute("SELECT VERSION()").first['version'] + end + + def postgresql_minimum_supported_version? + version.to_f >= MINIMUM_POSTGRES_VERSION + end + + # Bulk inserts a number of rows into a table, optionally returning their + # IDs. + # + # table - The name of the table to insert the rows into. + # rows - An Array of Hash instances, each mapping the columns to their + # values. + # return_ids - When set to true the return value will be an Array of IDs of + # the inserted rows + # disable_quote - A key or an Array of keys to exclude from quoting (You + # become responsible for protection from SQL injection for + # these keys!) + # on_conflict - Defines an upsert. Values can be: :disabled (default) or + # :do_nothing + def bulk_insert(table, rows, return_ids: false, disable_quote: [], on_conflict: nil) + return if rows.empty? + + keys = rows.first.keys + columns = keys.map { |key| connection.quote_column_name(key) } + + disable_quote = Array(disable_quote).to_set + tuples = rows.map do |row| + keys.map do |k| + disable_quote.include?(k) ? row[k] : connection.quote(row[k]) + end + end + + sql = <<-EOF + INSERT INTO #{table} (#{columns.join(', ')}) + VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')} + EOF + + sql = "#{sql} ON CONFLICT DO NOTHING" if on_conflict == :do_nothing + + sql = "#{sql} RETURNING id" if return_ids + + result = connection.execute(sql) + + if return_ids + result.values.map { |tuple| tuple[0].to_i } + else + [] + end + end + + def cached_column_exists?(table_name, column_name) + connection + .schema_cache.columns_hash(table_name) + .has_key?(column_name.to_s) + end + + def cached_table_exists?(table_name) + exists? && connection.schema_cache.data_source_exists?(table_name) + end + + def exists? + connection + + true + rescue StandardError + false + end + + def system_id + row = connection + .execute('SELECT system_identifier FROM pg_control_system()') + .first + + row['system_identifier'] + end + + # @param [ActiveRecord::Connection] ar_connection + # @return [String] + def get_write_location(ar_connection) + use_new_load_balancer_query = Gitlab::Utils + .to_boolean(ENV['USE_NEW_LOAD_BALANCER_QUERY'], default: true) + + sql = + if use_new_load_balancer_query + <<~NEWSQL + SELECT CASE + WHEN pg_is_in_recovery() = true AND EXISTS (SELECT 1 FROM pg_stat_get_wal_senders()) + THEN pg_last_wal_replay_lsn()::text + WHEN pg_is_in_recovery() = false + THEN pg_current_wal_insert_lsn()::text + ELSE NULL + END AS location; + NEWSQL + else + <<~SQL + SELECT pg_current_wal_insert_lsn()::text AS location + SQL + end + + row = ar_connection.select_all(sql).first + row['location'] if row + end + + # inside_transaction? will return true if the caller is running within a + # transaction. Handles special cases when running inside a test + # environment, where tests may be wrapped in transactions + def inside_transaction? + base = Rails.env.test? ? @open_transactions_baseline : 0 + + scope.connection.open_transactions > base + end + + # These methods that access @open_transactions_baseline are not + # thread-safe. These are fine though because we only call these in + # RSpec's main thread. If we decide to run specs multi-threaded, we would + # need to use something like ThreadGroup to keep track of this value + def set_open_transactions_baseline + @open_transactions_baseline = scope.connection.open_transactions + end + + def reset_open_transactions_baseline + @open_transactions_baseline = 0 + end + + private + + def connection + scope.connection + end + end + end +end + +Gitlab::Database::Connection.prepend_mod_with('Gitlab::Database::Connection') diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb index a7bfafe2815..870cf25984b 100644 --- a/lib/gitlab/database/count/reltuples_count_strategy.rb +++ b/lib/gitlab/database/count/reltuples_count_strategy.rb @@ -54,7 +54,7 @@ module Gitlab # Querying tuple stats only works on the primary. Due to load balancing, the # easiest way to do this is to start a transaction. - ActiveRecord::Base.transaction do + ActiveRecord::Base.transaction do # rubocop: disable Database/MultipleDatabases get_statistics(non_sti_table_names, check_statistics: check_statistics).each_with_object({}) do |row, data| model = table_to_model[row.table_name] data[model] = row.estimate diff --git a/lib/gitlab/database/count/tablesample_count_strategy.rb b/lib/gitlab/database/count/tablesample_count_strategy.rb index e9387a91a14..489bc0aacea 100644 --- a/lib/gitlab/database/count/tablesample_count_strategy.rb +++ b/lib/gitlab/database/count/tablesample_count_strategy.rb @@ -61,7 +61,7 @@ module Gitlab #{where_clause(model)} SQL - rows = ActiveRecord::Base.connection.select_all(query) + rows = ActiveRecord::Base.connection.select_all(query) # rubocop: disable Database/MultipleDatabases Integer(rows.first['count']) end diff --git a/lib/gitlab/database/grant.rb b/lib/gitlab/database/grant.rb index 7774dd9fffe..c8a30c68bc6 100644 --- a/lib/gitlab/database/grant.rb +++ b/lib/gitlab/database/grant.rb @@ -10,7 +10,7 @@ module Gitlab # We _must not_ use quote_table_name as this will produce double # quotes on PostgreSQL and for "has_table_privilege" we need single # quotes. - connection = ActiveRecord::Base.connection + connection = ActiveRecord::Base.connection # rubocop: disable Database/MultipleDatabases quoted_table = connection.quote(table) begin diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb index 31d41a6d6c0..08f108eb8e4 100644 --- a/lib/gitlab/database/load_balancing.rb +++ b/lib/gitlab/database/load_balancing.rb @@ -23,7 +23,7 @@ module Gitlab # The connection proxy to use for load balancing (if enabled). def self.proxy - unless @proxy + unless load_balancing_proxy = ActiveRecord::Base.load_balancing_proxy Gitlab::ErrorTracking.track_exception( ProxyNotConfiguredError.new( "Attempting to access the database load balancing proxy, but it wasn't configured.\n" \ @@ -31,12 +31,12 @@ module Gitlab )) end - @proxy + load_balancing_proxy end # Returns a Hash containing the load balancing configuration. def self.configuration - Gitlab::Database.config[:load_balancing] || {} + Gitlab::Database.main.config[:load_balancing] || {} end # Returns the maximum replica lag size in bytes. @@ -79,7 +79,7 @@ module Gitlab end def self.pool_size - Gitlab::Database.config[:pool] + Gitlab::Database.main.pool_size end # Returns true if load balancing is to be enabled. @@ -107,12 +107,12 @@ module Gitlab # Configures proxying of requests. def self.configure_proxy(proxy = ConnectionProxy.new(hosts)) - @proxy = proxy + ActiveRecord::Base.load_balancing_proxy = proxy - # This hijacks the "connection" method to ensure both - # `ActiveRecord::Base.connection` and all models use the same load - # balancing proxy. - ActiveRecord::Base.singleton_class.prepend(ActiveRecordProxy) + # Populate service discovery immediately if it is configured + if service_discovery_enabled? + ServiceDiscovery.new(service_discovery_configuration).perform_service_discovery + end end def self.active_record_models @@ -132,9 +132,22 @@ module Gitlab # recognize the connection, this method returns the primary role # directly. In future, we may need to check for other sources. def self.db_role_for_connection(connection) - return ROLE_PRIMARY if !enable? || @proxy.blank? + return ROLE_UNKNOWN unless connection + + # The connection proxy does not have a role assigned + # as this is dependent on a execution context + return ROLE_UNKNOWN if connection.is_a?(ConnectionProxy) - proxy.load_balancer.db_role_for_connection(connection) + # During application init we might receive `NullPool` + return ROLE_UNKNOWN unless connection.respond_to?(:pool) && + connection.pool.respond_to?(:db_config) && + connection.pool.db_config.respond_to?(:name) + + if connection.pool.db_config.name.ends_with?(LoadBalancer::REPLICA_SUFFIX) + ROLE_REPLICA + else + ROLE_PRIMARY + end end end end diff --git a/lib/gitlab/database/load_balancing/active_record_proxy.rb b/lib/gitlab/database/load_balancing/active_record_proxy.rb index 7763497e770..deaea62d774 100644 --- a/lib/gitlab/database/load_balancing/active_record_proxy.rb +++ b/lib/gitlab/database/load_balancing/active_record_proxy.rb @@ -7,7 +7,7 @@ module Gitlab # "connection" method. module ActiveRecordProxy def connection - LoadBalancing.proxy + ::Gitlab::Database::LoadBalancing.proxy end end end diff --git a/lib/gitlab/database/load_balancing/connection_proxy.rb b/lib/gitlab/database/load_balancing/connection_proxy.rb index 3a09689a724..938f4951532 100644 --- a/lib/gitlab/database/load_balancing/connection_proxy.rb +++ b/lib/gitlab/database/load_balancing/connection_proxy.rb @@ -41,31 +41,31 @@ module Gitlab def select_all(arel, name = nil, binds = [], preparable: nil) if arel.respond_to?(:locked) && arel.locked # SELECT ... FOR UPDATE queries should be sent to the primary. - write_using_load_balancer(:select_all, [arel, name, binds], + write_using_load_balancer(:select_all, arel, name, binds, sticky: true) else - read_using_load_balancer(:select_all, [arel, name, binds]) + read_using_load_balancer(:select_all, arel, name, binds) end end NON_STICKY_READS.each do |name| - define_method(name) do |*args, &block| - read_using_load_balancer(name, args, &block) + define_method(name) do |*args, **kwargs, &block| + read_using_load_balancer(name, *args, **kwargs, &block) end end STICKY_WRITES.each do |name| - define_method(name) do |*args, &block| - write_using_load_balancer(name, args, sticky: true, &block) + define_method(name) do |*args, **kwargs, &block| + write_using_load_balancer(name, *args, sticky: true, **kwargs, &block) end end - def transaction(*args, &block) + def transaction(*args, **kwargs, &block) if current_session.fallback_to_replicas_for_ambiguous_queries? track_read_only_transaction! - read_using_load_balancer(:transaction, args, &block) + read_using_load_balancer(:transaction, *args, **kwargs, &block) else - write_using_load_balancer(:transaction, args, sticky: true, &block) + write_using_load_balancer(:transaction, *args, sticky: true, **kwargs, &block) end ensure @@ -73,26 +73,26 @@ module Gitlab end # Delegates all unknown messages to a read-write connection. - def method_missing(name, *args, &block) + def method_missing(...) if current_session.fallback_to_replicas_for_ambiguous_queries? - read_using_load_balancer(name, args, &block) + read_using_load_balancer(...) else - write_using_load_balancer(name, args, &block) + write_using_load_balancer(...) end end # Performs a read using the load balancer. # # name - The name of the method to call on a connection object. - def read_using_load_balancer(name, args, &block) + def read_using_load_balancer(...) if current_session.use_primary? && !current_session.use_replicas_for_read_queries? @load_balancer.read_write do |connection| - connection.send(name, *args, &block) + connection.send(...) end else @load_balancer.read do |connection| - connection.send(name, *args, &block) + connection.send(...) end end end @@ -102,7 +102,7 @@ module Gitlab # name - The name of the method to call on a connection object. # sticky - If set to true the session will stick to the master after # the write. - def write_using_load_balancer(name, args, sticky: false, &block) + def write_using_load_balancer(name, *args, sticky: false, **kwargs, &block) if read_only_transaction? raise WriteInsideReadOnlyTransactionError, 'A write query is performed inside a read-only transaction' end @@ -113,7 +113,7 @@ module Gitlab # secondary instead of on a primary (when necessary). current_session.write! if sticky - connection.send(name, *args, &block) + connection.send(name, *args, **kwargs, &block) end end diff --git a/lib/gitlab/database/load_balancing/host.rb b/lib/gitlab/database/load_balancing/host.rb index 3e74b5ea727..4c5357ae8e3 100644 --- a/lib/gitlab/database/load_balancing/host.rb +++ b/lib/gitlab/database/load_balancing/host.rb @@ -29,11 +29,11 @@ module Gitlab @host = host @port = port @load_balancer = load_balancer - @pool = Database.create_connection_pool(LoadBalancing.pool_size, host, port) + @pool = load_balancer.create_replica_connection_pool(::Gitlab::Database::LoadBalancing.pool_size, host, port) @online = true @last_checked_at = Time.zone.now - interval = LoadBalancing.replica_check_interval + interval = ::Gitlab::Database::LoadBalancing.replica_check_interval @intervals = (interval..(interval * 2)).step(0.5).to_a end @@ -41,10 +41,10 @@ module Gitlab # # timeout - The time after which the pool should be forcefully # disconnected. - def disconnect!(timeout = 120) - start_time = Metrics::System.monotonic_time + def disconnect!(timeout: 120) + start_time = ::Gitlab::Metrics::System.monotonic_time - while (Metrics::System.monotonic_time - start_time) <= timeout + while (::Gitlab::Metrics::System.monotonic_time - start_time) <= timeout break if pool.connections.none?(&:in_use?) sleep(2) @@ -54,7 +54,7 @@ module Gitlab end def offline! - LoadBalancing::Logger.warn( + ::Gitlab::Database::LoadBalancing::Logger.warn( event: :host_offline, message: 'Marking host as offline', db_host: @host, @@ -72,14 +72,14 @@ module Gitlab refresh_status if @online - LoadBalancing::Logger.info( + ::Gitlab::Database::LoadBalancing::Logger.info( event: :host_online, message: 'Host is online after replica status check', db_host: @host, db_port: @port ) else - LoadBalancing::Logger.warn( + ::Gitlab::Database::LoadBalancing::Logger.warn( event: :host_offline, message: 'Host is offline after replica status check', db_host: @host, @@ -108,7 +108,7 @@ module Gitlab def replication_lag_below_threshold? if (lag_time = replication_lag_time) - lag_time <= LoadBalancing.max_replication_lag_time + lag_time <= ::Gitlab::Database::LoadBalancing.max_replication_lag_time else false end @@ -125,7 +125,7 @@ module Gitlab # only do this if we haven't replicated in a while so we only need # to connect to the primary when truly necessary. if (lag_size = replication_lag_size) - lag_size <= LoadBalancing.max_replication_difference + lag_size <= ::Gitlab::Database::LoadBalancing.max_replication_difference else false end diff --git a/lib/gitlab/database/load_balancing/host_list.rb b/lib/gitlab/database/load_balancing/host_list.rb index 24800012947..aa731521732 100644 --- a/lib/gitlab/database/load_balancing/host_list.rb +++ b/lib/gitlab/database/load_balancing/host_list.rb @@ -8,13 +8,11 @@ module Gitlab # hosts - The list of secondary hosts to add. def initialize(hosts = []) @hosts = hosts.shuffle - @pools = Set.new @index = 0 @mutex = Mutex.new @hosts_gauge = Gitlab::Metrics.gauge(:db_load_balancing_hosts, 'Current number of load balancing hosts') set_metrics! - update_pools end def hosts @@ -35,15 +33,16 @@ module Gitlab @mutex.synchronize { @hosts.map { |host| [host.host, host.port] } } end - def manage_pool?(pool) - @pools.include?(pool) - end - def hosts=(hosts) @mutex.synchronize do + ::Gitlab::Database::LoadBalancing::Logger.info( + event: :host_list_update, + message: "Updating the host list for service discovery", + host_list_length: hosts.length, + old_host_list_length: @hosts.length + ) @hosts = hosts unsafe_shuffle - update_pools end set_metrics! @@ -89,10 +88,6 @@ module Gitlab def set_metrics! @hosts_gauge.set({}, @hosts.length) end - - def update_pools - @pools = Set.new(@hosts.map(&:pool)) - end end end end diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb index a5d67ebc050..e3f5d0ac470 100644 --- a/lib/gitlab/database/load_balancing/load_balancer.rb +++ b/lib/gitlab/database/load_balancing/load_balancer.rb @@ -7,20 +7,21 @@ module Gitlab # # Each host in the load balancer uses the same credentials as the primary # database. - # - # This class *requires* that `ActiveRecord::Base.retrieve_connection` - # always returns a connection to the primary. class LoadBalancer CACHE_KEY = :gitlab_load_balancer_host - VALID_HOSTS_CACHE_KEY = :gitlab_load_balancer_valid_hosts + + REPLICA_SUFFIX = '_replica' attr_reader :host_list # hosts - The hostnames/addresses of the additional databases. - def initialize(hosts = []) + def initialize(hosts = [], model = ActiveRecord::Base) + @model = model @host_list = HostList.new(hosts.map { |addr| Host.new(addr, self) }) - @connection_db_roles = {}.compare_by_identity - @connection_db_roles_count = {}.compare_by_identity + end + + def disconnect!(timeout: 120) + host_list.hosts.each { |host| host.disconnect!(timeout: timeout) } end # Yields a connection that can be used for reads. @@ -28,7 +29,6 @@ module Gitlab # If no secondaries were available this method will use the primary # instead. def read(&block) - connection = nil conflict_retried = 0 while host @@ -36,12 +36,8 @@ module Gitlab begin connection = host.connection - track_connection_role(connection, ROLE_REPLICA) - return yield connection rescue StandardError => error - untrack_connection_role(connection) - if serialization_failure?(error) # This error can occur when a query conflicts. See # https://www.postgresql.org/docs/current/static/hot-standby.html#HOT-STANDBY-CONFLICT @@ -84,8 +80,6 @@ module Gitlab ) read_write(&block) - ensure - untrack_connection_role(connection) end # Yields a connection that can be used for both reads and writes. @@ -95,22 +89,9 @@ module Gitlab # Instead of immediately grinding to a halt we'll retry the operation # a few times. retry_with_backoff do - connection = ActiveRecord::Base.retrieve_connection - track_connection_role(connection, ROLE_PRIMARY) - + connection = pool.connection yield connection end - ensure - untrack_connection_role(connection) - end - - # Recognize the role (primary/replica) of the database this connection - # is connecting to. If the connection is not issued by this load - # balancer, return nil - def db_role_for_connection(connection) - return @connection_db_roles[connection] if @connection_db_roles[connection] - return ROLE_REPLICA if @host_list.manage_pool?(connection.pool) - return ROLE_PRIMARY if connection.pool == ActiveRecord::Base.connection_pool end # Returns a host to use for queries. @@ -118,28 +99,27 @@ module Gitlab # Hosts are scoped per thread so that multiple threads don't # accidentally re-use the same host + connection. def host - RequestStore[CACHE_KEY] ||= current_host_list.next + request_cache[CACHE_KEY] ||= @host_list.next end # Releases the host and connection for the current thread. def release_host - if host = RequestStore[CACHE_KEY] + if host = request_cache[CACHE_KEY] host.disable_query_cache! host.release_connection end - RequestStore.delete(CACHE_KEY) - RequestStore.delete(VALID_HOSTS_CACHE_KEY) + request_cache.delete(CACHE_KEY) end def release_primary_connection - ActiveRecord::Base.connection_pool.release_connection + pool.release_connection end # Returns the transaction write location of the primary. def primary_write_location location = read_write do |connection| - ::Gitlab::Database.get_write_location(connection) + ::Gitlab::Database.main.get_write_location(connection) end return location if location @@ -148,55 +128,17 @@ module Gitlab end # Returns true if there was at least one host that has caught up with the given transaction. - # - # In case of a retry, this method also stores the set of hosts that have caught up. - # - # UPD: `select_caught_up_hosts` seems to have redundant logic managing host list (`:gitlab_load_balancer_valid_hosts`), - # while we only need a single host: https://gitlab.com/gitlab-org/gitlab/-/issues/326125#note_615271604 - # Also, shuffling the list afterwards doesn't seem to be necessary. - # This may be improved by merging this method with `select_up_to_date_host`. - # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out - def select_caught_up_hosts(location) - all_hosts = @host_list.hosts - valid_hosts = all_hosts.select { |host| host.caught_up?(location) } - - return false if valid_hosts.empty? - - # Hosts can come online after the time when this scan was done, - # so we need to remember the ones that can be used. If the host went - # offline, we'll just rely on the retry mechanism to use the primary. - set_consistent_hosts_for_request(HostList.new(valid_hosts)) - - # Since we will be using a subset from the original list, let's just - # pick a random host and mix up the original list to ensure we don't - # only end up using one replica. - RequestStore[CACHE_KEY] = valid_hosts.sample - @host_list.shuffle - - true - end - - # Returns true if there was at least one host that has caught up with the given transaction. - # Similar to `#select_caught_up_hosts`, picks a random host, to rotate replicas we use. - # Unlike `#select_caught_up_hosts`, does not iterate over all hosts if finds any. - # - # It is going to be merged with `select_caught_up_hosts`, because they intend to do the same. def select_up_to_date_host(location) all_hosts = @host_list.hosts.shuffle host = all_hosts.find { |host| host.caught_up?(location) } return false unless host - RequestStore[CACHE_KEY] = host + request_cache[CACHE_KEY] = host true end - # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out - def set_consistent_hosts_for_request(hosts) - RequestStore[VALID_HOSTS_CACHE_KEY] = hosts - end - # Yields a block, retrying it upon error using an exponential backoff. def retry_with_backoff(retries = 3, time = 2) retried = 0 @@ -247,30 +189,50 @@ module Gitlab end end - private + # pool_size - The size of the DB pool. + # host - An optional host name to use instead of the default one. + # port - An optional port to connect to. + def create_replica_connection_pool(pool_size, host = nil, port = nil) + db_config = pool.db_config - def ensure_caching! - host.enable_query_cache! unless host.query_cache_enabled - end + env_config = db_config.configuration_hash.dup + env_config[:pool] = pool_size + env_config[:host] = host if host + env_config[:port] = port if port + + replica_db_config = ActiveRecord::DatabaseConfigurations::HashConfig.new( + db_config.env_name, + db_config.name + REPLICA_SUFFIX, + env_config + ) - def track_connection_role(connection, role) - @connection_db_roles[connection] = role - @connection_db_roles_count[connection] ||= 0 - @connection_db_roles_count[connection] += 1 + # We cannot use ActiveRecord::Base.connection_handler.establish_connection + # as it will rewrite ActiveRecord::Base.connection + ActiveRecord::ConnectionAdapters::ConnectionHandler + .new + .establish_connection(replica_db_config) end - def untrack_connection_role(connection) - return if connection.blank? || @connection_db_roles_count[connection].blank? + private - @connection_db_roles_count[connection] -= 1 - if @connection_db_roles_count[connection] <= 0 - @connection_db_roles.delete(connection) - @connection_db_roles_count.delete(connection) - end + # ActiveRecord::ConnectionAdapters::ConnectionHandler handles fetching, + # and caching for connections pools for each "connection", so we + # leverage that. + def pool + ActiveRecord::Base.connection_handler.retrieve_connection_pool( + @model.connection_specification_name, + role: ActiveRecord::Base.writing_role, + shard: ActiveRecord::Base.default_shard + ) + end + + def ensure_caching! + host.enable_query_cache! unless host.query_cache_enabled end - def current_host_list - RequestStore[VALID_HOSTS_CACHE_KEY] || @host_list + def request_cache + base = RequestStore[:gitlab_load_balancer] ||= {} + base[pool] ||= {} end end end diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb index 8e7e6865402..f8a31622b7d 100644 --- a/lib/gitlab/database/load_balancing/rack_middleware.rb +++ b/lib/gitlab/database/load_balancing/rack_middleware.rb @@ -18,9 +18,9 @@ module Gitlab # namespace - The namespace to use for sticking. # id - The identifier to use for sticking. def self.stick_or_unstick(env, namespace, id) - return unless LoadBalancing.enable? + return unless ::Gitlab::Database::LoadBalancing.enable? - Sticking.unstick_or_continue_sticking(namespace, id) + ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id) env[STICK_OBJECT] ||= Set.new env[STICK_OBJECT] << [namespace, id] @@ -56,7 +56,7 @@ module Gitlab namespaces_and_ids = sticking_namespaces_and_ids(env) namespaces_and_ids.each do |namespace, id| - Sticking.unstick_or_continue_sticking(namespace, id) + ::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(namespace, id) end end @@ -65,17 +65,17 @@ module Gitlab namespaces_and_ids = sticking_namespaces_and_ids(env) namespaces_and_ids.each do |namespace, id| - Sticking.stick_if_necessary(namespace, id) + ::Gitlab::Database::LoadBalancing::Sticking.stick_if_necessary(namespace, id) end end def clear load_balancer.release_host - Session.clear_session + ::Gitlab::Database::LoadBalancing::Session.clear_session end def load_balancer - LoadBalancing.proxy.load_balancer + ::Gitlab::Database::LoadBalancing.proxy.load_balancer end # Determines the sticking namespace and identifier based on the Rack diff --git a/lib/gitlab/database/load_balancing/service_discovery.rb b/lib/gitlab/database/load_balancing/service_discovery.rb index 9b42b25be1c..251961c8246 100644 --- a/lib/gitlab/database/load_balancing/service_discovery.rb +++ b/lib/gitlab/database/load_balancing/service_discovery.rb @@ -13,7 +13,8 @@ module Gitlab # balancer with said hosts. Requests may continue to use the old hosts # until they complete. class ServiceDiscovery - attr_reader :interval, :record, :record_type, :disconnect_timeout + attr_reader :interval, :record, :record_type, :disconnect_timeout, + :load_balancer MAX_SLEEP_ADJUSTMENT = 10 @@ -40,7 +41,17 @@ module Gitlab # disconnect_timeout - The time after which an old host should be # forcefully disconnected. # use_tcp - Use TCP instaed of UDP to look up resources - def initialize(nameserver:, port:, record:, record_type: 'A', interval: 60, disconnect_timeout: 120, use_tcp: false) + # load_balancer - The load balancer instance to use + def initialize( + nameserver:, + port:, + record:, + record_type: 'A', + interval: 60, + disconnect_timeout: 120, + use_tcp: false, + load_balancer: LoadBalancing.proxy.load_balancer + ) @nameserver = nameserver @port = port @record = record @@ -48,34 +59,36 @@ module Gitlab @interval = interval @disconnect_timeout = disconnect_timeout @use_tcp = use_tcp + @load_balancer = load_balancer end def start Thread.new do loop do - interval = - begin - refresh_if_necessary - rescue StandardError => error - # Any exceptions that might occur should be reported to - # Sentry, instead of silently terminating this thread. - Gitlab::ErrorTracking.track_exception(error) - - Gitlab::AppLogger.error( - "Service discovery encountered an error: #{error.message}" - ) - - self.interval - end + next_sleep_duration = perform_service_discovery # We slightly randomize the sleep() interval. This should reduce # the likelihood of _all_ processes refreshing at the same time, # possibly putting unnecessary pressure on the DNS server. - sleep(interval + rand(MAX_SLEEP_ADJUSTMENT)) + sleep(next_sleep_duration + rand(MAX_SLEEP_ADJUSTMENT)) end end end + def perform_service_discovery + refresh_if_necessary + rescue StandardError => error + # Any exceptions that might occur should be reported to + # Sentry, instead of silently terminating this thread. + Gitlab::ErrorTracking.track_exception(error) + + Gitlab::AppLogger.error( + "Service discovery encountered an error: #{error.message}" + ) + + interval + end + # Refreshes the hosts, but only if the DNS record returned a new list of # addresses. # @@ -108,7 +121,7 @@ module Gitlab # host/connection. While this connection will be checked in and out, # it won't be explicitly disconnected. old_hosts.each do |host| - host.disconnect!(disconnect_timeout) + host.disconnect!(timeout: disconnect_timeout) end end @@ -147,10 +160,6 @@ module Gitlab end.sort end - def load_balancer - LoadBalancing.proxy.load_balancer - end - def resolver @resolver ||= Net::DNS::Resolver.new( nameservers: Resolver.new(@nameserver).resolve, diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb index 8e1aa079216..20d42b9a694 100644 --- a/lib/gitlab/database/load_balancing/sticking.rb +++ b/lib/gitlab/database/load_balancing/sticking.rb @@ -53,14 +53,8 @@ module Gitlab # write location. If no such location exists, err on the side of caution. return false unless location - if ::Feature.enabled?(:load_balancing_refine_load_balancer_methods) - load_balancer.select_up_to_date_host(location).tap do |selected| - unstick(namespace, id) if selected - end - else - load_balancer.select_caught_up_hosts(location).tap do |selected| - unstick(namespace, id) if selected - end + load_balancer.select_up_to_date_host(location).tap do |selected| + unstick(namespace, id) if selected end end @@ -109,7 +103,7 @@ module Gitlab if LoadBalancing.enable? load_balancer.primary_write_location else - Gitlab::Database.get_write_location(ActiveRecord::Base.connection) + Gitlab::Database.main.get_write_location(ActiveRecord::Base.connection) end return if location.blank? diff --git a/lib/gitlab/database/metrics.rb b/lib/gitlab/database/metrics.rb new file mode 100644 index 00000000000..5dabbc81b9c --- /dev/null +++ b/lib/gitlab/database/metrics.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Gitlab + module Database + class Metrics + extend ::Gitlab::Utils::StrongMemoize + + class << self + def subtransactions_increment(model_name) + subtransactions_counter.increment(model: model_name) + end + + private + + def subtransactions_counter + strong_memoize(:subtransactions_counter) do + name = :gitlab_active_record_subtransactions_total + comment = 'Total amount of subtransactions created by ActiveRecord' + + ::Gitlab::Metrics.counter(name, comment) + end + end + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 842ab4f7b80..23d9b16dc09 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -6,6 +6,7 @@ module Gitlab include Migrations::BackgroundMigrationHelpers include DynamicModelHelpers include RenameTableHelpers + include AsyncIndexes::MigrationHelpers # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS MAX_IDENTIFIER_NAME_LENGTH = 63 @@ -152,6 +153,9 @@ module Gitlab disable_statement_timeout do add_index(table_name, column_name, **options) end + + # We created this index. Now let's remove the queuing entry for async creation in case it's still there. + unprepare_async_index(table_name, column_name, **options) end # Removes an existed index, concurrently @@ -178,6 +182,9 @@ module Gitlab disable_statement_timeout do remove_index(table_name, **options.merge({ column: column_name })) end + + # We removed this index. Now let's make sure it's not queued for async creation. + unprepare_async_index(table_name, column_name, **options) end # Removes an existing index, concurrently @@ -208,6 +215,9 @@ module Gitlab disable_statement_timeout do remove_index(table_name, **options.merge({ name: index_name })) end + + # We removed this index. Now let's make sure it's not queued for async creation. + unprepare_async_index_by_name(table_name, index_name, **options) end # Adds a foreign key with only minimal locking on the tables involved. @@ -221,8 +231,13 @@ module Gitlab # on_delete - The action to perform when associated data is removed, # defaults to "CASCADE". # name - The name of the foreign key. + # validate - Flag that controls whether the new foreign key will be validated after creation. + # If the flag is not set, the constraint will only be enforced for new data. + # reverse_lock_order - Flag that controls whether we should attempt to acquire locks in the reverse + # order of the ALTER TABLE. This can be useful in situations where the foreign + # key creation could deadlock with another process. # - def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true) + def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true, reverse_lock_order: false) # Transactions would result in ALTER TABLE locks being held for the # duration of the transaction, defeating the purpose of this method. if transaction_open? @@ -250,6 +265,8 @@ module Gitlab # data. with_lock_retries do + execute("LOCK TABLE #{target}, #{source} IN SHARE ROW EXCLUSIVE MODE") if reverse_lock_order + execute <<-EOF.strip_heredoc ALTER TABLE #{source} ADD CONSTRAINT #{options[:name]} @@ -324,9 +341,9 @@ module Gitlab # - Per connection (requires a cleanup after the execution) # # When using a per connection disable statement, code must be inside - # a block so we can automatically execute `RESET ALL` after block finishes + # a block so we can automatically execute `RESET statement_timeout` after block finishes # otherwise the statement will still be disabled until connection is dropped - # or `RESET ALL` is executed + # or `RESET statement_timeout` is executed def disable_statement_timeout if block_given? if statement_timeout_disabled? @@ -340,7 +357,7 @@ module Gitlab yield ensure - execute('RESET ALL') + execute('RESET statement_timeout') end end else @@ -1248,8 +1265,8 @@ module Gitlab def check_trigger_permissions!(table) unless Grant.create_and_execute_trigger?(table) - dbname = Database.database_name - user = Database.username + dbname = Database.main.database_name + user = Database.main.username raise <<-EOF Your database user is not allowed to create, drop, or execute triggers on the @@ -1569,8 +1586,8 @@ into similar problems in the future (e.g. when new tables are created). def create_extension(extension) execute('CREATE EXTENSION IF NOT EXISTS %s' % extension) rescue ActiveRecord::StatementInvalid => e - dbname = Database.database_name - user = Database.username + dbname = Database.main.database_name + user = Database.main.username warn(<<~MSG) if e.to_s =~ /permission denied/ GitLab requires the PostgreSQL extension '#{extension}' installed in database '#{dbname}', but @@ -1597,8 +1614,8 @@ into similar problems in the future (e.g. when new tables are created). def drop_extension(extension) execute('DROP EXTENSION IF EXISTS %s' % extension) rescue ActiveRecord::StatementInvalid => e - dbname = Database.database_name - user = Database.username + dbname = Database.main.database_name + user = Database.main.username warn(<<~MSG) if e.to_s =~ /permission denied/ This migration attempts to drop the PostgreSQL extension '#{extension}' diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb index 28491a934a0..19d80ba1d64 100644 --- a/lib/gitlab/database/migrations/background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/background_migration_helpers.rb @@ -264,6 +264,34 @@ module Gitlab migration end + # Force a background migration to complete. + # + # WARNING: This method will block the caller and move the background migration from an + # asynchronous migration to a synchronous migration. + # + # 1. Steal work from sidekiq and perform immediately (avoid duplicates generated by step 2). + # 2. Process any pending tracked jobs. + # 3. Steal work from sidekiq and perform immediately (clear anything left from step 2). + # 4. Optionally remove job tracking information. + # + # This method does not garauntee that all jobs completed successfully. + def finalize_background_migration(class_name, delete_tracking_jobs: ['succeeded']) + # Empty the sidekiq queue. + Gitlab::BackgroundMigration.steal(class_name) + + # Process pending tracked jobs. + jobs = Gitlab::Database::BackgroundMigrationJob.pending.for_migration_class(class_name) + jobs.find_each do |job| + BackgroundMigrationWorker.new.perform(job.class_name, job.arguments) + end + + # Empty the sidekiq queue. + Gitlab::BackgroundMigration.steal(class_name) + + # Delete job tracking rows. + delete_job_tracking(class_name, status: delete_tracking_jobs) if delete_tracking_jobs + end + def perform_background_migration_inline? Rails.env.test? || Rails.env.development? end @@ -304,6 +332,12 @@ module Gitlab end end + def delete_job_tracking(class_name, status: 'succeeded') + status = Array(status).map { |s| Gitlab::Database::BackgroundMigrationJob.statuses[s] } + jobs = Gitlab::Database::BackgroundMigrationJob.where(status: status).for_migration_class(class_name) + jobs.each_batch { |batch| batch.delete_all } + end + private def track_in_database(class_name, arguments) diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb index e9ef80d5198..d1e55eb825c 100644 --- a/lib/gitlab/database/migrations/instrumentation.rb +++ b/lib/gitlab/database/migrations/instrumentation.rb @@ -9,18 +9,20 @@ module Gitlab attr_reader :observations - def initialize(observers = ::Gitlab::Database::Migrations::Observers.all_observers) - @observers = observers + def initialize(observer_classes = ::Gitlab::Database::Migrations::Observers.all_observers) + @observer_classes = observer_classes @observations = [] end - def observe(migration, &block) - observation = Observation.new(migration) + def observe(version:, name:, &block) + observation = Observation.new(version, name) observation.success = true + observers = observer_classes.map { |c| c.new(observation) } + exception = nil - on_each_observer { |observer| observer.before } + on_each_observer(observers) { |observer| observer.before } observation.walltime = Benchmark.realtime do yield @@ -29,8 +31,8 @@ module Gitlab observation.success = false end - on_each_observer { |observer| observer.after } - on_each_observer { |observer| observer.record(observation) } + on_each_observer(observers) { |observer| observer.after } + on_each_observer(observers) { |observer| observer.record } record_observation(observation) @@ -41,13 +43,13 @@ module Gitlab private - attr_reader :observers + attr_reader :observer_classes def record_observation(observation) @observations << observation end - def on_each_observer(&block) + def on_each_observer(observers, &block) observers.each do |observer| yield observer rescue StandardError => e diff --git a/lib/gitlab/database/migrations/observation.rb b/lib/gitlab/database/migrations/observation.rb index 046843824a4..54eedec3c7b 100644 --- a/lib/gitlab/database/migrations/observation.rb +++ b/lib/gitlab/database/migrations/observation.rb @@ -4,7 +4,8 @@ module Gitlab module Database module Migrations Observation = Struct.new( - :migration, + :version, + :name, :walltime, :success, :total_database_size_change, diff --git a/lib/gitlab/database/migrations/observers.rb b/lib/gitlab/database/migrations/observers.rb index 979a098d699..140b3feed64 100644 --- a/lib/gitlab/database/migrations/observers.rb +++ b/lib/gitlab/database/migrations/observers.rb @@ -6,10 +6,10 @@ module Gitlab module Observers def self.all_observers [ - TotalDatabaseSizeChange.new, - QueryStatistics.new, - QueryLog.new, - QueryDetails.new + TotalDatabaseSizeChange, + QueryStatistics, + QueryLog, + QueryDetails ] end end diff --git a/lib/gitlab/database/migrations/observers/migration_observer.rb b/lib/gitlab/database/migrations/observers/migration_observer.rb index 9bfbf35887d..85d18abb9ef 100644 --- a/lib/gitlab/database/migrations/observers/migration_observer.rb +++ b/lib/gitlab/database/migrations/observers/migration_observer.rb @@ -5,10 +5,11 @@ module Gitlab module Migrations module Observers class MigrationObserver - attr_reader :connection + attr_reader :connection, :observation - def initialize + def initialize(observation) @connection = ActiveRecord::Base.connection + @observation = observation end def before @@ -19,7 +20,7 @@ module Gitlab # implement in subclass end - def record(observation) + def record raise NotImplementedError, 'implement in subclass' end end diff --git a/lib/gitlab/database/migrations/observers/query_details.rb b/lib/gitlab/database/migrations/observers/query_details.rb index 52b6464d449..dadacd2d2fc 100644 --- a/lib/gitlab/database/migrations/observers/query_details.rb +++ b/lib/gitlab/database/migrations/observers/query_details.rb @@ -6,8 +6,8 @@ module Gitlab module Observers class QueryDetails < MigrationObserver def before - @file_path = File.join(Instrumentation::RESULT_DIR, 'current-details.json') - @file = File.open(@file_path, 'wb') + file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}-query-details.json") + @file = File.open(file_path, 'wb') @writer = Oj::StreamWriter.new(@file, {}) @writer.push_array @subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |*args| @@ -22,8 +22,8 @@ module Gitlab @file.close end - def record(observation) - File.rename(@file_path, File.join(Instrumentation::RESULT_DIR, "#{observation.migration}-query-details.json")) + def record + # no-op end def record_sql_event(_name, started, finished, _unique_id, payload) diff --git a/lib/gitlab/database/migrations/observers/query_log.rb b/lib/gitlab/database/migrations/observers/query_log.rb index 45df07fe391..e15d733d2a2 100644 --- a/lib/gitlab/database/migrations/observers/query_log.rb +++ b/lib/gitlab/database/migrations/observers/query_log.rb @@ -7,8 +7,8 @@ module Gitlab class QueryLog < MigrationObserver def before @logger_was = ActiveRecord::Base.logger - @log_file_path = File.join(Instrumentation::RESULT_DIR, 'current.log') - @logger = Logger.new(@log_file_path) + file_path = File.join(Instrumentation::RESULT_DIR, "#{observation.version}_#{observation.name}.log") + @logger = Logger.new(file_path) ActiveRecord::Base.logger = @logger end @@ -17,8 +17,8 @@ module Gitlab @logger.close end - def record(observation) - File.rename(@log_file_path, File.join(Instrumentation::RESULT_DIR, "#{observation.migration}.log")) + def record + # no-op end end end diff --git a/lib/gitlab/database/migrations/observers/query_statistics.rb b/lib/gitlab/database/migrations/observers/query_statistics.rb index 466f4724256..54504646a79 100644 --- a/lib/gitlab/database/migrations/observers/query_statistics.rb +++ b/lib/gitlab/database/migrations/observers/query_statistics.rb @@ -16,7 +16,7 @@ module Gitlab connection.execute('select pg_stat_statements_reset()') end - def record(observation) + def record return unless enabled? observation.query_statistics = connection.execute(<<~SQL) diff --git a/lib/gitlab/database/migrations/observers/total_database_size_change.rb b/lib/gitlab/database/migrations/observers/total_database_size_change.rb index 0b76b0bef5e..2e89498b79f 100644 --- a/lib/gitlab/database/migrations/observers/total_database_size_change.rb +++ b/lib/gitlab/database/migrations/observers/total_database_size_change.rb @@ -13,7 +13,7 @@ module Gitlab @size_after = get_total_database_size end - def record(observation) + def record return unless @size_after && @size_before observation.total_database_size_change = @size_after - @size_before diff --git a/lib/gitlab/database/multi_threaded_migration.rb b/lib/gitlab/database/multi_threaded_migration.rb deleted file mode 100644 index 65a6cb8e369..00000000000 --- a/lib/gitlab/database/multi_threaded_migration.rb +++ /dev/null @@ -1,52 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - module MultiThreadedMigration - MULTI_THREAD_AR_CONNECTION = :thread_local_ar_connection - - # This overwrites the default connection method so that every thread can - # use a thread-local connection, while still supporting all of Rails' - # migration methods. - def connection - Thread.current[MULTI_THREAD_AR_CONNECTION] || - ActiveRecord::Base.connection - end - - # Starts a thread-pool for N threads, along with N threads each using a - # single connection. The provided block is yielded from inside each - # thread. - # - # Example: - # - # with_multiple_threads(4) do - # execute('SELECT ...') - # end - # - # thread_count - The number of threads to start. - # - # join - When set to true this method will join the threads, blocking the - # caller until all threads have finished running. - # - # Returns an Array containing the started threads. - def with_multiple_threads(thread_count, join: true) - pool = Gitlab::Database.create_connection_pool(thread_count) - - threads = Array.new(thread_count) do - Thread.new do - pool.with_connection do |connection| - Thread.current[MULTI_THREAD_AR_CONNECTION] = connection - yield - ensure - Thread.current[MULTI_THREAD_AR_CONNECTION] = nil - end - end - end - - threads.each(&:join) if join - - threads - end - end - end -end diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb new file mode 100644 index 00000000000..dc63d93fd07 --- /dev/null +++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true +module Gitlab + module Database + module Partitioning + class DetachedPartitionDropper + def perform + return unless Feature.enabled?(:drop_detached_partitions, default_enabled: :yaml) + + Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop") + Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition| + conn.transaction do + # Another process may have already dropped the table and deleted this entry + next unless (detached_partition = Postgresql::DetachedPartition.lock.find_by(id: detached_partition.id)) + + unless check_partition_detached?(detached_partition) + Gitlab::AppLogger.error(message: "Attempt to drop attached database partition", partition_name: detached_partition.table_name) + detached_partition.destroy! + next + end + + drop_one(detached_partition) + end + rescue StandardError => e + Gitlab::AppLogger.error(message: "Failed to drop previously detached partition", + partition_name: detached_partition.table_name, + exception_class: e.class, + exception_message: e.message) + end + end + + private + + def drop_one(detached_partition) + conn.transaction do + conn.execute(<<~SQL) + DROP TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{conn.quote_table_name(detached_partition.table_name)} + SQL + + detached_partition.destroy! + end + Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: detached_partition.table_name) + end + + def check_partition_detached?(detached_partition) + # PostgresPartition checks the pg_inherits view, so our partition will only show here if it's still attached + # and thus should not be dropped + !PostgresPartition.for_identifier("#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{detached_partition.table_name}").exists? + end + + def conn + @conn ||= ApplicationRecord.connection + end + end + end + end +end diff --git a/lib/gitlab/database/partitioning/monthly_strategy.rb b/lib/gitlab/database/partitioning/monthly_strategy.rb index 4c68399cb68..7992c2fdaa7 100644 --- a/lib/gitlab/database/partitioning/monthly_strategy.rb +++ b/lib/gitlab/database/partitioning/monthly_strategy.rb @@ -86,7 +86,7 @@ module Gitlab end def pruning_old_partitions? - Feature.enabled?(:partition_pruning_dry_run) && retain_for.present? + retain_for.present? end def oldest_active_date diff --git a/lib/gitlab/database/partitioning/partition_manager.rb b/lib/gitlab/database/partitioning/partition_manager.rb index c2a9422a42a..7e433ecdd39 100644 --- a/lib/gitlab/database/partitioning/partition_manager.rb +++ b/lib/gitlab/database/partitioning/partition_manager.rb @@ -4,6 +4,8 @@ module Gitlab module Database module Partitioning class PartitionManager + UnsafeToDetachPartitionError = Class.new(StandardError) + def self.register(model) raise ArgumentError, "Only models with a #partitioning_strategy can be registered." unless model.respond_to?(:partitioning_strategy) @@ -16,6 +18,7 @@ module Gitlab LEASE_TIMEOUT = 1.minute MANAGEMENT_LEASE_KEY = 'database_partition_management_%s' + RETAIN_DETACHED_PARTITIONS_FOR = 1.week attr_reader :models @@ -35,13 +38,16 @@ module Gitlab partitions_to_create = missing_partitions(model) create(partitions_to_create) unless partitions_to_create.empty? - if Feature.enabled?(:partition_pruning_dry_run) + if Feature.enabled?(:partition_pruning, default_enabled: :yaml) partitions_to_detach = extra_partitions(model) detach(partitions_to_detach) unless partitions_to_detach.empty? end end rescue StandardError => e - Gitlab::AppLogger.error("Failed to create / detach partition(s) for #{model.table_name}: #{e.class}: #{e.message}") + Gitlab::AppLogger.error(message: "Failed to create / detach partition(s)", + table_name: model.table_name, + exception_class: e.class, + exception_message: e.message) end end @@ -54,7 +60,6 @@ module Gitlab end def extra_partitions(model) - return [] unless Feature.enabled?(:partition_pruning_dry_run) return [] unless connection.table_exists?(model.table_name) model.partitioning_strategy.extra_partitions @@ -74,7 +79,9 @@ module Gitlab partitions.each do |partition| connection.execute partition.to_sql - Gitlab::AppLogger.info("Created partition #{partition.partition_name} for table #{partition.table}") + Gitlab::AppLogger.info(message: "Created partition", + partition_name: partition.partition_name, + table_name: partition.table) end end end @@ -89,7 +96,24 @@ module Gitlab end def detach_one_partition(partition) - Gitlab::AppLogger.info("Planning to detach #{partition.partition_name} for table #{partition.table}") + assert_partition_detachable!(partition) + + connection.execute partition.to_detach_sql + + Postgresql::DetachedPartition.create!(table_name: partition.partition_name, + drop_after: RETAIN_DETACHED_PARTITIONS_FOR.from_now) + + Gitlab::AppLogger.info(message: "Detached Partition", + partition_name: partition.partition_name, + table_name: partition.table) + end + + def assert_partition_detachable!(partition) + parent_table_identifier = "#{connection.current_schema}.#{partition.table}" + + if (example_fk = PostgresForeignKey.by_referenced_table_identifier(parent_table_identifier).first) + raise UnsafeToDetachPartitionError, "Cannot detach #{partition.partition_name}, it would block while checking foreign key #{example_fk.name} on #{example_fk.constrained_table_identifier}" + end end def with_lock_retries(&block) diff --git a/lib/gitlab/database/partitioning/partition_monitoring.rb b/lib/gitlab/database/partitioning/partition_monitoring.rb index ad122fd47fe..6963ecd2cc1 100644 --- a/lib/gitlab/database/partitioning/partition_monitoring.rb +++ b/lib/gitlab/database/partitioning/partition_monitoring.rb @@ -16,6 +16,7 @@ module Gitlab gauge_present.set({ table: model.table_name }, strategy.current_partitions.size) gauge_missing.set({ table: model.table_name }, strategy.missing_partitions.size) + gauge_extra.set({ table: model.table_name }, strategy.extra_partitions.size) end end @@ -28,6 +29,10 @@ module Gitlab def gauge_missing @gauge_missing ||= Gitlab::Metrics.gauge(:db_partitions_missing, 'Number of database partitions currently expected, but not present') end + + def gauge_extra + @gauge_extra ||= Gitlab::Metrics.gauge(:db_partitions_extra, 'Number of database partitions currently attached to tables, but outside of their retention window and scheduled to be dropped') + end end end end diff --git a/lib/gitlab/database/partitioning/time_partition.rb b/lib/gitlab/database/partitioning/time_partition.rb index 7dca60c0854..1221f042530 100644 --- a/lib/gitlab/database/partitioning/time_partition.rb +++ b/lib/gitlab/database/partitioning/time_partition.rb @@ -47,6 +47,13 @@ module Gitlab SQL end + def to_detach_sql + <<~SQL + ALTER TABLE #{conn.quote_table_name(table)} + DETACH PARTITION #{fully_qualified_partition} + SQL + end + def ==(other) table == other.table && partition_name == other.partition_name && from == other.from && to == other.to end diff --git a/lib/gitlab/database/postgres_foreign_key.rb b/lib/gitlab/database/postgres_foreign_key.rb new file mode 100644 index 00000000000..94f74724295 --- /dev/null +++ b/lib/gitlab/database/postgres_foreign_key.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module Database + class PostgresForeignKey < ApplicationRecord + self.primary_key = :oid + + scope :by_referenced_table_identifier, ->(identifier) do + raise ArgumentError, "Referenced table name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/ + + where(referenced_table_identifier: identifier) + end + end + end +end diff --git a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb index 580cab5622d..2e3f674cf82 100644 --- a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb +++ b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb @@ -57,7 +57,7 @@ module Gitlab # @param finish final pkey range # @return [Gitlab::Database::PostgresHll::Buckets] HyperLogLog data structure instance that can estimate number of unique elements def execute(batch_size: nil, start: nil, finish: nil) - raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? + raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? # rubocop: disable Database/MultipleDatabases batch_size ||= DEFAULT_BATCH_SIZE start = actual_start(start) diff --git a/lib/gitlab/database/postgres_index.rb b/lib/gitlab/database/postgres_index.rb index 58e4e7e7924..1079bfdeda3 100644 --- a/lib/gitlab/database/postgres_index.rb +++ b/lib/gitlab/database/postgres_index.rb @@ -19,7 +19,12 @@ module Gitlab end # Indexes with reindexing support - scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) } + scope :reindexing_support, -> do + where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) + .not_match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$") + end + + scope :reindexing_leftovers, -> { match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$") } scope :not_match, ->(regex) { where("name !~ ?", regex) } diff --git a/lib/gitlab/database/postgres_partition.rb b/lib/gitlab/database/postgres_partition.rb index 0986372586b..7da60d8375d 100644 --- a/lib/gitlab/database/postgres_partition.rb +++ b/lib/gitlab/database/postgres_partition.rb @@ -7,10 +7,14 @@ module Gitlab belongs_to :postgres_partitioned_table, foreign_key: 'parent_identifier', primary_key: 'identifier' - scope :by_identifier, ->(identifier) do + scope :for_identifier, ->(identifier) do raise ArgumentError, "Partition name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/ - find(identifier) + where(primary_key => identifier) + end + + scope :by_identifier, ->(identifier) do + for_identifier(identifier).first! end scope :for_parent_table, ->(name) { where("parent_identifier = concat(current_schema(), '.', ?)", name).order(:name) } diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb index 841e04ccbd1..04b409a9306 100644 --- a/lib/gitlab/database/reindexing.rb +++ b/lib/gitlab/database/reindexing.rb @@ -8,6 +8,13 @@ module Gitlab SUPPORTED_TYPES = %w(btree gist).freeze + # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock, + # which only conflicts with DDL and vacuum. We therefore execute this with a rather + # high lock timeout and a long pause in between retries. This is an alternative to + # setting a high statement timeout, which would lead to a long running query with effects + # on e.g. vacuum. + REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30 + # candidate_indexes: Array of Gitlab::Database::PostgresIndex def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION) IndexSelection.new(candidate_indexes).take(how_many).each do |index| @@ -15,10 +22,22 @@ module Gitlab end end - def self.candidate_indexes - Gitlab::Database::PostgresIndex - .not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$") - .reindexing_support + def self.cleanup_leftovers! + PostgresIndex.reindexing_leftovers.each do |index| + Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity") + + retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( + timing_configuration: REMOVE_INDEX_RETRY_CONFIG, + klass: self.class, + logger: Gitlab::AppLogger + ) + + retries.run(raise_on_exhaustion: false) do + ApplicationRecord.connection.tap do |conn| + conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}") + end + end + end end end end diff --git a/lib/gitlab/database/reindexing/reindex_concurrently.rb b/lib/gitlab/database/reindexing/reindex_concurrently.rb index 8d9f9f5abdd..7a720f7c539 100644 --- a/lib/gitlab/database/reindexing/reindex_concurrently.rb +++ b/lib/gitlab/database/reindexing/reindex_concurrently.rb @@ -11,13 +11,6 @@ module Gitlab STATEMENT_TIMEOUT = 9.hours PG_MAX_INDEX_NAME_LENGTH = 63 - # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock, - # which only conflicts with DDL and vacuum. We therefore execute this with a rather - # high lock timeout and a long pause in between retries. This is an alternative to - # setting a high statement timeout, which would lead to a long running query with effects - # on e.g. vacuum. - REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30 - attr_reader :index, :logger def initialize(index, logger: Gitlab::AppLogger) diff --git a/lib/gitlab/database/schema_migrations/context.rb b/lib/gitlab/database/schema_migrations/context.rb index bd8b9bed2c1..35105121bbd 100644 --- a/lib/gitlab/database/schema_migrations/context.rb +++ b/lib/gitlab/database/schema_migrations/context.rb @@ -6,17 +6,14 @@ module Gitlab class Context attr_reader :connection + DEFAULT_SCHEMA_MIGRATIONS_PATH = "db/schema_migrations" + def initialize(connection) @connection = connection end def schema_directory - @schema_directory ||= - if ActiveRecord::Base.configurations.primary?(database_name) - File.join(db_dir, 'schema_migrations') - else - File.join(db_dir, "#{database_name}_schema_migrations") - end + @schema_directory ||= Rails.root.join(database_schema_migrations_path).to_s end def versions_to_create @@ -32,8 +29,8 @@ module Gitlab @database_name ||= @connection.pool.db_config.name end - def db_dir - @db_dir ||= Rails.application.config.paths["db"].first + def database_schema_migrations_path + @connection.pool.db_config.configuration_hash[:schema_migrations_path] || DEFAULT_SCHEMA_MIGRATIONS_PATH end end end diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb index 20bf6fa4d30..bb8b9f333fe 100644 --- a/lib/gitlab/database/similarity_score.rb +++ b/lib/gitlab/database/similarity_score.rb @@ -67,7 +67,7 @@ module Gitlab def self.build_expression(search:, rules:) return EXPRESSION_ON_INVALID_INPUT if search.blank? || rules.empty? - quoted_search = ActiveRecord::Base.connection.quote(search.to_s) + quoted_search = ApplicationRecord.connection.quote(search.to_s) first_expression, *expressions = rules.map do |rule| rule_to_arel(quoted_search, rule) @@ -110,7 +110,7 @@ module Gitlab # CAST(multiplier AS numeric) def self.multiplier_expression(rule) - quoted_multiplier = ActiveRecord::Base.connection.quote(rule.fetch(:multiplier, DEFAULT_MULTIPLIER).to_s) + quoted_multiplier = ApplicationRecord.connection.quote(rule.fetch(:multiplier, DEFAULT_MULTIPLIER).to_s) Arel::Nodes::NamedFunction.new('CAST', [Arel.sql(quoted_multiplier).as('numeric')]) end diff --git a/lib/gitlab/database/transaction/context.rb b/lib/gitlab/database/transaction/context.rb new file mode 100644 index 00000000000..a50dd30b75b --- /dev/null +++ b/lib/gitlab/database/transaction/context.rb @@ -0,0 +1,125 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Transaction + class Context + attr_reader :context + + LOG_DEPTH_THRESHOLD = 8 + LOG_SAVEPOINTS_THRESHOLD = 32 + LOG_DURATION_S_THRESHOLD = 300 + LOG_THROTTLE_DURATION = 1 + + def initialize + @context = {} + end + + def set_start_time + @context[:start_time] = current_timestamp + end + + def increment_savepoints + @context[:savepoints] = @context[:savepoints].to_i + 1 + end + + def increment_rollbacks + @context[:rollbacks] = @context[:rollbacks].to_i + 1 + end + + def increment_releases + @context[:releases] = @context[:releases].to_i + 1 + end + + def set_depth(depth) + @context[:depth] = [@context[:depth].to_i, depth].max + end + + def track_sql(sql) + (@context[:queries] ||= []).push(sql) + end + + def duration + return unless @context[:start_time].present? + + current_timestamp - @context[:start_time] + end + + def depth_threshold_exceeded? + @context[:depth].to_i > LOG_DEPTH_THRESHOLD + end + + def savepoints_threshold_exceeded? + @context[:savepoints].to_i > LOG_SAVEPOINTS_THRESHOLD + end + + def duration_threshold_exceeded? + duration.to_i > LOG_DURATION_S_THRESHOLD + end + + def log_savepoints? + depth_threshold_exceeded? || savepoints_threshold_exceeded? + end + + def log_duration? + duration_threshold_exceeded? + end + + def should_log? + !logged_already? && (log_savepoints? || log_duration?) + end + + def commit + log(:commit) + end + + def rollback + log(:rollback) + end + + private + + def queries + @context[:queries].to_a.join("\n") + end + + def current_timestamp + ::Gitlab::Metrics::System.monotonic_time + end + + def logged_already? + return false if @context[:last_log_timestamp].nil? + + (current_timestamp - @context[:last_log_timestamp].to_i) < LOG_THROTTLE_DURATION + end + + def set_last_log_timestamp + @context[:last_log_timestamp] = current_timestamp + end + + def log(operation) + return unless should_log? + + set_last_log_timestamp + + attributes = { + class: self.class.name, + result: operation, + duration_s: duration, + depth: @context[:depth].to_i, + savepoints_count: @context[:savepoints].to_i, + rollbacks_count: @context[:rollbacks].to_i, + releases_count: @context[:releases].to_i, + sql: queries + } + + application_info(attributes) + end + + def application_info(attributes) + Gitlab::AppJsonLogger.info(attributes) + end + end + end + end +end diff --git a/lib/gitlab/database/transaction/observer.rb b/lib/gitlab/database/transaction/observer.rb new file mode 100644 index 00000000000..7888f0916e3 --- /dev/null +++ b/lib/gitlab/database/transaction/observer.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Transaction + class Observer + INSTRUMENTED_STATEMENTS = %w[BEGIN SAVEPOINT ROLLBACK RELEASE].freeze + LONGEST_COMMAND_LENGTH = 'ROLLBACK TO SAVEPOINT'.length + START_COMMENT = '/*' + END_COMMENT = '*/' + + def self.instrument_transactions(cmd, event) + connection = event.payload[:connection] + manager = connection&.transaction_manager + return unless manager.respond_to?(:transaction_context) + + context = manager.transaction_context + return if context.nil? + + if cmd.start_with?('BEGIN') + context.set_start_time + context.set_depth(0) + context.track_sql(event.payload[:sql]) + elsif cmd.start_with?('SAVEPOINT ') + context.set_depth(manager.open_transactions) + context.increment_savepoints + elsif cmd.start_with?('ROLLBACK TO SAVEPOINT') + context.increment_rollbacks + elsif cmd.start_with?('RELEASE SAVEPOINT ') + context.increment_releases + end + end + + def self.register! + ActiveSupport::Notifications.subscribe('sql.active_record') do |event| + sql = event.payload.dig(:sql).to_s + cmd = extract_sql_command(sql) + + if cmd.start_with?(*INSTRUMENTED_STATEMENTS) + self.instrument_transactions(cmd, event) + end + end + end + + def self.extract_sql_command(sql) + return sql unless sql.start_with?(START_COMMENT) + + index = sql.index(END_COMMENT) + + return sql unless index + + # /* comment */ SELECT + # + # We offset using a position of the end comment + 1 character to + # accomodate a space between Marginalia comment and a SQL statement. + offset = index + END_COMMENT.length + 1 + + # Avoid duplicating the entire string. This isn't optimized to + # strip extra spaces, but we assume that this doesn't happen + # for performance reasons. + sql[offset..offset + LONGEST_COMMAND_LENGTH] + end + end + end + end +end |