diff options
Diffstat (limited to 'lib/gitlab/database')
29 files changed, 307 insertions, 142 deletions
diff --git a/lib/gitlab/database/as_with_materialized.rb b/lib/gitlab/database/as_with_materialized.rb index a04ea97117d..417e9f211b9 100644 --- a/lib/gitlab/database/as_with_materialized.rb +++ b/lib/gitlab/database/as_with_materialized.rb @@ -25,7 +25,7 @@ module Gitlab # Note: to be deleted after the minimum PG version is set to 12.0 # Update the documentation together when deleting the method - # https://docs.gitlab.com/ee/development/merge_request_performance_guidelines.html#use-ctes-wisely + # https://docs.gitlab.com/ee/development/merge_request_concepts/performance.html#use-ctes-wisely def self.materialized_if_supported materialized_supported? ? 'MATERIALIZED' : '' end diff --git a/lib/gitlab/database/async_indexes/index_creator.rb b/lib/gitlab/database/async_indexes/index_creator.rb index 2fb4cc8f675..3ae2bb7b3e5 100644 --- a/lib/gitlab/database/async_indexes/index_creator.rb +++ b/lib/gitlab/database/async_indexes/index_creator.rb @@ -4,10 +4,10 @@ module Gitlab module Database module AsyncIndexes class IndexCreator - include ExclusiveLeaseGuard + include IndexingExclusiveLeaseGuard TIMEOUT_PER_ACTION = 1.day - STATEMENT_TIMEOUT = 9.hours + STATEMENT_TIMEOUT = 20.hours def initialize(async_index) @async_index = async_index @@ -47,10 +47,6 @@ module Gitlab TIMEOUT_PER_ACTION end - def lease_key - [super, async_index.connection_db_config.name].join('/') - end - def set_statement_timeout connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT) yield diff --git a/lib/gitlab/database/async_indexes/index_destructor.rb b/lib/gitlab/database/async_indexes/index_destructor.rb index fe05872b87a..66955df9d04 100644 --- a/lib/gitlab/database/async_indexes/index_destructor.rb +++ b/lib/gitlab/database/async_indexes/index_destructor.rb @@ -4,7 +4,7 @@ module Gitlab module Database module AsyncIndexes class IndexDestructor - include ExclusiveLeaseGuard + include IndexingExclusiveLeaseGuard TIMEOUT_PER_ACTION = 1.day @@ -53,10 +53,6 @@ module Gitlab TIMEOUT_PER_ACTION end - def lease_key - [super, async_index.connection_db_config.name].join('/') - end - def log_index_info(message) Gitlab::AppLogger.info(message: message, table_name: async_index.table_name, index_name: async_index.name) end diff --git a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb index ad747a8131d..f1fc3efae9e 100644 --- a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb +++ b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb @@ -49,6 +49,8 @@ module Gitlab def execute_job(tracking_record) job_class = tracking_record.migration_job_class + ApplicationContext.push(feature_category: fetch_feature_category(job_class)) + if job_class < Gitlab::BackgroundMigration::BatchedMigrationJob execute_batched_migration_job(job_class, tracking_record) else @@ -86,6 +88,14 @@ module Gitlab job_instance end + + def fetch_feature_category(job_class) + if job_class.respond_to?(:feature_category) + job_class.feature_category.to_s + else + Gitlab::BackgroundMigration::BatchedMigrationJob::DEFAULT_FEATURE_CATEGORY + end + end end end end diff --git a/lib/gitlab/database/gitlab_schema.rb b/lib/gitlab/database/gitlab_schema.rb index 0f848ed40fb..38558512b6a 100644 --- a/lib/gitlab/database/gitlab_schema.rb +++ b/lib/gitlab/database/gitlab_schema.rb @@ -15,46 +15,12 @@ module Gitlab module Database module GitlabSchema + UnknownSchemaError = Class.new(StandardError) + DICTIONARY_PATH = 'db/docs/' - # These tables are deleted/renamed, but still referenced by migrations. - # This is needed for now, but should be removed in the future - DELETED_TABLES = { - # main tables - 'alerts_service_data' => :gitlab_main, - 'analytics_devops_adoption_segment_selections' => :gitlab_main, - 'analytics_repository_file_commits' => :gitlab_main, - 'analytics_repository_file_edits' => :gitlab_main, - 'analytics_repository_files' => :gitlab_main, - 'audit_events_archived' => :gitlab_main, - 'backup_labels' => :gitlab_main, - 'clusters_applications_fluentd' => :gitlab_main, - 'forked_project_links' => :gitlab_main, - 'issue_milestones' => :gitlab_main, - 'merge_request_milestones' => :gitlab_main, - 'namespace_onboarding_actions' => :gitlab_main, - 'services' => :gitlab_main, - 'terraform_state_registry' => :gitlab_main, - 'tmp_fingerprint_sha256_migration' => :gitlab_main, # used by lib/gitlab/background_migration/migrate_fingerprint_sha256_within_keys.rb - 'web_hook_logs_archived' => :gitlab_main, - 'vulnerability_export_registry' => :gitlab_main, - 'vulnerability_finding_fingerprints' => :gitlab_main, - 'vulnerability_export_verification_status' => :gitlab_main, - - # CI tables - 'ci_build_trace_sections' => :gitlab_ci, - 'ci_build_trace_section_names' => :gitlab_ci, - 'ci_daily_report_results' => :gitlab_ci, - 'ci_test_cases' => :gitlab_ci, - 'ci_test_case_failures' => :gitlab_ci, - - # leftovers from early implementation of partitioning - 'audit_events_part_5fc467ac26' => :gitlab_main, - 'web_hook_logs_part_0c5294f417' => :gitlab_main - }.freeze - - def self.table_schemas(tables) - tables.map { |table| table_schema(table) }.to_set + def self.table_schemas(tables, undefined: true) + tables.map { |table| table_schema(table, undefined: undefined) }.to_set end def self.table_schema(name, undefined: true) @@ -69,13 +35,13 @@ module Gitlab # strip partition number of a form `loose_foreign_keys_deleted_records_1` table_name.gsub!(/_[0-9]+$/, '') - # Tables that are properly mapped + # Tables and views that are properly mapped if gitlab_schema = views_and_tables_to_schema[table_name] return gitlab_schema end - # Tables that are deleted, but we still need to reference them - if gitlab_schema = DELETED_TABLES[table_name] + # Tables and views that are deleted, but we still need to reference them + if gitlab_schema = deleted_views_and_tables_to_schema[table_name] return gitlab_schema end @@ -106,29 +72,58 @@ module Gitlab [Rails.root.join(DICTIONARY_PATH, 'views', '*.yml')] end + def self.deleted_views_path_globs + [Rails.root.join(DICTIONARY_PATH, 'deleted_views', '*.yml')] + end + + def self.deleted_tables_path_globs + [Rails.root.join(DICTIONARY_PATH, 'deleted_tables', '*.yml')] + end + def self.views_and_tables_to_schema @views_and_tables_to_schema ||= self.tables_to_schema.merge(self.views_to_schema) end - def self.tables_to_schema - @tables_to_schema ||= Dir.glob(self.dictionary_path_globs).each_with_object({}) do |file_path, dic| - data = YAML.load_file(file_path) + def self.table_schema!(name) + self.table_schema(name, undefined: false) || raise( + UnknownSchemaError, + "Could not find gitlab schema for table #{name}: Any new tables must be added to the database dictionary" + ) + end - dic[data['table_name']] = data['gitlab_schema'].to_sym - end + def self.deleted_views_and_tables_to_schema + @deleted_views_and_tables_to_schema ||= self.deleted_tables_to_schema.merge(self.deleted_views_to_schema) end - def self.views_to_schema - @views_to_schema ||= Dir.glob(self.view_path_globs).each_with_object({}) do |file_path, dic| - data = YAML.load_file(file_path) + def self.deleted_tables_to_schema + @deleted_tables_to_schema ||= self.build_dictionary(self.deleted_tables_path_globs) + end - dic[data['view_name']] = data['gitlab_schema'].to_sym - end + def self.deleted_views_to_schema + @deleted_views_to_schema ||= self.build_dictionary(self.deleted_views_path_globs) + end + + def self.tables_to_schema + @tables_to_schema ||= self.build_dictionary(self.dictionary_path_globs) + end + + def self.views_to_schema + @views_to_schema ||= self.build_dictionary(self.view_path_globs) end def self.schema_names @schema_names ||= self.views_and_tables_to_schema.values.to_set end + + private_class_method def self.build_dictionary(path_globs) + Dir.glob(path_globs).each_with_object({}) do |file_path, dic| + data = YAML.load_file(file_path) + + key_name = data['table_name'] || data['view_name'] + + dic[key_name] = data['gitlab_schema'].to_sym + end + end end end end diff --git a/lib/gitlab/database/indexing_exclusive_lease_guard.rb b/lib/gitlab/database/indexing_exclusive_lease_guard.rb new file mode 100644 index 00000000000..fb45de347e6 --- /dev/null +++ b/lib/gitlab/database/indexing_exclusive_lease_guard.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module IndexingExclusiveLeaseGuard + extend ActiveSupport::Concern + include ExclusiveLeaseGuard + + def lease_key + @lease_key ||= "gitlab/database/indexing/actions/#{database_config_name}" + end + + def database_config_name + Gitlab::Database.db_config_name(connection) + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/resolver.rb b/lib/gitlab/database/load_balancing/resolver.rb index a291080cc3d..3e3998cae92 100644 --- a/lib/gitlab/database/load_balancing/resolver.rb +++ b/lib/gitlab/database/load_balancing/resolver.rb @@ -7,8 +7,21 @@ module Gitlab module Database module LoadBalancing class Resolver + FAR_FUTURE_TTL = 100.years.from_now + UnresolvableNameserverError = Class.new(StandardError) + Response = Class.new do + attr_reader :address, :ttl + + def initialize(address:, ttl:) + raise ArgumentError unless ttl.present? && address.present? + + @address = address + @ttl = ttl + end + end + def initialize(nameserver) @nameserver = nameserver end @@ -28,13 +41,14 @@ module Gitlab private def ip_address - IPAddr.new(@nameserver) + # IP addresses are valid forever + Response.new(address: IPAddr.new(@nameserver), ttl: FAR_FUTURE_TTL) rescue IPAddr::InvalidAddressError end def ip_address_from_hosts_file ip = Resolv::Hosts.new.getaddress(@nameserver) - IPAddr.new(ip) + Response.new(address: IPAddr.new(ip), ttl: FAR_FUTURE_TTL) rescue Resolv::ResolvError end @@ -42,7 +56,12 @@ module Gitlab answer = Net::DNS::Resolver.start(@nameserver, Net::DNS::A).answer return if answer.empty? - answer.first.address + raw_response = answer.first + + # Defaults to 30 seconds if there is no TTL present + ttl_in_seconds = raw_response.ttl.presence || 30 + + Response.new(address: answer.first.address, ttl: ttl_in_seconds.seconds.from_now) rescue Net::DNS::Resolver::NoResponseError raise UnresolvableNameserverError, "no response from DNS server(s)" end diff --git a/lib/gitlab/database/load_balancing/service_discovery.rb b/lib/gitlab/database/load_balancing/service_discovery.rb index 3295301a2d7..5059b3b5c93 100644 --- a/lib/gitlab/database/load_balancing/service_discovery.rb +++ b/lib/gitlab/database/load_balancing/service_discovery.rb @@ -69,6 +69,7 @@ module Gitlab @use_tcp = use_tcp @load_balancer = load_balancer @max_replica_pools = max_replica_pools + @nameserver_ttl = 1.second.ago # Begin with an expired ttl to trigger a nameserver dns lookup end # rubocop:enable Metrics/ParameterLists @@ -191,8 +192,14 @@ module Gitlab end def resolver - @resolver ||= Net::DNS::Resolver.new( - nameservers: Resolver.new(@nameserver).resolve, + return @resolver if defined?(@resolver) && @nameserver_ttl.future? + + response = Resolver.new(@nameserver).resolve + + @nameserver_ttl = response.ttl + + @resolver = Net::DNS::Resolver.new( + nameservers: response.address, port: @port, use_tcp: @use_tcp ) diff --git a/lib/gitlab/database/lock_writes_manager.rb b/lib/gitlab/database/lock_writes_manager.rb index e3ae2892668..2e08e1ffb42 100644 --- a/lib/gitlab/database/lock_writes_manager.rb +++ b/lib/gitlab/database/lock_writes_manager.rb @@ -22,37 +22,38 @@ module Gitlab end end - def initialize(table_name:, connection:, database_name:, logger: nil, dry_run: false) + def initialize(table_name:, connection:, database_name:, with_retries: true, logger: nil, dry_run: false) @table_name = table_name @connection = connection @database_name = database_name @logger = logger @dry_run = dry_run + @with_retries = with_retries @table_name_without_schema = ActiveRecord::ConnectionAdapters::PostgreSQL::Utils .extract_schema_qualified_name(table_name) .identifier end - def table_locked_for_writes?(table_name) + def table_locked_for_writes? query = <<~SQL SELECT COUNT(*) from information_schema.triggers WHERE event_object_table = '#{table_name_without_schema}' - AND trigger_name = '#{write_trigger_name(table_name)}' + AND trigger_name = '#{write_trigger_name}' SQL connection.select_value(query) == EXPECTED_TRIGGER_RECORD_COUNT end def lock_writes - if table_locked_for_writes?(table_name) + if table_locked_for_writes? logger&.info "Skipping lock_writes, because #{table_name} is already locked for writes" return end logger&.info "Database: '#{database_name}', Table: '#{table_name}': Lock Writes".color(:yellow) sql_statement = <<~SQL - CREATE TRIGGER #{write_trigger_name(table_name)} + CREATE TRIGGER #{write_trigger_name} BEFORE INSERT OR UPDATE OR DELETE OR TRUNCATE ON #{table_name} FOR EACH STATEMENT EXECUTE FUNCTION #{TRIGGER_FUNCTION_NAME}(); @@ -64,7 +65,7 @@ module Gitlab def unlock_writes logger&.info "Database: '#{database_name}', Table: '#{table_name}': Allow Writes".color(:green) sql_statement = <<~SQL - DROP TRIGGER IF EXISTS #{write_trigger_name(table_name)} ON #{table_name}; + DROP TRIGGER IF EXISTS #{write_trigger_name} ON #{table_name}; SQL execute_sql_statement(sql_statement) @@ -72,19 +73,23 @@ module Gitlab private - attr_reader :table_name, :connection, :database_name, :logger, :dry_run, :table_name_without_schema + attr_reader :table_name, :connection, :database_name, :logger, :dry_run, :table_name_without_schema, :with_retries def execute_sql_statement(sql) if dry_run logger&.info sql - else - with_retries(connection) do + elsif with_retries + raise "Cannot call lock_retries_helper if a transaction is already open" if connection.transaction_open? + + run_with_retries(connection) do connection.execute(sql) end + else + connection.execute(sql) end end - def with_retries(connection, &block) + def run_with_retries(connection, &block) with_statement_timeout_retries do with_lock_retries(connection) do yield @@ -110,11 +115,12 @@ module Gitlab Gitlab::Database::WithLockRetries.new( klass: "gitlab:db:lock_writes", logger: logger || Gitlab::AppLogger, - connection: connection + connection: connection, + allow_savepoints: false # this causes the WithLockRetries to fail if sub-transaction has been detected. ).run(&block) end - def write_trigger_name(table_name) + def write_trigger_name "gitlab_schema_write_trigger_for_#{table_name_without_schema}" end end diff --git a/lib/gitlab/database/loose_foreign_keys.rb b/lib/gitlab/database/loose_foreign_keys.rb index 1338b18a099..6512c672965 100644 --- a/lib/gitlab/database/loose_foreign_keys.rb +++ b/lib/gitlab/database/loose_foreign_keys.rb @@ -22,7 +22,7 @@ module Gitlab { column: config.fetch('column'), on_delete: config.fetch('on_delete').to_sym, - gitlab_schema: GitlabSchema.table_schema(child_table_name) + gitlab_schema: GitlabSchema.table_schema!(child_table_name) } ) end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 4858a96c173..e41107370ec 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -281,6 +281,9 @@ module Gitlab # target_column - The name of the referenced column, defaults to "id". # on_delete - The action to perform when associated data is removed, # defaults to "CASCADE". + # on_update - The action to perform when associated data is updated, + # defaults to nil. This is useful for multi column FKs if + # it's desirable to update one of the columns. # name - The name of the foreign key. # validate - Flag that controls whether the new foreign key will be validated after creation. # If the flag is not set, the constraint will only be enforced for new data. @@ -288,7 +291,8 @@ module Gitlab # order of the ALTER TABLE. This can be useful in situations where the foreign # key creation could deadlock with another process. # - def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true, reverse_lock_order: false) + # rubocop: disable Metrics/ParameterLists + def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, on_update: nil, target_column: :id, name: nil, validate: true, reverse_lock_order: false) # Transactions would result in ALTER TABLE locks being held for the # duration of the transaction, defeating the purpose of this method. if transaction_open? @@ -298,6 +302,7 @@ module Gitlab options = { column: column, on_delete: on_delete, + on_update: on_update, name: name.presence || concurrent_foreign_key_name(source, column), primary_key: target_column } @@ -306,7 +311,8 @@ module Gitlab warning_message = "Foreign key not created because it exists already " \ "(this may be due to an aborted migration or similar): " \ "source: #{source}, target: #{target}, column: #{options[:column]}, "\ - "name: #{options[:name]}, on_delete: #{options[:on_delete]}" + "name: #{options[:name]}, on_update: #{options[:on_update]}, "\ + "on_delete: #{options[:on_delete]}" Gitlab::AppLogger.warn warning_message else @@ -322,6 +328,7 @@ module Gitlab ADD CONSTRAINT #{options[:name]} FOREIGN KEY (#{multiple_columns(options[:column])}) REFERENCES #{target} (#{multiple_columns(target_column)}) + #{on_update_statement(options[:on_update])} #{on_delete_statement(options[:on_delete])} NOT VALID; EOF @@ -343,6 +350,7 @@ module Gitlab end end end + # rubocop: enable Metrics/ParameterLists def validate_foreign_key(source, column, name: nil) fk_name = name || concurrent_foreign_key_name(source, column) @@ -357,10 +365,28 @@ module Gitlab end def foreign_key_exists?(source, target = nil, **options) - foreign_keys(source).any? do |foreign_key| - tables_match?(target.to_s, foreign_key.to_table.to_s) && - options_match?(foreign_key.options, options) + # This if block is necessary because foreign_key_exists? is called in down migrations that may execute before + # the postgres_foreign_keys view had necessary columns added, or even before the view existed. + # In that case, we revert to the previous behavior of this method. + # The behavior in the if block has a bug: it always returns false if the fk being checked has multiple columns. + # This can be removed after init_schema.rb passes 20221122210711_add_columns_to_postgres_foreign_keys.rb + # Tracking issue: https://gitlab.com/gitlab-org/gitlab/-/issues/386796 + if ActiveRecord::Migrator.current_version < 20221122210711 + return foreign_keys(source).any? do |foreign_key| + tables_match?(target.to_s, foreign_key.to_table.to_s) && + options_match?(foreign_key.options, options) + end end + + fks = Gitlab::Database::PostgresForeignKey.by_constrained_table_name(source) + + fks = fks.by_referenced_table_name(target) if target + fks = fks.by_name(options[:name]) if options[:name] + fks = fks.by_constrained_columns(options[:column]) if options[:column] + fks = fks.by_referenced_columns(options[:primary_key]) if options[:primary_key] + fks = fks.by_on_delete_action(options[:on_delete]) if options[:on_delete] + + fks.exists? end # Returns the name for a concurrent foreign key. @@ -1278,6 +1304,13 @@ into similar problems in the future (e.g. when new tables are created). "ON DELETE #{on_delete.upcase}" end + def on_update_statement(on_update) + return '' if on_update.blank? + return 'ON UPDATE SET NULL' if on_update == :nullify + + "ON UPDATE #{on_update.upcase}" + end + def create_column_from(table, old, new, type: nil, batch_column_name: :id, type_cast_function: nil, limit: nil) old_col = column_for(table, old) new_type = type || old_col.type diff --git a/lib/gitlab/database/migration_helpers/automatic_lock_writes_on_tables.rb b/lib/gitlab/database/migration_helpers/automatic_lock_writes_on_tables.rb index 0aa4b0d01c4..c59139344ea 100644 --- a/lib/gitlab/database/migration_helpers/automatic_lock_writes_on_tables.rb +++ b/lib/gitlab/database/migration_helpers/automatic_lock_writes_on_tables.rb @@ -42,7 +42,7 @@ module Gitlab def should_lock_writes_on_table?(table_name) # currently gitlab_schema represents only present existing tables, this is workaround for deleted tables # that should be skipped as they will be removed in a future migration. - return false if Gitlab::Database::GitlabSchema::DELETED_TABLES[table_name] + return false if Gitlab::Database::GitlabSchema.deleted_tables_to_schema[table_name] table_schema = Gitlab::Database::GitlabSchema.table_schema(table_name.to_s, undefined: false) @@ -60,12 +60,15 @@ module Gitlab Gitlab::Database.gitlab_schemas_for_connection(connection).exclude?(table_schema) end + # with_retries creates new a transaction. So we set it to false if the connection is + # already has an open transaction, to avoid sub-transactions. def lock_writes_on_table(connection, table_name) database_name = Gitlab::Database.db_config_name(connection) LockWritesManager.new( table_name: table_name, connection: connection, database_name: database_name, + with_retries: !connection.transaction_open?, logger: Logger.new($stdout) ).lock_writes end diff --git a/lib/gitlab/database/migrations/base_background_runner.rb b/lib/gitlab/database/migrations/base_background_runner.rb index dbb85bad95c..8975c04e33a 100644 --- a/lib/gitlab/database/migrations/base_background_runner.rb +++ b/lib/gitlab/database/migrations/base_background_runner.rb @@ -44,13 +44,20 @@ module Gitlab jobs.each do |j| break if run_until <= Time.current + meta = migration_meta(j) + instrumentation.observe(version: nil, name: batch_names.next, - connection: connection) do + connection: connection, + meta: meta) do run_job(j) end end end + + def migration_meta(_job) + {} + end end end end diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb index 7c21346007a..8c479d7eda2 100644 --- a/lib/gitlab/database/migrations/instrumentation.rb +++ b/lib/gitlab/database/migrations/instrumentation.rb @@ -11,8 +11,8 @@ module Gitlab @result_dir = result_dir end - def observe(version:, name:, connection:, &block) - observation = Observation.new(version: version, name: name, success: false) + def observe(version:, name:, connection:, meta: {}, &block) + observation = Observation.new(version: version, name: name, success: false, meta: meta) per_migration_result_dir = File.join(@result_dir, name) diff --git a/lib/gitlab/database/migrations/observation.rb b/lib/gitlab/database/migrations/observation.rb index 228eea3393c..80388c4dbbb 100644 --- a/lib/gitlab/database/migrations/observation.rb +++ b/lib/gitlab/database/migrations/observation.rb @@ -10,6 +10,7 @@ module Gitlab :walltime, :success, :total_database_size_change, + :meta, :query_statistics, keyword_init: true ) diff --git a/lib/gitlab/database/migrations/test_batched_background_runner.rb b/lib/gitlab/database/migrations/test_batched_background_runner.rb index a16103f452c..c123d01f327 100644 --- a/lib/gitlab/database/migrations/test_batched_background_runner.rb +++ b/lib/gitlab/database/migrations/test_batched_background_runner.rb @@ -13,7 +13,7 @@ module Gitlab end def jobs_by_migration_name - Gitlab::Database::SharedModel.using_connection(connection) do + set_shared_model_connection do Gitlab::Database::BackgroundMigration::BatchedMigration .executable .where('id > ?', from_id) @@ -70,7 +70,7 @@ module Gitlab end def run_job(job) - Gitlab::Database::SharedModel.using_connection(connection) do + set_shared_model_connection do Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new(connection: connection).perform(job) end end @@ -107,6 +107,16 @@ module Gitlab private attr_reader :from_id + + def set_shared_model_connection(&block) + Gitlab::Database::SharedModel.using_connection(connection, &block) + end + + def migration_meta(job) + set_shared_model_connection do + job.batched_migration.slice(:max_batch_size, :total_tuple_count, :interval) + end + end end end end diff --git a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb index bd8ed677d77..8849191f356 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb @@ -5,6 +5,7 @@ module Gitlab module PartitioningMigrationHelpers module ForeignKeyHelpers include ::Gitlab::Database::SchemaHelpers + include ::Gitlab::Database::Migrations::LockRetriesHelpers ERROR_SCOPE = 'foreign keys' diff --git a/lib/gitlab/database/postgres_foreign_key.rb b/lib/gitlab/database/postgres_foreign_key.rb index 241b6f009f7..d3ede45fe86 100644 --- a/lib/gitlab/database/postgres_foreign_key.rb +++ b/lib/gitlab/database/postgres_foreign_key.rb @@ -5,17 +5,44 @@ module Gitlab class PostgresForeignKey < SharedModel self.primary_key = :oid + # These values come from the possible confdeltype values in pg_constraint + enum on_delete_action: { + restrict: 'r', + cascade: 'c', + nullify: 'n', + set_default: 'd', + no_action: 'a' + } + scope :by_referenced_table_identifier, ->(identifier) do raise ArgumentError, "Referenced table name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/ where(referenced_table_identifier: identifier) end + scope :by_referenced_table_name, ->(name) { where(referenced_table_name: name) } + scope :by_constrained_table_identifier, ->(identifier) do raise ArgumentError, "Constrained table name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/ where(constrained_table_identifier: identifier) end + + scope :by_constrained_table_name, ->(name) { where(constrained_table_name: name) } + + scope :not_inherited, -> { where(is_inherited: false) } + + scope :by_name, ->(name) { where(name: name) } + + scope :by_constrained_columns, ->(cols) { where(constrained_columns: Array.wrap(cols)) } + + scope :by_referenced_columns, ->(cols) { where(referenced_columns: Array.wrap(cols)) } + + scope :by_on_delete_action, ->(on_delete) do + raise ArgumentError, "Invalid on_delete action #{on_delete}" unless on_delete_actions.key?(on_delete) + + where(on_delete_action: on_delete) + end end end end diff --git a/lib/gitlab/database/postgres_partition.rb b/lib/gitlab/database/postgres_partition.rb index eda11fd8382..e4f70ee1745 100644 --- a/lib/gitlab/database/postgres_partition.rb +++ b/lib/gitlab/database/postgres_partition.rb @@ -17,7 +17,9 @@ module Gitlab for_identifier(identifier).first! end - scope :for_parent_table, ->(name) { where("parent_identifier = concat(current_schema(), '.', ?)", name).order(:name) } + scope :for_parent_table, ->(name) do + where("parent_identifier = concat(current_schema(), '.', ?)", name).order(:name) + end def self.partition_exists?(table_name) where("identifier = concat(current_schema(), '.', ?)", table_name).exists? diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb index 1280789b30c..6f64d04270f 100644 --- a/lib/gitlab/database/query_analyzer.rb +++ b/lib/gitlab/database/query_analyzer.rb @@ -86,11 +86,7 @@ module Gitlab analyzers.each do |analyzer| next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed) - if analyzer.raw? - analyzer.analyze(sql) - else - analyzer.analyze(parsed) - end + analyzer.analyze(parsed) rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e # We catch all standard errors to prevent validation errors to introduce fatal errors in production Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) diff --git a/lib/gitlab/database/query_analyzers/base.rb b/lib/gitlab/database/query_analyzers/base.rb index 9c2c228f869..9a52a4f6e23 100644 --- a/lib/gitlab/database/query_analyzers/base.rb +++ b/lib/gitlab/database/query_analyzers/base.rb @@ -53,10 +53,6 @@ module Gitlab Thread.current[self.context_key] end - def self.raw? - false - end - def self.enabled? raise NotImplementedError end diff --git a/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb b/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb index 3de9e8011fb..c966ae0e105 100644 --- a/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb +++ b/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb @@ -22,13 +22,16 @@ module Gitlab return unless allowed_schemas invalid_schemas = table_schemas - allowed_schemas - if invalid_schemas.any? - message = "The query tried to access #{tables} (of #{table_schemas.to_a}) " - message += "which is outside of allowed schemas (#{allowed_schemas}) " - message += "for the current connection '#{Gitlab::Database.db_config_name(parsed.connection)}'" - raise CrossSchemaAccessError, message - end + return if invalid_schemas.empty? + + schema_list = table_schemas.sort.join(',') + + message = "The query tried to access #{tables} (of #{schema_list}) " + message += "which is outside of allowed schemas (#{allowed_schemas}) " + message += "for the current connection '#{Gitlab::Database.db_config_name(parsed.connection)}'" + + raise CrossSchemaAccessError, message end end end diff --git a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb index dd10e0d7992..713e1f772e3 100644 --- a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb +++ b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb @@ -87,15 +87,15 @@ module Gitlab return if tables == ['schema_migrations'] context[:modified_tables_by_db][database].merge(tables) - all_tables = context[:modified_tables_by_db].values.map(&:to_a).flatten + all_tables = context[:modified_tables_by_db].values.flat_map(&:to_a) schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables) schemas += ApplicationRecord.gitlab_transactions_stack if schemas.many? message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \ - "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables. " \ - "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." + "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables. " \ + "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." if schemas.any? { |s| s.to_s.start_with?("undefined") } message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to lib/gitlab/database/gitlab_schemas.yml ." diff --git a/lib/gitlab/database/query_analyzers/query_recorder.rb b/lib/gitlab/database/query_analyzers/query_recorder.rb index b54f3442512..63b4fbb8c1d 100644 --- a/lib/gitlab/database/query_analyzers/query_recorder.rb +++ b/lib/gitlab/database/query_analyzers/query_recorder.rb @@ -5,21 +5,19 @@ module Gitlab module QueryAnalyzers class QueryRecorder < Base LOG_PATH = 'query_recorder/' + LIST_PARAMETER_REGEX = %r{\$\d+(?:\s*,\s*\$\d+)+}.freeze + SINGLE_PARAMETER_REGEX = %r{\$\d+}.freeze class << self - def raw? - true - end - def enabled? # Only enable QueryRecorder in CI on database MRs or default branch ENV['CI_MERGE_REQUEST_LABELS']&.include?('database') || (ENV['CI_COMMIT_REF_NAME'].present? && ENV['CI_COMMIT_REF_NAME'] == ENV['CI_DEFAULT_BRANCH']) end - def analyze(sql) + def analyze(parsed) payload = { - sql: sql + normalized: normalize_query(parsed.sql) } log_query(payload) @@ -42,6 +40,12 @@ module Gitlab File.write(log_file, log_line, mode: 'a') end + + def normalize_query(query) + query + .gsub(LIST_PARAMETER_REGEX, '?,?,?') # Replace list parameters with ?,?,? + .gsub(SINGLE_PARAMETER_REGEX, '?') # Replace single parameters with ? + end end end end diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb index b4f7da999df..eca118a4ff2 100644 --- a/lib/gitlab/database/reindexing/coordinator.rb +++ b/lib/gitlab/database/reindexing/coordinator.rb @@ -4,7 +4,7 @@ module Gitlab module Database module Reindexing class Coordinator - include ExclusiveLeaseGuard + include IndexingExclusiveLeaseGuard # Maximum lease time for the global Redis lease # This should be higher than the maximum time for any @@ -20,6 +20,8 @@ module Gitlab end def perform + return if too_late_for_reindexing? + # This obtains a global lease such that there's # only one live reindexing process at a time. try_obtain_lease do @@ -32,26 +34,28 @@ module Gitlab end def drop + return if too_late_for_reindexing? + try_obtain_lease do Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity") retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( - connection: index.connection, + connection: connection, timing_configuration: REMOVE_INDEX_RETRY_CONFIG, klass: self.class, logger: Gitlab::AppLogger ) retries.run(raise_on_exhaustion: false) do - index.connection.tap do |conn| - conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}") - end + connection.execute("DROP INDEX CONCURRENTLY IF EXISTS #{full_index_name}") end end end private + delegate :connection, to: :index + def with_notifications(action) notifier.notify_start(action) yield @@ -73,8 +77,18 @@ module Gitlab TIMEOUT_PER_ACTION end - def lease_key - [super, index.connection_db_config.name].join('/') + def full_index_name + [ + connection.quote_table_name(index.schema), + connection.quote_table_name(index.name) + ].join('.') + end + + # We need to check the time explicitly because we execute 4 reindexing + # action per rake invocation and one action can take up to 24 hours. + # This means that it can span for more than the weekend. + def too_late_for_reindexing? + !Time.current.on_weekend? end end end diff --git a/lib/gitlab/database/reindexing/grafana_notifier.rb b/lib/gitlab/database/reindexing/grafana_notifier.rb index ece9327b658..e43eddbefc0 100644 --- a/lib/gitlab/database/reindexing/grafana_notifier.rb +++ b/lib/gitlab/database/reindexing/grafana_notifier.rb @@ -60,7 +60,9 @@ module Gitlab "Authorization": "Bearer #{@api_key}" } - success = Gitlab::HTTP.post("#{@api_url}/api/annotations", body: payload.to_json, headers: headers, allow_local_requests: true).success? + success = Gitlab::HTTP.post( + "#{@api_url}/api/annotations", body: payload.to_json, headers: headers, allow_local_requests: true + ).success? log_error("Response code #{response.code}") unless success diff --git a/lib/gitlab/database/reindexing/index_selection.rb b/lib/gitlab/database/reindexing/index_selection.rb index 2d384f2f9e2..ebe245bfadb 100644 --- a/lib/gitlab/database/reindexing/index_selection.rb +++ b/lib/gitlab/database/reindexing/index_selection.rb @@ -12,6 +12,10 @@ module Gitlab # Only consider indexes beyond this size (before reindexing) INDEX_SIZE_MINIMUM = 1.gigabyte + VERY_LARGE_TABLES = %i[ + ci_builds + ].freeze + delegate :each, to: :indexes def initialize(candidates) @@ -30,13 +34,24 @@ module Gitlab # we force a N+1 pattern here and estimate bloat on a per-index # basis. - @indexes ||= candidates - .not_recently_reindexed - .where('ondisk_size_bytes >= ?', INDEX_SIZE_MINIMUM) + @indexes ||= relations_that_need_cleaning_before_deadline .sort_by(&:relative_bloat_level) # forced N+1 .reverse .select { |candidate| candidate.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT } end + + def relations_that_need_cleaning_before_deadline + relation = candidates.not_recently_reindexed.where('ondisk_size_bytes >= ?', INDEX_SIZE_MINIMUM) + relation = relation.where.not(tablename: VERY_LARGE_TABLES) if too_late_for_very_large_table? + relation + end + + # The reindexing process takes place during the weekends and starting a + # reindexing action on a large table late on Sunday could span during + # Monday. We don't want this because it prevents vacuum from running. + def too_late_for_very_large_table? + !Date.today.saturday? + end end end end diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb index 2c7ca28942e..d81ff4ff1ae 100644 --- a/lib/gitlab/database/schema_helpers.rb +++ b/lib/gitlab/database/schema_helpers.rb @@ -71,19 +71,6 @@ module Gitlab "#{type}_#{hashed_identifier}" end - def with_lock_retries(*args, **kwargs, &block) - raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion) - merged_args = { - connection: connection, - klass: self.class, - logger: Gitlab::BackgroundMigration::Logger, - allow_savepoints: true - }.merge(kwargs) - - Gitlab::Database::WithLockRetries.new(**merged_args) - .run(raise_on_exhaustion: raise_on_exhaustion, &block) - end - def assert_not_in_transaction_block(scope:) return unless transaction_open? diff --git a/lib/gitlab/database/tables_truncate.rb b/lib/gitlab/database/tables_truncate.rb index 807ecdb862a..daef0402742 100644 --- a/lib/gitlab/database/tables_truncate.rb +++ b/lib/gitlab/database/tables_truncate.rb @@ -40,11 +40,12 @@ module Gitlab table_name: table_name, connection: connection, database_name: database_name, + with_retries: true, logger: logger, dry_run: dry_run ) - unless lock_writes_manager.table_locked_for_writes?(table_name) + unless lock_writes_manager.table_locked_for_writes? raise "Table '#{table_name}' is not locked for writes. Run the rake task gitlab:db:lock_writes first" end end @@ -81,6 +82,22 @@ module Gitlab sql_statement = "SELECT set_config('lock_writes.#{table_name_without_schema}', 'false', false)" logger&.info(sql_statement) connection.execute(sql_statement) unless dry_run + + # Temporarily unlocking writes on the attached partitions of the table. + # Because in some cases they might have been locked for writes as well, when they used to be + # normal tables before being converted into attached partitions. + Gitlab::Database::SharedModel.using_connection(connection) do + table_partitions = Gitlab::Database::PostgresPartition.for_parent_table(table_name_without_schema) + table_partitions.each do |table_partition| + partition_name_without_schema = ActiveRecord::ConnectionAdapters::PostgreSQL::Utils + .extract_schema_qualified_name(table_partition.identifier) + .identifier + + sql_statement = "SELECT set_config('lock_writes.#{partition_name_without_schema}', 'false', false)" + logger&.info(sql_statement) + connection.execute(sql_statement) unless dry_run + end + end end # We do the truncation in stages to avoid high IO |