diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2022-11-17 14:33:21 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2022-11-17 14:33:21 +0300 |
commit | 7021455bd1ed7b125c55eb1b33c5a01f2bc55ee0 (patch) | |
tree | 5bdc2229f5198d516781f8d24eace62fc7e589e9 /lib/gitlab/database | |
parent | 185b095e93520f96e9cfc31d9c3e69b498cdab7c (diff) |
Add latest changes from gitlab-org/gitlab@15-6-stable-eev15.6.0-rc42
Diffstat (limited to 'lib/gitlab/database')
29 files changed, 1103 insertions, 552 deletions
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb index 81898a59da7..6b7ff308c7e 100644 --- a/lib/gitlab/database/background_migration/batched_job.rb +++ b/lib/gitlab/database/background_migration/batched_job.rb @@ -14,7 +14,8 @@ module Gitlab MAX_ATTEMPTS = 3 STUCK_JOBS_TIMEOUT = 1.hour.freeze TIMEOUT_EXCEPTIONS = [ActiveRecord::StatementTimeout, ActiveRecord::ConnectionTimeoutError, - ActiveRecord::AdapterTimeout, ActiveRecord::LockWaitTimeout].freeze + ActiveRecord::AdapterTimeout, ActiveRecord::LockWaitTimeout, + ActiveRecord::QueryCanceled].freeze belongs_to :batched_migration, foreign_key: :batched_background_migration_id has_many :batched_job_transition_logs, foreign_key: :batched_background_migration_job_id @@ -112,7 +113,10 @@ module Gitlab end def can_split?(exception) - attempts >= MAX_ATTEMPTS && TIMEOUT_EXCEPTIONS.include?(exception&.class) && batch_size > sub_batch_size && batch_size > 1 + attempts >= MAX_ATTEMPTS && + exception&.class&.in?(TIMEOUT_EXCEPTIONS) && + batch_size > sub_batch_size && + batch_size > 1 end def split_and_retry! diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb index 92cafd1d00e..61a660ad14c 100644 --- a/lib/gitlab/database/background_migration/batched_migration.rb +++ b/lib/gitlab/database/background_migration/batched_migration.rb @@ -94,8 +94,21 @@ module Gitlab end def self.active_migration(connection:) + active_migrations_distinct_on_table(connection: connection, limit: 1).first + end + + def self.find_executable(id, connection:) for_gitlab_schema(Gitlab::Database.gitlab_schemas_for_connection(connection)) - .executable.queue_order.first + .executable.find_by_id(id) + end + + def self.active_migrations_distinct_on_table(connection:, limit:) + distinct_on_table = select('DISTINCT ON (table_name) id') + .for_gitlab_schema(Gitlab::Database.gitlab_schemas_for_connection(connection)) + .executable + .order(table_name: :asc, id: :asc) + + where(id: distinct_on_table).queue_order.limit(limit) end def self.successful_rows_counts(migrations) diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml index c4a9cf8b80f..bf6ebb21f7d 100644 --- a/lib/gitlab/database/gitlab_schemas.yml +++ b/lib/gitlab/database/gitlab_schemas.yml @@ -40,6 +40,7 @@ atlassian_identities: :gitlab_main audit_events_external_audit_event_destinations: :gitlab_main audit_events: :gitlab_main audit_events_streaming_headers: :gitlab_main +audit_events_streaming_event_type_filters: :gitlab_main authentication_events: :gitlab_main award_emoji: :gitlab_main aws_roles: :gitlab_main @@ -167,6 +168,7 @@ dast_site_profiles_pipelines: :gitlab_main dast_sites: :gitlab_main dast_site_tokens: :gitlab_main dast_site_validations: :gitlab_main +dependency_proxy_blob_states: :gitlab_main dependency_proxy_blobs: :gitlab_main dependency_proxy_group_settings: :gitlab_main dependency_proxy_image_ttl_group_policies: :gitlab_main @@ -206,7 +208,6 @@ events: :gitlab_main evidences: :gitlab_main experiments: :gitlab_main experiment_subjects: :gitlab_main -experiment_users: :gitlab_main external_approval_rules: :gitlab_main external_approval_rules_protected_branches: :gitlab_main external_pull_requests: :gitlab_ci @@ -342,6 +343,7 @@ namespace_limits: :gitlab_main namespace_package_settings: :gitlab_main namespace_root_storage_statistics: :gitlab_main namespace_ci_cd_settings: :gitlab_main +namespace_commit_emails: :gitlab_main namespace_settings: :gitlab_main namespace_details: :gitlab_main namespaces: :gitlab_main @@ -363,6 +365,7 @@ operations_scopes: :gitlab_main operations_strategies: :gitlab_main operations_strategies_user_lists: :gitlab_main operations_user_lists: :gitlab_main +p_ci_builds_metadata: :gitlab_ci packages_build_infos: :gitlab_main packages_cleanup_policies: :gitlab_main packages_composer_cache_files: :gitlab_main @@ -451,6 +454,7 @@ projects: :gitlab_main projects_sync_events: :gitlab_main project_statistics: :gitlab_main project_topics: :gitlab_main +project_wiki_repositories: :gitlab_main project_wiki_repository_states: :gitlab_main prometheus_alert_events: :gitlab_main prometheus_alerts: :gitlab_main diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb index 59b08fac7e9..50472bd5780 100644 --- a/lib/gitlab/database/load_balancing/configuration.rb +++ b/lib/gitlab/database/load_balancing/configuration.rb @@ -57,7 +57,8 @@ module Gitlab record_type: 'A', interval: 60, disconnect_timeout: 120, - use_tcp: false + use_tcp: false, + max_replica_pools: nil } end diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb index 0881025b425..cb3a378ad64 100644 --- a/lib/gitlab/database/load_balancing/load_balancer.rb +++ b/lib/gitlab/database/load_balancing/load_balancer.rb @@ -119,6 +119,13 @@ module Gitlab connection = pool.connection transaction_open = connection.transaction_open? + if attempt && attempt > 1 + ::Gitlab::Database::LoadBalancing::Logger.warn( + event: :read_write_retry, + message: 'A read_write block was retried because of connection error' + ) + end + yield connection rescue StandardError => e # No leaking will happen on the final attempt. Leaks are caused by subsequent retries diff --git a/lib/gitlab/database/load_balancing/service_discovery.rb b/lib/gitlab/database/load_balancing/service_discovery.rb index dfd4892371c..52a9e8798d4 100644 --- a/lib/gitlab/database/load_balancing/service_discovery.rb +++ b/lib/gitlab/database/load_balancing/service_discovery.rb @@ -48,6 +48,7 @@ module Gitlab # forcefully disconnected. # use_tcp - Use TCP instaed of UDP to look up resources # load_balancer - The load balancer instance to use + # rubocop:disable Metrics/ParameterLists def initialize( load_balancer, nameserver:, @@ -56,7 +57,8 @@ module Gitlab record_type: 'A', interval: 60, disconnect_timeout: 120, - use_tcp: false + use_tcp: false, + max_replica_pools: nil ) @nameserver = nameserver @port = port @@ -66,7 +68,9 @@ module Gitlab @disconnect_timeout = disconnect_timeout @use_tcp = use_tcp @load_balancer = load_balancer + @max_replica_pools = max_replica_pools end + # rubocop:enable Metrics/ParameterLists def start Thread.new do @@ -170,6 +174,8 @@ module Gitlab addresses_from_srv_record(response) end + addresses = sampler.sample(addresses) + raise EmptyDnsResponse if addresses.empty? # Addresses are sorted so we can directly compare the old and new @@ -221,6 +227,11 @@ module Gitlab def addresses_from_a_record(resources) resources.map { |r| Address.new(r.address.to_s) } end + + def sampler + @sampler ||= ::Gitlab::Database::LoadBalancing::ServiceDiscovery::Sampler + .new(max_replica_pools: @max_replica_pools) + end end end end diff --git a/lib/gitlab/database/load_balancing/service_discovery/sampler.rb b/lib/gitlab/database/load_balancing/service_discovery/sampler.rb new file mode 100644 index 00000000000..71870214156 --- /dev/null +++ b/lib/gitlab/database/load_balancing/service_discovery/sampler.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + class ServiceDiscovery + class Sampler + def initialize(max_replica_pools:, seed: Random.new_seed) + # seed must be set once and consistent + # for every invocation of #sample on + # the same instance of Sampler + @seed = seed + @max_replica_pools = max_replica_pools + end + + def sample(addresses) + return addresses if @max_replica_pools.nil? || addresses.count <= @max_replica_pools + + ::Gitlab::Database::LoadBalancing::Logger.info( + event: :host_list_limit_exceeded, + message: "Host list length exceeds max_replica_pools so random hosts will be chosen.", + max_replica_pools: @max_replica_pools, + total_host_list_length: addresses.count, + randomization_seed: @seed + ) + + # First sort them in case the ordering from DNS server changes + # then randomly order all addresses using consistent seed so + # this process always gives the same set for this instance of + # Sampler + addresses = addresses.sort + addresses = addresses.shuffle(random: Random.new(@seed)) + + # Group by hostname so that we can sample evenly across hosts + addresses_by_host = addresses.group_by(&:hostname) + + selected_addresses = [] + while selected_addresses.count < @max_replica_pools + # Loop over all hostnames grabbing one address at a time to + # evenly distribute across all hostnames + addresses_by_host.each do |host, addresses| + next if addresses.empty? + + selected_addresses << addresses.pop + + break unless selected_addresses.count < @max_replica_pools + end + end + + selected_addresses + end + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb index 3180289ec69..737852d5ccb 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb @@ -4,7 +4,7 @@ module Gitlab module Database module LoadBalancing class SidekiqServerMiddleware - JobReplicaNotUpToDate = Class.new(StandardError) + JobReplicaNotUpToDate = Class.new(::Gitlab::SidekiqMiddleware::RetryError) MINIMUM_DELAY_INTERVAL_SECONDS = 0.8 diff --git a/lib/gitlab/database/lock_writes_manager.rb b/lib/gitlab/database/lock_writes_manager.rb index fe75cd763b4..2594ee04b35 100644 --- a/lib/gitlab/database/lock_writes_manager.rb +++ b/lib/gitlab/database/lock_writes_manager.rb @@ -5,6 +5,11 @@ module Gitlab class LockWritesManager TRIGGER_FUNCTION_NAME = 'gitlab_schema_prevent_write' + # Triggers to block INSERT / UPDATE / DELETE + # Triggers on TRUNCATE are not added to the information_schema.triggers + # See https://www.postgresql.org/message-id/16934.1568989957%40sss.pgh.pa.us + EXPECTED_TRIGGER_RECORD_COUNT = 3 + def initialize(table_name:, connection:, database_name:, logger: nil, dry_run: false) @table_name = table_name @connection = connection @@ -20,7 +25,7 @@ module Gitlab AND trigger_name = '#{write_trigger_name(table_name)}' SQL - connection.select_value(query) == 3 + connection.select_value(query) == EXPECTED_TRIGGER_RECORD_COUNT end def lock_writes diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index df40e3b3868..16416dd2507 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -6,6 +6,10 @@ module Gitlab include Migrations::ReestablishedConnectionStack include Migrations::BackgroundMigrationHelpers include Migrations::BatchedBackgroundMigrationHelpers + include Migrations::LockRetriesHelpers + include Migrations::TimeoutHelpers + include Migrations::ConstraintsHelpers + include Migrations::ExtensionHelpers include DynamicModelHelpers include RenameTableHelpers include AsyncIndexes::MigrationHelpers @@ -22,8 +26,6 @@ module Gitlab super(table_name, connection: connection, **kwargs) end - # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS - MAX_IDENTIFIER_NAME_LENGTH = 63 DEFAULT_TIMESTAMP_COLUMNS = %i[created_at updated_at].freeze # Adds `created_at` and `updated_at` columns with timezone information. @@ -146,6 +148,12 @@ module Gitlab 'in the body of your migration class' end + if !options.delete(:allow_partition) && partition?(table_name) + raise ArgumentError, 'add_concurrent_index can not be used on a partitioned ' \ + 'table. Please use add_concurrent_partitioned_index on the partitioned table ' \ + 'as we need to create indexes on each partition and an index on the parent table' + end + options = options.merge({ algorithm: :concurrently }) if index_exists?(table_name, column_name, **options) @@ -202,6 +210,12 @@ module Gitlab 'in the body of your migration class' end + if partition?(table_name) + raise ArgumentError, 'remove_concurrent_index can not be used on a partitioned ' \ + 'table. Please use remove_concurrent_partitioned_index_by_name on the partitioned table ' \ + 'as we need to remove the index on the parent table' + end + options = options.merge({ algorithm: :concurrently }) unless index_exists?(table_name, column_name, **options) @@ -231,6 +245,12 @@ module Gitlab 'in the body of your migration class' end + if partition?(table_name) + raise ArgumentError, 'remove_concurrent_index_by_name can not be used on a partitioned ' \ + 'table. Please use remove_concurrent_partitioned_index_by_name on the partitioned table ' \ + 'as we need to remove the index on the parent table' + end + index_name = index_name[:name] if index_name.is_a?(Hash) raise 'remove_concurrent_index_by_name must get an index name as the second argument' if index_name.blank? @@ -360,97 +380,6 @@ module Gitlab "#{prefix}#{hashed_identifier}" end - # Long-running migrations may take more than the timeout allowed by - # the database. Disable the session's statement timeout to ensure - # migrations don't get killed prematurely. - # - # There are two possible ways to disable the statement timeout: - # - # - Per transaction (this is the preferred and default mode) - # - Per connection (requires a cleanup after the execution) - # - # When using a per connection disable statement, code must be inside - # a block so we can automatically execute `RESET statement_timeout` after block finishes - # otherwise the statement will still be disabled until connection is dropped - # or `RESET statement_timeout` is executed - def disable_statement_timeout - if block_given? - if statement_timeout_disabled? - # Don't do anything if the statement_timeout is already disabled - # Allows for nested calls of disable_statement_timeout without - # resetting the timeout too early (before the outer call ends) - yield - else - begin - execute('SET statement_timeout TO 0') - - yield - ensure - execute('RESET statement_timeout') - end - end - else - unless transaction_open? - raise <<~ERROR - Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block. - If you don't want to use a transaction wrap your code in a block call: - - disable_statement_timeout { # code that requires disabled statement here } - - This will make sure statement_timeout is disabled before and reset after the block execution is finished. - ERROR - end - - execute('SET LOCAL statement_timeout TO 0') - end - end - - # Executes the block with a retry mechanism that alters the +lock_timeout+ and +sleep_time+ between attempts. - # The timings can be controlled via the +timing_configuration+ parameter. - # If the lock was not acquired within the retry period, a last attempt is made without using +lock_timeout+. - # - # Note this helper uses subtransactions when run inside an already open transaction. - # - # ==== Examples - # # Invoking without parameters - # with_lock_retries do - # drop_table :my_table - # end - # - # # Invoking with custom +timing_configuration+ - # t = [ - # [1.second, 1.second], - # [2.seconds, 2.seconds] - # ] - # - # with_lock_retries(timing_configuration: t) do - # drop_table :my_table # this will be retried twice - # end - # - # # Disabling the retries using an environment variable - # > export DISABLE_LOCK_RETRIES=true - # - # with_lock_retries do - # drop_table :my_table # one invocation, it will not retry at all - # end - # - # ==== Parameters - # * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the block, sleep time before the next iteration, defaults to `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION` - # * +logger+ - [Gitlab::JsonLogger] - # * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES` - def with_lock_retries(*args, **kwargs, &block) - raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion) - merged_args = { - connection: connection, - klass: self.class, - logger: Gitlab::BackgroundMigration::Logger, - allow_savepoints: true - }.merge(kwargs) - - Gitlab::Database::WithLockRetries.new(**merged_args) - .run(raise_on_exhaustion: raise_on_exhaustion, &block) - end - def true_value Database.true_value end @@ -796,6 +725,10 @@ module Gitlab install_rename_triggers(table, old, new) end + def convert_to_type_column(column, from_type, to_type) + "#{column}_convert_#{from_type}_to_#{to_type}" + end + def convert_to_bigint_column(column) "#{column}_convert_to_bigint" end @@ -826,7 +759,22 @@ module Gitlab # columns - The name, or array of names, of the column(s) that we want to convert to bigint. # primary_key - The name of the primary key column (most often :id) def initialize_conversion_of_integer_to_bigint(table, columns, primary_key: :id) - create_temporary_columns_and_triggers(table, columns, primary_key: primary_key, data_type: :bigint) + mappings = Array(columns).map do |c| + { + c => { + from_type: :int, + to_type: :bigint, + default_value: 0 + } + } + end.reduce(&:merge) + + create_temporary_columns_and_triggers( + table, + mappings, + primary_key: primary_key, + old_bigint_column_naming: true + ) end # Reverts `initialize_conversion_of_integer_to_bigint` @@ -849,9 +797,23 @@ module Gitlab # table - The name of the database table containing the columns # columns - The name, or array of names, of the column(s) that we have converted to bigint. # primary_key - The name of the primary key column (most often :id) - def restore_conversion_of_integer_to_bigint(table, columns, primary_key: :id) - create_temporary_columns_and_triggers(table, columns, primary_key: primary_key, data_type: :int) + mappings = Array(columns).map do |c| + { + c => { + from_type: :bigint, + to_type: :int, + default_value: 0 + } + } + end.reduce(&:merge) + + create_temporary_columns_and_triggers( + table, + mappings, + primary_key: primary_key, + old_bigint_column_naming: true + ) end # Backfills the new columns used in an integer-to-bigint conversion using background migrations. @@ -947,43 +909,6 @@ module Gitlab execute("DELETE FROM batched_background_migrations WHERE #{conditions}") end - def ensure_batched_background_migration_is_finished(job_class_name:, table_name:, column_name:, job_arguments:, finalize: true) - Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode! - - Gitlab::Database::BackgroundMigration::BatchedMigration.reset_column_information - migration = Gitlab::Database::BackgroundMigration::BatchedMigration.find_for_configuration( - Gitlab::Database.gitlab_schemas_for_connection(connection), - job_class_name, table_name, column_name, job_arguments - ) - - configuration = { - job_class_name: job_class_name, - table_name: table_name, - column_name: column_name, - job_arguments: job_arguments - } - - return Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}" if migration.nil? - - return if migration.finished? - - finalize_batched_background_migration(job_class_name: job_class_name, table_name: table_name, column_name: column_name, job_arguments: job_arguments) if finalize - - unless migration.reload.finished? # rubocop:disable Cop/ActiveRecordAssociationReload - raise "Expected batched background migration for the given configuration to be marked as 'finished', " \ - "but it is '#{migration.status_name}':" \ - "\t#{configuration}" \ - "\n\n" \ - "Finalize it manually by running the following command in a `bash` or `sh` shell:" \ - "\n\n" \ - "\tsudo gitlab-rake gitlab:background_migrations:finalize[#{job_class_name},#{table_name},#{column_name},'#{job_arguments.to_json.gsub(',', '\,')}']" \ - "\n\n" \ - "For more information, check the documentation" \ - "\n\n" \ - "\thttps://docs.gitlab.com/ee/user/admin_area/monitoring/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished" - end - end - # Returns an Array containing the indexes for the given column def indexes_for(table, column) column = column.to_s @@ -1102,6 +1027,24 @@ module Gitlab rescue ArgumentError end + # Remove any instances of deprecated job classes lingering in queues. + # + # rubocop:disable Cop/SidekiqApiUsage + def sidekiq_remove_jobs(job_klass:) + Sidekiq::Queue.new(job_klass.queue).each do |job| + job.delete if job.klass == job_klass.to_s + end + + Sidekiq::RetrySet.new.each do |retri| + retri.delete if retri.klass == job_klass.to_s + end + + Sidekiq::ScheduledSet.new.each do |scheduled| + scheduled.delete if scheduled.klass == job_klass.to_s + end + end + # rubocop:enable Cop/SidekiqApiUsage + def sidekiq_queue_migrate(queue_from, to:) while sidekiq_queue_length(queue_from) > 0 Sidekiq.redis do |conn| @@ -1194,320 +1137,6 @@ into similar problems in the future (e.g. when new tables are created). execute(sql) end - # Returns the name for a check constraint - # - # type: - # - Any value, as long as it is unique - # - Constraint names are unique per table in Postgres, and, additionally, - # we can have multiple check constraints over a column - # So we use the (table, column, type) triplet as a unique name - # - e.g. we use 'max_length' when adding checks for text limits - # or 'not_null' when adding a NOT NULL constraint - # - def check_constraint_name(table, column, type) - identifier = "#{table}_#{column}_check_#{type}" - # Check concurrent_foreign_key_name() for info on why we use a hash - hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10) - - "check_#{hashed_identifier}" - end - - def check_constraint_exists?(table, constraint_name) - # Constraint names are unique per table in Postgres, not per schema - # Two tables can have constraints with the same name, so we filter by - # the table name in addition to using the constraint_name - check_sql = <<~SQL - SELECT COUNT(*) - FROM pg_catalog.pg_constraint con - INNER JOIN pg_catalog.pg_class rel - ON rel.oid = con.conrelid - INNER JOIN pg_catalog.pg_namespace nsp - ON nsp.oid = con.connamespace - WHERE con.contype = 'c' - AND con.conname = #{connection.quote(constraint_name)} - AND nsp.nspname = #{connection.quote(current_schema)} - AND rel.relname = #{connection.quote(table)} - SQL - - connection.select_value(check_sql) > 0 - end - - # Adds a check constraint to a table - # - # This method is the generic helper for adding any check constraint - # More specialized helpers may use it (e.g. add_text_limit or add_not_null) - # - # This method only requires minimal locking: - # - The constraint is added using NOT VALID - # This allows us to add the check constraint without validating it - # - The check will be enforced for new data (inserts) coming in - # - If `validate: true` the constraint is also validated - # Otherwise, validate_check_constraint() can be used at a later stage - # - Check comments on add_concurrent_foreign_key for more info - # - # table - The table the constraint will be added to - # check - The check clause to add - # e.g. 'char_length(name) <= 5' or 'store IS NOT NULL' - # constraint_name - The name of the check constraint (otherwise auto-generated) - # Should be unique per table (not per column) - # validate - Whether to validate the constraint in this call - # - def add_check_constraint(table, check, constraint_name, validate: true) - # Transactions would result in ALTER TABLE locks being held for the - # duration of the transaction, defeating the purpose of this method. - validate_not_in_transaction!(:add_check_constraint) - - validate_check_constraint_name!(constraint_name) - - if check_constraint_exists?(table, constraint_name) - warning_message = <<~MESSAGE - Check constraint was not created because it exists already - (this may be due to an aborted migration or similar) - table: #{table}, check: #{check}, constraint name: #{constraint_name} - MESSAGE - - Gitlab::AppLogger.warn warning_message - else - # Only add the constraint without validating it - # Even though it is fast, ADD CONSTRAINT requires an EXCLUSIVE lock - # Use with_lock_retries to make sure that this operation - # will not timeout on tables accessed by many processes - with_lock_retries do - execute <<-EOF.strip_heredoc - ALTER TABLE #{table} - ADD CONSTRAINT #{constraint_name} - CHECK ( #{check} ) - NOT VALID; - EOF - end - end - - if validate - validate_check_constraint(table, constraint_name) - end - end - - def validate_check_constraint(table, constraint_name) - validate_check_constraint_name!(constraint_name) - - unless check_constraint_exists?(table, constraint_name) - raise missing_schema_object_message(table, "check constraint", constraint_name) - end - - disable_statement_timeout do - # VALIDATE CONSTRAINT only requires a SHARE UPDATE EXCLUSIVE LOCK - # It only conflicts with other validations and creating indexes - execute("ALTER TABLE #{table} VALIDATE CONSTRAINT #{constraint_name};") - end - end - - def remove_check_constraint(table, constraint_name) - # This is technically not necessary, but aligned with add_check_constraint - # and allows us to continue use with_lock_retries here - validate_not_in_transaction!(:remove_check_constraint) - - validate_check_constraint_name!(constraint_name) - - # DROP CONSTRAINT requires an EXCLUSIVE lock - # Use with_lock_retries to make sure that this will not timeout - with_lock_retries do - execute <<-EOF.strip_heredoc - ALTER TABLE #{table} - DROP CONSTRAINT IF EXISTS #{constraint_name} - EOF - end - end - - # Copies all check constraints for the old column to the new column. - # - # table - The table containing the columns. - # old - The old column. - # new - The new column. - # schema - The schema the table is defined for - # If it is not provided, then the current_schema is used - def copy_check_constraints(table, old, new, schema: nil) - if transaction_open? - raise 'copy_check_constraints can not be run inside a transaction' - end - - unless column_exists?(table, old) - raise "Column #{old} does not exist on #{table}" - end - - unless column_exists?(table, new) - raise "Column #{new} does not exist on #{table}" - end - - table_with_schema = schema.present? ? "#{schema}.#{table}" : table - - check_constraints_for(table, old, schema: schema).each do |check_c| - validate = !(check_c["constraint_def"].end_with? "NOT VALID") - - # Normalize: - # - Old constraint definitions: - # '(char_length(entity_path) <= 5500)' - # - Definitionss from pg_get_constraintdef(oid): - # 'CHECK ((char_length(entity_path) <= 5500))' - # - Definitions from pg_get_constraintdef(oid, pretty_bool): - # 'CHECK (char_length(entity_path) <= 5500)' - # - Not valid constraints: 'CHECK (...) NOT VALID' - # to a single format that we can use: - # '(char_length(entity_path) <= 5500)' - check_definition = check_c["constraint_def"] - .sub(/^\s*(CHECK)?\s*\({0,2}/, '(') - .sub(/\){0,2}\s*(NOT VALID)?\s*$/, ')') - - constraint_name = begin - if check_definition == "(#{old} IS NOT NULL)" - not_null_constraint_name(table_with_schema, new) - elsif check_definition.start_with? "(char_length(#{old}) <=" - text_limit_name(table_with_schema, new) - else - check_constraint_name(table_with_schema, new, 'copy_check_constraint') - end - end - - add_check_constraint( - table_with_schema, - check_definition.gsub(old.to_s, new.to_s), - constraint_name, - validate: validate - ) - end - end - - # Migration Helpers for adding limit to text columns - def add_text_limit(table, column, limit, constraint_name: nil, validate: true) - add_check_constraint( - table, - "char_length(#{column}) <= #{limit}", - text_limit_name(table, column, name: constraint_name), - validate: validate - ) - end - - def validate_text_limit(table, column, constraint_name: nil) - validate_check_constraint(table, text_limit_name(table, column, name: constraint_name)) - end - - def remove_text_limit(table, column, constraint_name: nil) - remove_check_constraint(table, text_limit_name(table, column, name: constraint_name)) - end - - def check_text_limit_exists?(table, column, constraint_name: nil) - check_constraint_exists?(table, text_limit_name(table, column, name: constraint_name)) - end - - # Migration Helpers for managing not null constraints - def add_not_null_constraint(table, column, constraint_name: nil, validate: true) - if column_is_nullable?(table, column) - add_check_constraint( - table, - "#{column} IS NOT NULL", - not_null_constraint_name(table, column, name: constraint_name), - validate: validate - ) - else - warning_message = <<~MESSAGE - NOT NULL check constraint was not created: - column #{table}.#{column} is already defined as `NOT NULL` - MESSAGE - - Gitlab::AppLogger.warn warning_message - end - end - - def validate_not_null_constraint(table, column, constraint_name: nil) - validate_check_constraint( - table, - not_null_constraint_name(table, column, name: constraint_name) - ) - end - - def remove_not_null_constraint(table, column, constraint_name: nil) - remove_check_constraint( - table, - not_null_constraint_name(table, column, name: constraint_name) - ) - end - - def check_not_null_constraint_exists?(table, column, constraint_name: nil) - check_constraint_exists?( - table, - not_null_constraint_name(table, column, name: constraint_name) - ) - end - - def create_extension(extension) - execute('CREATE EXTENSION IF NOT EXISTS %s' % extension) - rescue ActiveRecord::StatementInvalid => e - dbname = ApplicationRecord.database.database_name - user = ApplicationRecord.database.username - - warn(<<~MSG) if e.to_s =~ /permission denied/ - GitLab requires the PostgreSQL extension '#{extension}' installed in database '#{dbname}', but - the database user is not allowed to install the extension. - - You can either install the extension manually using a database superuser: - - CREATE EXTENSION IF NOT EXISTS #{extension} - - Or, you can solve this by logging in to the GitLab - database (#{dbname}) using a superuser and running: - - ALTER #{user} WITH SUPERUSER - - This query will grant the user superuser permissions, ensuring any database extensions - can be installed through migrations. - - For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html. - MSG - - raise - end - - def drop_extension(extension) - execute('DROP EXTENSION IF EXISTS %s' % extension) - rescue ActiveRecord::StatementInvalid => e - dbname = ApplicationRecord.database.database_name - user = ApplicationRecord.database.username - - warn(<<~MSG) if e.to_s =~ /permission denied/ - This migration attempts to drop the PostgreSQL extension '#{extension}' - installed in database '#{dbname}', but the database user is not allowed - to drop the extension. - - You can either drop the extension manually using a database superuser: - - DROP EXTENSION IF EXISTS #{extension} - - Or, you can solve this by logging in to the GitLab - database (#{dbname}) using a superuser and running: - - ALTER #{user} WITH SUPERUSER - - This query will grant the user superuser permissions, ensuring any database extensions - can be dropped through migrations. - - For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html. - MSG - - raise - end - - def rename_constraint(table_name, old_name, new_name) - execute <<~SQL - ALTER TABLE #{quote_table_name(table_name)} - RENAME CONSTRAINT #{quote_column_name(old_name)} TO #{quote_column_name(new_name)} - SQL - end - - def drop_constraint(table_name, constraint_name, cascade: false) - execute <<~SQL - ALTER TABLE #{quote_table_name(table_name)} DROP CONSTRAINT #{quote_column_name(constraint_name)} #{cascade_statement(cascade)} - SQL - end - def add_primary_key_using_index(table_name, pk_name, index_to_use) execute <<~SQL ALTER TABLE #{quote_table_name(table_name)} ADD CONSTRAINT #{quote_table_name(pk_name)} PRIMARY KEY USING INDEX #{quote_table_name(index_to_use)} @@ -1536,17 +1165,20 @@ into similar problems in the future (e.g. when new tables are created). SQL end - private + # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity + def create_temporary_columns_and_triggers(table, mappings, primary_key: :id, old_bigint_column_naming: false) + raise ArgumentError, "No mappings for column conversion provided" if mappings.blank? - def multiple_columns(columns, separator: ', ') - Array.wrap(columns).join(separator) - end + unless mappings.values.all? { |values| mapping_has_required_columns?(values) } + raise ArgumentError, "Some mappings don't have required keys provided" + end - def cascade_statement(cascade) - cascade ? 'CASCADE' : '' - end + neutral_values_for_type = { + int: 0, + bigint: 0, + uuid: '00000000-0000-0000-0000-000000000000' + } - def create_temporary_columns_and_triggers(table, columns, primary_key: :id, data_type: :bigint) unless table_exists?(table) raise "Table #{table} does not exist" end @@ -1555,7 +1187,7 @@ into similar problems in the future (e.g. when new tables are created). raise "Column #{primary_key} does not exist on #{table}" end - columns = Array.wrap(columns) + columns = mappings.keys columns.each do |column| next if column_exists?(table, column) @@ -1564,67 +1196,88 @@ into similar problems in the future (e.g. when new tables are created). check_trigger_permissions!(table) - conversions = columns.to_h { |column| [column, convert_to_bigint_column(column)] } + if old_bigint_column_naming + mappings.each do |column, params| + params.merge!( + temporary_column_name: convert_to_bigint_column(column) + ) + end + else + mappings.each do |column, params| + params.merge!( + temporary_column_name: convert_to_type_column(column, params[:from_type], params[:to_type]) + ) + end + end with_lock_retries do - conversions.each do |(source_column, temporary_name)| - column = column_for(table, source_column) + mappings.each do |(column_name, params)| + column = column_for(table, column_name) + temporary_name = params[:temporary_column_name] + data_type = params[:to_type] + default_value = params[:default_value] if (column.name.to_s == primary_key.to_s) || !column.null # If the column to be converted is either a PK or is defined as NOT NULL, # set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow # That way, we skip the expensive validation step required to add # a NOT NULL constraint at the end of the process - add_column(table, temporary_name, data_type, default: column.default || 0, null: false) + add_column( + table, + temporary_name, + data_type, + default: column.default || default_value || neutral_values_for_type.fetch(data_type), + null: false + ) else - add_column(table, temporary_name, data_type, default: column.default) + add_column( + table, + temporary_name, + data_type, + default: column.default + ) end end - install_rename_triggers(table, conversions.keys, conversions.values) + old_column_names = mappings.keys + temporary_column_names = mappings.values.map { |v| v[:temporary_column_name] } + install_rename_triggers(table, old_column_names, temporary_column_names) end end + # rubocop:enable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity - def validate_check_constraint_name!(constraint_name) - if constraint_name.to_s.length > MAX_IDENTIFIER_NAME_LENGTH - raise "The maximum allowed constraint name is #{MAX_IDENTIFIER_NAME_LENGTH} characters" + def partition?(table_name) + if view_exists?(:postgres_partitions) + Gitlab::Database::PostgresPartition.partition_exists?(table_name) + else + Gitlab::Database::PostgresPartition.legacy_partition_exists?(table_name) end end - # Returns an ActiveRecord::Result containing the check constraints - # defined for the given column. - # - # If the schema is not provided, then the current_schema is used - def check_constraints_for(table, column, schema: nil) - check_sql = <<~SQL - SELECT - ccu.table_schema as schema_name, - ccu.table_name as table_name, - ccu.column_name as column_name, - con.conname as constraint_name, - pg_get_constraintdef(con.oid) as constraint_def - FROM pg_catalog.pg_constraint con - INNER JOIN pg_catalog.pg_class rel - ON rel.oid = con.conrelid - INNER JOIN pg_catalog.pg_namespace nsp - ON nsp.oid = con.connamespace - INNER JOIN information_schema.constraint_column_usage ccu - ON con.conname = ccu.constraint_name - AND nsp.nspname = ccu.constraint_schema - AND rel.relname = ccu.table_name - WHERE nsp.nspname = #{connection.quote(schema.presence || current_schema)} - AND rel.relname = #{connection.quote(table)} - AND ccu.column_name = #{connection.quote(column)} - AND con.contype = 'c' - ORDER BY constraint_name - SQL + private + + def multiple_columns(columns, separator: ', ') + Array.wrap(columns).join(separator) + end + + def cascade_statement(cascade) + cascade ? 'CASCADE' : '' + end - connection.exec_query(check_sql) + def validate_check_constraint_name!(constraint_name) + if constraint_name.to_s.length > MAX_IDENTIFIER_NAME_LENGTH + raise "The maximum allowed constraint name is #{MAX_IDENTIFIER_NAME_LENGTH} characters" + end end - def statement_timeout_disabled? - # This is a string of the form "100ms" or "0" when disabled - connection.select_value('SHOW statement_timeout') == "0" + # mappings => {} where keys are column names and values are hashes with the following keys: + # from_type - from which type we're migrating + # to_type - to which type we're migrating + # default_value - custom default value, if not provided will be taken from neutral_values_for_type + def mapping_has_required_columns?(mapping) + %i[from_type to_type].map do |required_key| + mapping.has_key?(required_key) + end.all? end def column_is_nullable?(table, column) @@ -1640,14 +1293,6 @@ into similar problems in the future (e.g. when new tables are created). connection.select_value(check_sql) == 'YES' end - def text_limit_name(table, column, name: nil) - name.presence || check_constraint_name(table, column, 'max_length') - end - - def not_null_constraint_name(table, column, name: nil) - name.presence || check_constraint_name(table, column, 'not_null') - end - def missing_schema_object_message(table, type, name) <<~MESSAGE Could not find #{type} "#{name}" on table "#{table}" which was referenced during the migration. @@ -1717,17 +1362,6 @@ into similar problems in the future (e.g. when new tables are created). Must end with `_at`} MESSAGE end - - def validate_not_in_transaction!(method_name, modifier = nil) - return unless transaction_open? - - raise <<~ERROR - #{["`#{method_name}`", modifier].compact.join(' ')} cannot be run inside a transaction. - - You can disable transactions by calling `disable_ddl_transaction!` in the body of - your migration class - ERROR - end end end end diff --git a/lib/gitlab/database/migration_helpers/v2.rb b/lib/gitlab/database/migration_helpers/v2.rb index dd426962033..b5b8b58681c 100644 --- a/lib/gitlab/database/migration_helpers/v2.rb +++ b/lib/gitlab/database/migration_helpers/v2.rb @@ -205,8 +205,8 @@ module Gitlab raise "Column #{old_column} does not exist on #{table}" end - if column.default - raise "#{calling_operation} does not currently support columns with default values" + if column.default_function + raise "#{calling_operation} does not currently support columns with default functions" end unless column_exists?(table, batch_column_name) @@ -269,17 +269,20 @@ module Gitlab def create_insert_trigger(trigger_name, quoted_table, quoted_old_column, quoted_new_column) function_name = function_name_for_trigger(trigger_name) + column = columns(quoted_table.delete('"').to_sym).find { |column| column.name == quoted_old_column.delete('"') } + quoted_default_value = connection.quote(column.default) + execute(<<~SQL) CREATE OR REPLACE FUNCTION #{function_name}() RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN - IF NEW.#{quoted_old_column} IS NULL AND NEW.#{quoted_new_column} IS NOT NULL THEN + IF NEW.#{quoted_old_column} IS NOT DISTINCT FROM #{quoted_default_value} AND NEW.#{quoted_new_column} IS DISTINCT FROM #{quoted_default_value} THEN NEW.#{quoted_old_column} = NEW.#{quoted_new_column}; END IF; - IF NEW.#{quoted_new_column} IS NULL AND NEW.#{quoted_old_column} IS NOT NULL THEN + IF NEW.#{quoted_new_column} IS NOT DISTINCT FROM #{quoted_default_value} AND NEW.#{quoted_old_column} IS DISTINCT FROM #{quoted_default_value} THEN NEW.#{quoted_new_column} = NEW.#{quoted_old_column}; END IF; diff --git a/lib/gitlab/database/migrations/batched_background_migration_helpers.rb b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb index 363fd0598f9..e958ce2aba4 100644 --- a/lib/gitlab/database/migrations/batched_background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb @@ -196,6 +196,43 @@ module Gitlab :gitlab_main end end + + def ensure_batched_background_migration_is_finished(job_class_name:, table_name:, column_name:, job_arguments:, finalize: true) + Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode! + + Gitlab::Database::BackgroundMigration::BatchedMigration.reset_column_information + migration = Gitlab::Database::BackgroundMigration::BatchedMigration.find_for_configuration( + Gitlab::Database.gitlab_schemas_for_connection(connection), + job_class_name, table_name, column_name, job_arguments + ) + + configuration = { + job_class_name: job_class_name, + table_name: table_name, + column_name: column_name, + job_arguments: job_arguments + } + + return Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}" if migration.nil? + + return if migration.finished? + + finalize_batched_background_migration(job_class_name: job_class_name, table_name: table_name, column_name: column_name, job_arguments: job_arguments) if finalize + + return if migration.reload.finished? # rubocop:disable Cop/ActiveRecordAssociationReload + + raise "Expected batched background migration for the given configuration to be marked as 'finished', " \ + "but it is '#{migration.status_name}':" \ + "\t#{configuration}" \ + "\n\n" \ + "Finalize it manually by running the following command in a `bash` or `sh` shell:" \ + "\n\n" \ + "\tsudo gitlab-rake gitlab:background_migrations:finalize[#{job_class_name},#{table_name},#{column_name},'#{job_arguments.to_json.gsub(',', '\,')}']" \ + "\n\n" \ + "For more information, check the documentation" \ + "\n\n" \ + "\thttps://docs.gitlab.com/ee/user/admin_area/monitoring/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished" + end end end end diff --git a/lib/gitlab/database/migrations/constraints_helpers.rb b/lib/gitlab/database/migrations/constraints_helpers.rb new file mode 100644 index 00000000000..7b849e3137a --- /dev/null +++ b/lib/gitlab/database/migrations/constraints_helpers.rb @@ -0,0 +1,337 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + module ConstraintsHelpers + include LockRetriesHelpers + include TimeoutHelpers + + # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS + MAX_IDENTIFIER_NAME_LENGTH = 63 + + # Returns the name for a check constraint + # + # type: + # - Any value, as long as it is unique + # - Constraint names are unique per table in Postgres, and, additionally, + # we can have multiple check constraints over a column + # So we use the (table, column, type) triplet as a unique name + # - e.g. we use 'max_length' when adding checks for text limits + # or 'not_null' when adding a NOT NULL constraint + # + def check_constraint_name(table, column, type) + identifier = "#{table}_#{column}_check_#{type}" + # Check concurrent_foreign_key_name() for info on why we use a hash + hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10) + + "check_#{hashed_identifier}" + end + + def check_constraint_exists?(table, constraint_name) + # Constraint names are unique per table in Postgres, not per schema + # Two tables can have constraints with the same name, so we filter by + # the table name in addition to using the constraint_name + + check_sql = <<~SQL + SELECT COUNT(*) + FROM pg_catalog.pg_constraint con + INNER JOIN pg_catalog.pg_class rel + ON rel.oid = con.conrelid + INNER JOIN pg_catalog.pg_namespace nsp + ON nsp.oid = con.connamespace + WHERE con.contype = 'c' + AND con.conname = #{connection.quote(constraint_name)} + AND nsp.nspname = #{connection.quote(current_schema)} + AND rel.relname = #{connection.quote(table)} + SQL + + connection.select_value(check_sql) > 0 + end + + # Adds a check constraint to a table + # + # This method is the generic helper for adding any check constraint + # More specialized helpers may use it (e.g. add_text_limit or add_not_null) + # + # This method only requires minimal locking: + # - The constraint is added using NOT VALID + # This allows us to add the check constraint without validating it + # - The check will be enforced for new data (inserts) coming in + # - If `validate: true` the constraint is also validated + # Otherwise, validate_check_constraint() can be used at a later stage + # - Check comments on add_concurrent_foreign_key for more info + # + # table - The table the constraint will be added to + # check - The check clause to add + # e.g. 'char_length(name) <= 5' or 'store IS NOT NULL' + # constraint_name - The name of the check constraint (otherwise auto-generated) + # Should be unique per table (not per column) + # validate - Whether to validate the constraint in this call + # + def add_check_constraint(table, check, constraint_name, validate: true) + # Transactions would result in ALTER TABLE locks being held for the + # duration of the transaction, defeating the purpose of this method. + validate_not_in_transaction!(:add_check_constraint) + + validate_check_constraint_name!(constraint_name) + + if check_constraint_exists?(table, constraint_name) + warning_message = <<~MESSAGE + Check constraint was not created because it exists already + (this may be due to an aborted migration or similar) + table: #{table}, check: #{check}, constraint name: #{constraint_name} + MESSAGE + + Gitlab::AppLogger.warn warning_message + else + # Only add the constraint without validating it + # Even though it is fast, ADD CONSTRAINT requires an EXCLUSIVE lock + # Use with_lock_retries to make sure that this operation + # will not timeout on tables accessed by many processes + with_lock_retries do + execute <<~SQL + ALTER TABLE #{table} + ADD CONSTRAINT #{constraint_name} + CHECK ( #{check} ) + NOT VALID; + SQL + end + end + + validate_check_constraint(table, constraint_name) if validate + end + + def validate_check_constraint(table, constraint_name) + validate_check_constraint_name!(constraint_name) + + unless check_constraint_exists?(table, constraint_name) + raise missing_schema_object_message(table, "check constraint", constraint_name) + end + + disable_statement_timeout do + # VALIDATE CONSTRAINT only requires a SHARE UPDATE EXCLUSIVE LOCK + # It only conflicts with other validations and creating indexes + execute("ALTER TABLE #{table} VALIDATE CONSTRAINT #{constraint_name};") + end + end + + def remove_check_constraint(table, constraint_name) + # This is technically not necessary, but aligned with add_check_constraint + # and allows us to continue use with_lock_retries here + validate_not_in_transaction!(:remove_check_constraint) + + validate_check_constraint_name!(constraint_name) + + # DROP CONSTRAINT requires an EXCLUSIVE lock + # Use with_lock_retries to make sure that this will not timeout + with_lock_retries do + execute <<-SQL + ALTER TABLE #{table} + DROP CONSTRAINT IF EXISTS #{constraint_name} + SQL + end + end + + # Copies all check constraints for the old column to the new column. + # + # table - The table containing the columns. + # old - The old column. + # new - The new column. + # schema - The schema the table is defined for + # If it is not provided, then the current_schema is used + def copy_check_constraints(table, old, new, schema: nil) + raise 'copy_check_constraints can not be run inside a transaction' if transaction_open? + + raise "Column #{old} does not exist on #{table}" unless column_exists?(table, old) + + raise "Column #{new} does not exist on #{table}" unless column_exists?(table, new) + + table_with_schema = schema.present? ? "#{schema}.#{table}" : table + + check_constraints_for(table, old, schema: schema).each do |check_c| + validate = !(check_c["constraint_def"].end_with? "NOT VALID") + + # Normalize: + # - Old constraint definitions: + # '(char_length(entity_path) <= 5500)' + # - Definitionss from pg_get_constraintdef(oid): + # 'CHECK ((char_length(entity_path) <= 5500))' + # - Definitions from pg_get_constraintdef(oid, pretty_bool): + # 'CHECK (char_length(entity_path) <= 5500)' + # - Not valid constraints: 'CHECK (...) NOT VALID' + # to a single format that we can use: + # '(char_length(entity_path) <= 5500)' + check_definition = check_c["constraint_def"] + .sub(/^\s*(CHECK)?\s*\({0,2}/, '(') + .sub(/\){0,2}\s*(NOT VALID)?\s*$/, ')') + + constraint_name = if check_definition == "(#{old} IS NOT NULL)" + not_null_constraint_name(table_with_schema, new) + elsif check_definition.start_with? "(char_length(#{old}) <=" + text_limit_name(table_with_schema, new) + else + check_constraint_name(table_with_schema, new, 'copy_check_constraint') + end + + add_check_constraint( + table_with_schema, + check_definition.gsub(old.to_s, new.to_s), + constraint_name, + validate: validate + ) + end + end + + # Migration Helpers for adding limit to text columns + def add_text_limit(table, column, limit, constraint_name: nil, validate: true) + add_check_constraint( + table, + "char_length(#{column}) <= #{limit}", + text_limit_name(table, column, name: constraint_name), + validate: validate + ) + end + + def validate_text_limit(table, column, constraint_name: nil) + validate_check_constraint(table, text_limit_name(table, column, name: constraint_name)) + end + + def remove_text_limit(table, column, constraint_name: nil) + remove_check_constraint(table, text_limit_name(table, column, name: constraint_name)) + end + + def check_text_limit_exists?(table, column, constraint_name: nil) + check_constraint_exists?(table, text_limit_name(table, column, name: constraint_name)) + end + + # Migration Helpers for managing not null constraints + def add_not_null_constraint(table, column, constraint_name: nil, validate: true) + if column_is_nullable?(table, column) + add_check_constraint( + table, + "#{column} IS NOT NULL", + not_null_constraint_name(table, column, name: constraint_name), + validate: validate + ) + else + warning_message = <<~MESSAGE + NOT NULL check constraint was not created: + column #{table}.#{column} is already defined as `NOT NULL` + MESSAGE + + Gitlab::AppLogger.warn warning_message + end + end + + def validate_not_null_constraint(table, column, constraint_name: nil) + validate_check_constraint( + table, + not_null_constraint_name(table, column, name: constraint_name) + ) + end + + def remove_not_null_constraint(table, column, constraint_name: nil) + remove_check_constraint( + table, + not_null_constraint_name(table, column, name: constraint_name) + ) + end + + def check_not_null_constraint_exists?(table, column, constraint_name: nil) + check_constraint_exists?( + table, + not_null_constraint_name(table, column, name: constraint_name) + ) + end + + def rename_constraint(table_name, old_name, new_name) + execute <<~SQL + ALTER TABLE #{quote_table_name(table_name)} + RENAME CONSTRAINT #{quote_column_name(old_name)} TO #{quote_column_name(new_name)} + SQL + end + + def drop_constraint(table_name, constraint_name, cascade: false) + execute <<~SQL + ALTER TABLE #{quote_table_name(table_name)} DROP CONSTRAINT #{quote_column_name(constraint_name)} #{cascade_statement(cascade)} + SQL + end + + def validate_check_constraint_name!(constraint_name) + return unless constraint_name.to_s.length > MAX_IDENTIFIER_NAME_LENGTH + + raise "The maximum allowed constraint name is #{MAX_IDENTIFIER_NAME_LENGTH} characters" + end + + def text_limit_name(table, column, name: nil) + name.presence || check_constraint_name(table, column, 'max_length') + end + + private + + def validate_not_in_transaction!(method_name, modifier = nil) + return unless transaction_open? + + raise <<~ERROR + #{["`#{method_name}`", modifier].compact.join(' ')} cannot be run inside a transaction. + + You can disable transactions by calling `disable_ddl_transaction!` in the body of + your migration class + ERROR + end + + # Returns an ActiveRecord::Result containing the check constraints + # defined for the given column. + # + # If the schema is not provided, then the current_schema is used + def check_constraints_for(table, column, schema: nil) + check_sql = <<~SQL + SELECT + ccu.table_schema as schema_name, + ccu.table_name as table_name, + ccu.column_name as column_name, + con.conname as constraint_name, + pg_get_constraintdef(con.oid) as constraint_def + FROM pg_catalog.pg_constraint con + INNER JOIN pg_catalog.pg_class rel + ON rel.oid = con.conrelid + INNER JOIN pg_catalog.pg_namespace nsp + ON nsp.oid = con.connamespace + INNER JOIN information_schema.constraint_column_usage ccu + ON con.conname = ccu.constraint_name + AND nsp.nspname = ccu.constraint_schema + AND rel.relname = ccu.table_name + WHERE nsp.nspname = #{connection.quote(schema.presence || current_schema)} + AND rel.relname = #{connection.quote(table)} + AND ccu.column_name = #{connection.quote(column)} + AND con.contype = 'c' + ORDER BY constraint_name + SQL + + connection.exec_query(check_sql) + end + + def cascade_statement(cascade) + cascade ? 'CASCADE' : '' + end + + def not_null_constraint_name(table, column, name: nil) + name.presence || check_constraint_name(table, column, 'not_null') + end + + def missing_schema_object_message(table, type, name) + <<~MESSAGE + Could not find #{type} "#{name}" on table "#{table}" which was referenced during the migration. + This issue could be caused by the database schema straying from the expected state. + + To resolve this issue, please verify: + 1. all previous migrations have completed + 2. the database objects used in this migration match the Rails definition in schema.rb or structure.sql + + MESSAGE + end + end + end + end +end diff --git a/lib/gitlab/database/migrations/extension_helpers.rb b/lib/gitlab/database/migrations/extension_helpers.rb new file mode 100644 index 00000000000..435e9e0d2dc --- /dev/null +++ b/lib/gitlab/database/migrations/extension_helpers.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + module ExtensionHelpers + def create_extension(extension) + execute("CREATE EXTENSION IF NOT EXISTS #{extension}") + rescue ActiveRecord::StatementInvalid => e + dbname = ApplicationRecord.database.database_name + user = ApplicationRecord.database.username + + warn(<<~MSG) if e.to_s.include?('permission denied') + GitLab requires the PostgreSQL extension '#{extension}' installed in database '#{dbname}', but + the database user is not allowed to install the extension. + + You can either install the extension manually using a database superuser: + + CREATE EXTENSION IF NOT EXISTS #{extension} + + Or, you can solve this by logging in to the GitLab + database (#{dbname}) using a superuser and running: + + ALTER #{user} WITH SUPERUSER + + This query will grant the user superuser permissions, ensuring any database extensions + can be installed through migrations. + + For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html. + MSG + + raise + end + + def drop_extension(extension) + execute("DROP EXTENSION IF EXISTS #{extension}") + rescue ActiveRecord::StatementInvalid => e + dbname = ApplicationRecord.database.database_name + user = ApplicationRecord.database.username + + warn(<<~MSG) if e.to_s.include?('permission denied') + This migration attempts to drop the PostgreSQL extension '#{extension}' + installed in database '#{dbname}', but the database user is not allowed + to drop the extension. + + You can either drop the extension manually using a database superuser: + + DROP EXTENSION IF EXISTS #{extension} + + Or, you can solve this by logging in to the GitLab + database (#{dbname}) using a superuser and running: + + ALTER #{user} WITH SUPERUSER + + This query will grant the user superuser permissions, ensuring any database extensions + can be dropped through migrations. + + For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html. + MSG + + raise + end + end + end + end +end diff --git a/lib/gitlab/database/migrations/lock_retries_helpers.rb b/lib/gitlab/database/migrations/lock_retries_helpers.rb new file mode 100644 index 00000000000..137ef3ab144 --- /dev/null +++ b/lib/gitlab/database/migrations/lock_retries_helpers.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + module LockRetriesHelpers + # Executes the block with a retry mechanism that alters the +lock_timeout+ and +sleep_time+ between attempts. + # The timings can be controlled via the +timing_configuration+ parameter. + # If the lock was not acquired within the retry period, a last attempt is made without using +lock_timeout+. + # + # Note this helper uses subtransactions when run inside an already open transaction. + # + # ==== Examples + # # Invoking without parameters + # with_lock_retries do + # drop_table :my_table + # end + # + # # Invoking with custom +timing_configuration+ + # t = [ + # [1.second, 1.second], + # [2.seconds, 2.seconds] + # ] + # + # with_lock_retries(timing_configuration: t) do + # drop_table :my_table # this will be retried twice + # end + # + # # Disabling the retries using an environment variable + # > export DISABLE_LOCK_RETRIES=true + # + # with_lock_retries do + # drop_table :my_table # one invocation, it will not retry at all + # end + # + # ==== Parameters + # * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the + # block, sleep time before the next iteration, defaults to + # `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION` + # * +logger+ - [Gitlab::JsonLogger] + # * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES` + def with_lock_retries(*args, **kwargs, &block) + raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion) + merged_args = { + connection: connection, + klass: self.class, + logger: Gitlab::BackgroundMigration::Logger, + allow_savepoints: true + }.merge(kwargs) + + Gitlab::Database::WithLockRetries.new(**merged_args) + .run(raise_on_exhaustion: raise_on_exhaustion, &block) + end + end + end + end +end diff --git a/lib/gitlab/database/migrations/runner.rb b/lib/gitlab/database/migrations/runner.rb index 85dc6051c7c..27b161419b2 100644 --- a/lib/gitlab/database/migrations/runner.rb +++ b/lib/gitlab/database/migrations/runner.rb @@ -7,6 +7,7 @@ module Gitlab BASE_RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze METADATA_FILENAME = 'metadata.json' SCHEMA_VERSION = 4 # Version of the output format produced by the runner + POST_MIGRATION_MATCHER = %r{db/post_migrate/}.freeze class << self def up(database:, legacy_mode: false) @@ -116,7 +117,10 @@ module Gitlab verbose_was = ActiveRecord::Migration.verbose ActiveRecord::Migration.verbose = true - sorted_migrations = migrations.sort_by(&:version) + sorted_migrations = migrations.sort_by do |m| + [m.filename.match?(POST_MIGRATION_MATCHER) ? 1 : 0, m.version] + end + sorted_migrations.reverse! if direction == :down instrumentation = Instrumentation.new(result_dir: result_dir) diff --git a/lib/gitlab/database/migrations/timeout_helpers.rb b/lib/gitlab/database/migrations/timeout_helpers.rb new file mode 100644 index 00000000000..423c77452b1 --- /dev/null +++ b/lib/gitlab/database/migrations/timeout_helpers.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + module TimeoutHelpers + # Long-running migrations may take more than the timeout allowed by + # the database. Disable the session's statement timeout to ensure + # migrations don't get killed prematurely. + # + # There are two possible ways to disable the statement timeout: + # + # - Per transaction (this is the preferred and default mode) + # - Per connection (requires a cleanup after the execution) + # + # When using a per connection disable statement, code must be inside + # a block so we can automatically execute `RESET statement_timeout` after block finishes + # otherwise the statement will still be disabled until connection is dropped + # or `RESET statement_timeout` is executed + def disable_statement_timeout + if block_given? + if statement_timeout_disabled? + # Don't do anything if the statement_timeout is already disabled + # Allows for nested calls of disable_statement_timeout without + # resetting the timeout too early (before the outer call ends) + yield + else + begin + execute('SET statement_timeout TO 0') + + yield + ensure + execute('RESET statement_timeout') + end + end + else + unless transaction_open? + raise <<~ERROR + Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block. + If you don't want to use a transaction wrap your code in a block call: + + disable_statement_timeout { # code that requires disabled statement here } + + This will make sure statement_timeout is disabled before and reset after the block execution is finished. + ERROR + end + + execute('SET LOCAL statement_timeout TO 0') + end + end + + private + + def statement_timeout_disabled? + # This is a string of the form "100ms" or "0" when disabled + connection.select_value('SHOW statement_timeout') == "0" + end + end + end + end +end diff --git a/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb b/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb index 23a8dc0b44f..58447481e60 100644 --- a/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb +++ b/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb @@ -10,13 +10,17 @@ module Gitlab attr_reader :partitioning_column, :table_name, :parent_table_name, :zero_partition_value - def initialize(migration_context:, table_name:, parent_table_name:, partitioning_column:, zero_partition_value:) + def initialize( + migration_context:, table_name:, parent_table_name:, partitioning_column:, + zero_partition_value:, lock_tables: []) + @migration_context = migration_context @connection = migration_context.connection @table_name = table_name @parent_table_name = parent_table_name @partitioning_column = partitioning_column @zero_partition_value = zero_partition_value + @lock_tables = Array.wrap(lock_tables) end def prepare_for_partitioning @@ -35,7 +39,12 @@ module Gitlab create_parent_table attach_foreign_keys_to_parent - migration_context.with_lock_retries(raise_on_exhaustion: true) do + lock_args = { + raise_on_exhaustion: true, + timing_configuration: lock_timing_configuration + } + + migration_context.with_lock_retries(**lock_args) do migration_context.execute(sql_to_convert_table) end end @@ -74,6 +83,7 @@ module Gitlab # but they acquire the same locks so it's much faster to incude them # here. [ + lock_tables_statement, attach_table_to_parent_statement, alter_sequence_statements(old_table: table_name, new_table: parent_table_name), remove_constraint_statement @@ -162,6 +172,16 @@ module Gitlab end end + def lock_tables_statement + return if @lock_tables.empty? + + table_names = @lock_tables.map { |name| quote_table_name(name) }.join(', ') + + <<~SQL + LOCK #{table_names} IN ACCESS EXCLUSIVE MODE + SQL + end + def attach_table_to_parent_statement <<~SQL ALTER TABLE #{quote_table_name(parent_table_name)} @@ -235,6 +255,13 @@ module Gitlab ALTER TABLE #{connection.quote_table_name(table_name)} OWNER TO CURRENT_USER SQL end + + def lock_timing_configuration + iterations = Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION + aggressive_iterations = Array.new(5) { [10.seconds, 1.minute] } + + iterations + aggressive_iterations + end end end end diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb index 5e32ecad4ca..58c0728b614 100644 --- a/lib/gitlab/database/partitioning/detached_partition_dropper.rb +++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb @@ -7,7 +7,7 @@ module Gitlab Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop") Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition| - if partition_attached?(qualify_partition_name(detached_partition.table_name)) + if partition_attached?(detached_partition.fully_qualified_table_name) unmark_partition(detached_partition) else drop_partition(detached_partition) @@ -41,14 +41,14 @@ module Gitlab # Another process may have already dropped the table and deleted this entry next unless try_lock_detached_partition(detached_partition.id) - drop_detached_partition(detached_partition.table_name) + drop_detached_partition(detached_partition) detached_partition.destroy! end end def remove_foreign_keys(detached_partition) - partition_identifier = qualify_partition_name(detached_partition.table_name) + partition_identifier = detached_partition.fully_qualified_table_name # We want to load all of these into memory at once to get a consistent view to loop over, # since we'll be deleting from this list as we go @@ -65,7 +65,7 @@ module Gitlab # It is important to only drop one foreign key per transaction. # Dropping a foreign key takes an ACCESS EXCLUSIVE lock on both tables participating in the foreign key. - partition_identifier = qualify_partition_name(detached_partition.table_name) + partition_identifier = detached_partition.fully_qualified_table_name with_lock_retries do connection.transaction(requires_new: false) do next unless try_lock_detached_partition(detached_partition.id) @@ -83,16 +83,10 @@ module Gitlab end end - def drop_detached_partition(partition_name) - partition_identifier = qualify_partition_name(partition_name) + def drop_detached_partition(detached_partition) + connection.drop_table(detached_partition.fully_qualified_table_name, if_exists: true) - connection.drop_table(partition_identifier, if_exists: true) - - Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: partition_name) - end - - def qualify_partition_name(table_name) - "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{table_name}" + Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: detached_partition.table_name) end def partition_attached?(partition_identifier) diff --git a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb index 15b542cf089..62f33bb56bc 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb @@ -7,6 +7,8 @@ module Gitlab include Gitlab::Database::MigrationHelpers include Gitlab::Database::SchemaHelpers + DuplicatedIndexesError = Class.new(StandardError) + ERROR_SCOPE = 'index' # Concurrently creates a new index on a partitioned table. In concept this works similarly to @@ -38,7 +40,7 @@ module Gitlab partitioned_table.postgres_partitions.order(:name).each do |partition| partition_index_name = generated_index_name(partition.identifier, options[:name]) - partition_options = options.merge(name: partition_index_name) + partition_options = options.merge(name: partition_index_name, allow_partition: true) add_concurrent_index(partition.identifier, column_names, partition_options) end @@ -92,6 +94,42 @@ module Gitlab .map { |_, indexes| indexes.map { |index| index['index_name'] } } end + # Retrieves a hash of index names for a given table and schema, by index + # definition. + # + # Example: + # + # indexes_by_definition_for_table('table_name_goes_here') + # + # Returns: + # + # { + # "CREATE _ btree (created_at)" => "index_on_created_at" + # } + def indexes_by_definition_for_table(table_name, schema_name: connection.current_schema) + duplicate_indexes = find_duplicate_indexes(table_name, schema_name: schema_name) + + unless duplicate_indexes.empty? + raise DuplicatedIndexesError, "#{table_name} has duplicate indexes: #{duplicate_indexes}" + end + + find_indexes(table_name, schema_name: schema_name) + .each_with_object({}) { |row, hash| hash[row['index_id']] = row['index_name'] } + end + + # Renames indexes for a given table and schema, mapping by index + # definition, to a hash of new index names. + # + # Example: + # + # index_names = indexes_by_definition_for_table('source_table_name_goes_here') + # drop_table('source_table_name_goes_here') + # rename_indexes_for_table('destination_table_name_goes_here', index_names) + def rename_indexes_for_table(table_name, new_index_names, schema_name: connection.current_schema) + current_index_names = indexes_by_definition_for_table(table_name, schema_name: schema_name) + rename_indexes(current_index_names, new_index_names, schema_name: schema_name) + end + private def find_indexes(table_name, schema_name: connection.current_schema) @@ -124,6 +162,18 @@ module Gitlab def generated_index_name(partition_name, index_name) object_name("#{partition_name}_#{index_name}", 'index') end + + def rename_indexes(from, to, schema_name: connection.current_schema) + indexes_to_rename = from.select { |index_id, _| to.has_key?(index_id) } + statements = indexes_to_rename.map do |index_id, index_name| + <<~SQL + ALTER INDEX #{connection.quote_table_name("#{schema_name}.#{connection.quote_column_name(index_name)}")} + RENAME TO #{connection.quote_column_name(to[index_id])} + SQL + end + + connection.execute(statements.join(';')) + end end end end diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb index 695a5d7ec77..f9790bf53b9 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb @@ -275,7 +275,7 @@ module Gitlab ).revert_preparation_for_partitioning end - def convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:) + def convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, lock_tables: []) validate_not_in_transaction!(:convert_table_to_first_list_partition) Gitlab::Database::Partitioning::ConvertTableToFirstListPartition @@ -283,7 +283,8 @@ module Gitlab table_name: table_name, parent_table_name: parent_table_name, partitioning_column: partitioning_column, - zero_partition_value: initial_partitioning_value + zero_partition_value: initial_partitioning_value, + lock_tables: lock_tables ).partition end diff --git a/lib/gitlab/database/postgres_partition.rb b/lib/gitlab/database/postgres_partition.rb index eb080904f73..eda11fd8382 100644 --- a/lib/gitlab/database/postgres_partition.rb +++ b/lib/gitlab/database/postgres_partition.rb @@ -19,6 +19,20 @@ module Gitlab scope :for_parent_table, ->(name) { where("parent_identifier = concat(current_schema(), '.', ?)", name).order(:name) } + def self.partition_exists?(table_name) + where("identifier = concat(current_schema(), '.', ?)", table_name).exists? + end + + def self.legacy_partition_exists?(table_name) + result = connection.select_value(<<~SQL) + SELECT true FROM pg_class + WHERE relname = '#{table_name}' + AND relispartition = true; + SQL + + !!result + end + def to_s name end diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb index 6f64d04270f..1280789b30c 100644 --- a/lib/gitlab/database/query_analyzer.rb +++ b/lib/gitlab/database/query_analyzer.rb @@ -86,7 +86,11 @@ module Gitlab analyzers.each do |analyzer| next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed) - analyzer.analyze(parsed) + if analyzer.raw? + analyzer.analyze(sql) + else + analyzer.analyze(parsed) + end rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e # We catch all standard errors to prevent validation errors to introduce fatal errors in production Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) diff --git a/lib/gitlab/database/query_analyzers/base.rb b/lib/gitlab/database/query_analyzers/base.rb index 9a52a4f6e23..9c2c228f869 100644 --- a/lib/gitlab/database/query_analyzers/base.rb +++ b/lib/gitlab/database/query_analyzers/base.rb @@ -53,6 +53,10 @@ module Gitlab Thread.current[self.context_key] end + def self.raw? + false + end + def self.enabled? raise NotImplementedError end diff --git a/lib/gitlab/database/query_analyzers/ci/partitioning_id_analyzer.rb b/lib/gitlab/database/query_analyzers/ci/partitioning_id_analyzer.rb new file mode 100644 index 00000000000..47277182d9a --- /dev/null +++ b/lib/gitlab/database/query_analyzers/ci/partitioning_id_analyzer.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module QueryAnalyzers + module Ci + # The purpose of this analyzer is to detect queries missing partition_id clause + # when selecting, inserting, updating or deleting data. + class PartitioningIdAnalyzer < Database::QueryAnalyzers::Base + PartitionIdMissingError = Class.new(QueryAnalyzerError) + + ROUTING_TABLES = %w[p_ci_builds_metadata].freeze + + class << self + def enabled? + ::Feature::FlipperFeature.table_exists? && + ::Feature.enabled?(:ci_partitioning_analyze_queries_partition_id_check, type: :ops) + end + + def analyze(parsed) + analyze_partition_id_presence(parsed) + end + + private + + def analyze_partition_id_presence(parsed) + detected = ROUTING_TABLES & (parsed.pg.dml_tables + parsed.pg.select_tables) + return if detected.none? + + if insert_query?(parsed) + return if insert_include_partition_id?(parsed) + else + detected_with_selected_columns = parsed_detected_tables(parsed, detected) + return if partition_id_included?(detected_with_selected_columns) + end + + ::Gitlab::ErrorTracking.track_and_raise_for_dev_exception( + PartitionIdMissingError.new( + "Detected query against a partitioned table without partition id: #{parsed.sql}" + ) + ) + end + + def parsed_detected_tables(parsed, routing_tables) + parsed.pg.filter_columns.each_with_object(Hash.new { |h, k| h[k] = [] }) do |item, hash| + table_name = item[0] || routing_tables[0] + column_name = item[1] + + hash[table_name] << column_name if routing_tables.include?(table_name) + end + end + + def partition_id_included?(result) + return false if result.empty? + + result.all? { |_routing_table, columns| columns.include?('partition_id') } + end + + def insert_query?(parsed) + parsed.sql.start_with?('INSERT') + end + + def insert_include_partition_id?(parsed) + filtered_columns_on_insert(parsed).include?('partition_id') + end + + def filtered_columns_on_insert(parsed) + result = parsed.pg.tree.to_h.dig(:stmts, 0, :stmt, :insert_stmt, :cols).map do |h| + h.dig(:res_target, :name) + end + + result || [] + end + end + end + end + end + end +end diff --git a/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb b/lib/gitlab/database/query_analyzers/ci/partitioning_routing_analyzer.rb index c2d5dfc1a15..eb55ebc7619 100644 --- a/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb +++ b/lib/gitlab/database/query_analyzers/ci/partitioning_routing_analyzer.rb @@ -5,12 +5,10 @@ module Gitlab module QueryAnalyzers module Ci # The purpose of this analyzer is to detect queries not going through a partitioning routing table - class PartitioningAnalyzer < Database::QueryAnalyzers::Base + class PartitioningRoutingAnalyzer < Database::QueryAnalyzers::Base RoutingTableNotUsedError = Class.new(QueryAnalyzerError) - ENABLED_TABLES = %w[ - ci_builds_metadata - ].freeze + ENABLED_TABLES = %w[ci_builds_metadata].freeze class << self def enabled? diff --git a/lib/gitlab/database/query_analyzers/query_recorder.rb b/lib/gitlab/database/query_analyzers/query_recorder.rb new file mode 100644 index 00000000000..88fe829c3d2 --- /dev/null +++ b/lib/gitlab/database/query_analyzers/query_recorder.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module QueryAnalyzers + class QueryRecorder < Base + LOG_FILE = 'rspec/query_recorder.ndjson' + + class << self + def raw? + true + end + + def enabled? + # Only enable QueryRecorder in CI + ENV['CI'].present? + end + + def analyze(sql) + payload = { + sql: sql + } + + log_query(payload) + end + + private + + def log_query(payload) + log_path = Rails.root.join(LOG_FILE) + log_dir = File.dirname(log_path) + + # Create log directory if it does not exist since it is only created + # ahead of time by certain CI jobs + FileUtils.mkdir_p(log_dir) unless Dir.exist?(log_dir) + + log_line = "#{Gitlab::Json.dump(payload)}\n" + + File.write(log_path, log_line, mode: 'a') + end + end + end + end + end +end diff --git a/lib/gitlab/database/tables_truncate.rb b/lib/gitlab/database/tables_truncate.rb index 164520fbab3..8380bf23899 100644 --- a/lib/gitlab/database/tables_truncate.rb +++ b/lib/gitlab/database/tables_truncate.rb @@ -14,7 +14,7 @@ module Gitlab end def execute - raise "Cannot truncate legacy tables in single-db setup" unless Gitlab::Database.has_config?(:ci) + raise "Cannot truncate legacy tables in single-db setup" if single_database_setup? raise "database is not supported" unless %w[main ci].include?(database_name) logger&.info "DRY RUN:" if dry_run @@ -91,6 +91,13 @@ module Gitlab end end end + + def single_database_setup? + return true unless Gitlab::Database.has_config?(:ci) + + ci_base_model = Gitlab::Database.database_base_models[:ci] + !!Gitlab::Database.db_config_share_with(ci_base_model.connection_db_config) + end end end end diff --git a/lib/gitlab/database/type/symbolized_jsonb.rb b/lib/gitlab/database/type/symbolized_jsonb.rb new file mode 100644 index 00000000000..5bec738ec9c --- /dev/null +++ b/lib/gitlab/database/type/symbolized_jsonb.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Type + # Extends Rails' Jsonb data type to deserialize it into symbolized Hash. + # + # Example: + # + # class SomeModel < ApplicationRecord + # # some_model.a_field is of type `jsonb` + # attribute :a_field, :sym_jsonb + # end + class SymbolizedJsonb < ::ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Jsonb + def type + :sym_jsonb + end + + def deserialize(value) + data = super + return unless data + + ::Gitlab::Utils.deep_symbolized_access(data) + end + end + end + end +end |