gitlab.com/gitlab-org/gitlab-foss.git

author     GitLab Bot <gitlab-bot@gitlab.com>  2022-11-17 14:33:21 +0300
committer  GitLab Bot <gitlab-bot@gitlab.com>  2022-11-17 14:33:21 +0300
commit     7021455bd1ed7b125c55eb1b33c5a01f2bc55ee0 (patch)
tree       5bdc2229f5198d516781f8d24eace62fc7e589e9 /lib/gitlab/database
parent     185b095e93520f96e9cfc31d9c3e69b498cdab7c (diff)

Add latest changes from gitlab-org/gitlab@15-6-stable-ee (tag: v15.6.0-rc42)
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--  lib/gitlab/database/background_migration/batched_job.rb  8
-rw-r--r--  lib/gitlab/database/background_migration/batched_migration.rb  15
-rw-r--r--  lib/gitlab/database/gitlab_schemas.yml  6
-rw-r--r--  lib/gitlab/database/load_balancing/configuration.rb  3
-rw-r--r--  lib/gitlab/database/load_balancing/load_balancer.rb  7
-rw-r--r--  lib/gitlab/database/load_balancing/service_discovery.rb  13
-rw-r--r--  lib/gitlab/database/load_balancing/service_discovery/sampler.rb  56
-rw-r--r--  lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb  2
-rw-r--r--  lib/gitlab/database/lock_writes_manager.rb  7
-rw-r--r--  lib/gitlab/database/migration_helpers.rb  664
-rw-r--r--  lib/gitlab/database/migration_helpers/v2.rb  11
-rw-r--r--  lib/gitlab/database/migrations/batched_background_migration_helpers.rb  37
-rw-r--r--  lib/gitlab/database/migrations/constraints_helpers.rb  337
-rw-r--r--  lib/gitlab/database/migrations/extension_helpers.rb  66
-rw-r--r--  lib/gitlab/database/migrations/lock_retries_helpers.rb  57
-rw-r--r--  lib/gitlab/database/migrations/runner.rb  6
-rw-r--r--  lib/gitlab/database/migrations/timeout_helpers.rb  61
-rw-r--r--  lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb  31
-rw-r--r--  lib/gitlab/database/partitioning/detached_partition_dropper.rb  20
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb  52
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb  5
-rw-r--r--  lib/gitlab/database/postgres_partition.rb  14
-rw-r--r--  lib/gitlab/database/query_analyzer.rb  6
-rw-r--r--  lib/gitlab/database/query_analyzers/base.rb  4
-rw-r--r--  lib/gitlab/database/query_analyzers/ci/partitioning_id_analyzer.rb  79
-rw-r--r--  lib/gitlab/database/query_analyzers/ci/partitioning_routing_analyzer.rb (renamed from lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb)  6
-rw-r--r--  lib/gitlab/database/query_analyzers/query_recorder.rb  45
-rw-r--r--  lib/gitlab/database/tables_truncate.rb  9
-rw-r--r--  lib/gitlab/database/type/symbolized_jsonb.rb  28
29 files changed, 1103 insertions(+), 552 deletions(-)
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb
index 81898a59da7..6b7ff308c7e 100644
--- a/lib/gitlab/database/background_migration/batched_job.rb
+++ b/lib/gitlab/database/background_migration/batched_job.rb
@@ -14,7 +14,8 @@ module Gitlab
MAX_ATTEMPTS = 3
STUCK_JOBS_TIMEOUT = 1.hour.freeze
TIMEOUT_EXCEPTIONS = [ActiveRecord::StatementTimeout, ActiveRecord::ConnectionTimeoutError,
- ActiveRecord::AdapterTimeout, ActiveRecord::LockWaitTimeout].freeze
+ ActiveRecord::AdapterTimeout, ActiveRecord::LockWaitTimeout,
+ ActiveRecord::QueryCanceled].freeze
belongs_to :batched_migration, foreign_key: :batched_background_migration_id
has_many :batched_job_transition_logs, foreign_key: :batched_background_migration_job_id
@@ -112,7 +113,10 @@ module Gitlab
end
def can_split?(exception)
- attempts >= MAX_ATTEMPTS && TIMEOUT_EXCEPTIONS.include?(exception&.class) && batch_size > sub_batch_size && batch_size > 1
+ attempts >= MAX_ATTEMPTS &&
+ exception&.class&.in?(TIMEOUT_EXCEPTIONS) &&
+ batch_size > sub_batch_size &&
+ batch_size > 1
end
def split_and_retry!
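
Note: the TIMEOUT_EXCEPTIONS change makes PostgreSQL statement cancellation
(ActiveRecord::QueryCanceled) count as a splittable timeout, while the
can_split? rewrite only reflows the condition: ActiveSupport's Object#in?
flips receiver and argument but behaves exactly like the old include? check.
A minimal sketch with stand-in exception classes:

    require 'active_support/core_ext/object/inclusion'

    # Stand-ins for the ActiveRecord error classes listed above
    StatementTimeout = Class.new(StandardError)
    QueryCanceled    = Class.new(StandardError)
    TIMEOUTS = [StatementTimeout, QueryCanceled].freeze

    exception = QueryCanceled.new('canceling statement due to statement timeout')

    exception&.class&.in?(TIMEOUTS)     # => true
    TIMEOUTS.include?(exception&.class) # => true, same result as the new form
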
diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb
index 92cafd1d00e..61a660ad14c 100644
--- a/lib/gitlab/database/background_migration/batched_migration.rb
+++ b/lib/gitlab/database/background_migration/batched_migration.rb
@@ -94,8 +94,21 @@ module Gitlab
end
def self.active_migration(connection:)
+ active_migrations_distinct_on_table(connection: connection, limit: 1).first
+ end
+
+ def self.find_executable(id, connection:)
for_gitlab_schema(Gitlab::Database.gitlab_schemas_for_connection(connection))
- .executable.queue_order.first
+ .executable.find_by_id(id)
+ end
+
+ def self.active_migrations_distinct_on_table(connection:, limit:)
+ distinct_on_table = select('DISTINCT ON (table_name) id')
+ .for_gitlab_schema(Gitlab::Database.gitlab_schemas_for_connection(connection))
+ .executable
+ .order(table_name: :asc, id: :asc)
+
+ where(id: distinct_on_table).queue_order.limit(limit)
end
def self.successful_rows_counts(migrations)
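
Note: active_migration now selects at most one executable migration per
table_name (the lowest id for each table) and then takes the first in queue
order, so concurrently picked batched migrations never target the same table.
A sketch of the SQL the new scope roughly builds (status and gitlab-schema
filters elided; queue_order is assumed to sort by id ascending):

    active_migration_sql = <<~SQL
      SELECT *
      FROM batched_background_migrations
      WHERE id IN (
        SELECT DISTINCT ON (table_name) id
        FROM batched_background_migrations
        -- plus the executable-status and gitlab-schema filters
        ORDER BY table_name ASC, id ASC
      )
      ORDER BY id ASC -- queue_order
      LIMIT 1
    SQL
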
diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml
index c4a9cf8b80f..bf6ebb21f7d 100644
--- a/lib/gitlab/database/gitlab_schemas.yml
+++ b/lib/gitlab/database/gitlab_schemas.yml
@@ -40,6 +40,7 @@ atlassian_identities: :gitlab_main
audit_events_external_audit_event_destinations: :gitlab_main
audit_events: :gitlab_main
audit_events_streaming_headers: :gitlab_main
+audit_events_streaming_event_type_filters: :gitlab_main
authentication_events: :gitlab_main
award_emoji: :gitlab_main
aws_roles: :gitlab_main
@@ -167,6 +168,7 @@ dast_site_profiles_pipelines: :gitlab_main
dast_sites: :gitlab_main
dast_site_tokens: :gitlab_main
dast_site_validations: :gitlab_main
+dependency_proxy_blob_states: :gitlab_main
dependency_proxy_blobs: :gitlab_main
dependency_proxy_group_settings: :gitlab_main
dependency_proxy_image_ttl_group_policies: :gitlab_main
@@ -206,7 +208,6 @@ events: :gitlab_main
evidences: :gitlab_main
experiments: :gitlab_main
experiment_subjects: :gitlab_main
-experiment_users: :gitlab_main
external_approval_rules: :gitlab_main
external_approval_rules_protected_branches: :gitlab_main
external_pull_requests: :gitlab_ci
@@ -342,6 +343,7 @@ namespace_limits: :gitlab_main
namespace_package_settings: :gitlab_main
namespace_root_storage_statistics: :gitlab_main
namespace_ci_cd_settings: :gitlab_main
+namespace_commit_emails: :gitlab_main
namespace_settings: :gitlab_main
namespace_details: :gitlab_main
namespaces: :gitlab_main
@@ -363,6 +365,7 @@ operations_scopes: :gitlab_main
operations_strategies: :gitlab_main
operations_strategies_user_lists: :gitlab_main
operations_user_lists: :gitlab_main
+p_ci_builds_metadata: :gitlab_ci
packages_build_infos: :gitlab_main
packages_cleanup_policies: :gitlab_main
packages_composer_cache_files: :gitlab_main
@@ -451,6 +454,7 @@ projects: :gitlab_main
projects_sync_events: :gitlab_main
project_statistics: :gitlab_main
project_topics: :gitlab_main
+project_wiki_repositories: :gitlab_main
project_wiki_repository_states: :gitlab_main
prometheus_alert_events: :gitlab_main
prometheus_alerts: :gitlab_main
diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb
index 59b08fac7e9..50472bd5780 100644
--- a/lib/gitlab/database/load_balancing/configuration.rb
+++ b/lib/gitlab/database/load_balancing/configuration.rb
@@ -57,7 +57,8 @@ module Gitlab
record_type: 'A',
interval: 60,
disconnect_timeout: 120,
- use_tcp: false
+ use_tcp: false,
+ max_replica_pools: nil
}
end
diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb
index 0881025b425..cb3a378ad64 100644
--- a/lib/gitlab/database/load_balancing/load_balancer.rb
+++ b/lib/gitlab/database/load_balancing/load_balancer.rb
@@ -119,6 +119,13 @@ module Gitlab
connection = pool.connection
transaction_open = connection.transaction_open?
+ if attempt && attempt > 1
+ ::Gitlab::Database::LoadBalancing::Logger.warn(
+ event: :read_write_retry,
+ message: 'A read_write block was retried because of connection error'
+ )
+ end
+
yield connection
rescue StandardError => e
# No leaking will happen on the final attempt. Leaks are caused by subsequent retries
diff --git a/lib/gitlab/database/load_balancing/service_discovery.rb b/lib/gitlab/database/load_balancing/service_discovery.rb
index dfd4892371c..52a9e8798d4 100644
--- a/lib/gitlab/database/load_balancing/service_discovery.rb
+++ b/lib/gitlab/database/load_balancing/service_discovery.rb
@@ -48,6 +48,7 @@ module Gitlab
# forcefully disconnected.
# use_tcp - Use TCP instead of UDP to look up resources
# load_balancer - The load balancer instance to use
+ # rubocop:disable Metrics/ParameterLists
def initialize(
load_balancer,
nameserver:,
@@ -56,7 +57,8 @@ module Gitlab
record_type: 'A',
interval: 60,
disconnect_timeout: 120,
- use_tcp: false
+ use_tcp: false,
+ max_replica_pools: nil
)
@nameserver = nameserver
@port = port
@@ -66,7 +68,9 @@ module Gitlab
@disconnect_timeout = disconnect_timeout
@use_tcp = use_tcp
@load_balancer = load_balancer
+ @max_replica_pools = max_replica_pools
end
+ # rubocop:enable Metrics/ParameterLists
def start
Thread.new do
@@ -170,6 +174,8 @@ module Gitlab
addresses_from_srv_record(response)
end
+ addresses = sampler.sample(addresses)
+
raise EmptyDnsResponse if addresses.empty?
# Addresses are sorted so we can directly compare the old and new
@@ -221,6 +227,11 @@ module Gitlab
def addresses_from_a_record(resources)
resources.map { |r| Address.new(r.address.to_s) }
end
+
+ def sampler
+ @sampler ||= ::Gitlab::Database::LoadBalancing::ServiceDiscovery::Sampler
+ .new(max_replica_pools: @max_replica_pools)
+ end
end
end
end
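
Note: max_replica_pools threads from the load-balancing configuration (see
configuration.rb above) through ServiceDiscovery into the new Sampler, and
sampling happens before the EmptyDnsResponse check. A hypothetical settings
sketch (key names mirror the defaults above; nil preserves the old unlimited
behaviour):

    service_discovery_settings = {
      nameserver: 'localhost',
      record: 'db-replica.service.consul', # hypothetical DNS record
      record_type: 'A',
      interval: 60,
      disconnect_timeout: 120,
      use_tcp: false,
      max_replica_pools: 3 # cap the host list no matter how many records DNS returns
    }
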
diff --git a/lib/gitlab/database/load_balancing/service_discovery/sampler.rb b/lib/gitlab/database/load_balancing/service_discovery/sampler.rb
new file mode 100644
index 00000000000..71870214156
--- /dev/null
+++ b/lib/gitlab/database/load_balancing/service_discovery/sampler.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module LoadBalancing
+ class ServiceDiscovery
+ class Sampler
+ def initialize(max_replica_pools:, seed: Random.new_seed)
+ # seed must be set once and consistent
+ # for every invocation of #sample on
+ # the same instance of Sampler
+ @seed = seed
+ @max_replica_pools = max_replica_pools
+ end
+
+ def sample(addresses)
+ return addresses if @max_replica_pools.nil? || addresses.count <= @max_replica_pools
+
+ ::Gitlab::Database::LoadBalancing::Logger.info(
+ event: :host_list_limit_exceeded,
+ message: "Host list length exceeds max_replica_pools so random hosts will be chosen.",
+ max_replica_pools: @max_replica_pools,
+ total_host_list_length: addresses.count,
+ randomization_seed: @seed
+ )
+
+ # First sort them in case the ordering from DNS server changes
+ # then randomly order all addresses using consistent seed so
+ # this process always gives the same set for this instance of
+ # Sampler
+ addresses = addresses.sort
+ addresses = addresses.shuffle(random: Random.new(@seed))
+
+ # Group by hostname so that we can sample evenly across hosts
+ addresses_by_host = addresses.group_by(&:hostname)
+
+ selected_addresses = []
+ while selected_addresses.count < @max_replica_pools
+ # Loop over all hostnames grabbing one address at a time to
+ # evenly distribute across all hostnames
+ addresses_by_host.each do |host, addresses|
+ next if addresses.empty?
+
+ selected_addresses << addresses.pop
+
+ break unless selected_addresses.count < @max_replica_pools
+ end
+ end
+
+ selected_addresses
+ end
+ end
+ end
+ end
+ end
+end
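
Note: a usage sketch for the new Sampler. Address below is a simplified
stand-in for ServiceDiscovery::Address, and the Logger call inside #sample
assumes the GitLab app environment. Because the seed is fixed per instance,
repeated calls pick the same subset, and the pop-one-per-hostname loop spreads
the picks evenly across hosts:

    Address = Struct.new(:ip, :hostname) do
      include Comparable
      def <=>(other)
        [hostname, ip] <=> [other.hostname, other.ip]
      end
    end

    addresses = [
      Address.new('10.0.0.1', 'replica-a'), Address.new('10.0.0.2', 'replica-a'),
      Address.new('10.0.0.3', 'replica-b'), Address.new('10.0.0.4', 'replica-b')
    ]

    sampler = Gitlab::Database::LoadBalancing::ServiceDiscovery::Sampler
      .new(max_replica_pools: 2, seed: 42)

    sampler.sample(addresses).map(&:hostname).sort # => ["replica-a", "replica-b"]
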
diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
index 3180289ec69..737852d5ccb 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
@@ -4,7 +4,7 @@ module Gitlab
module Database
module LoadBalancing
class SidekiqServerMiddleware
- JobReplicaNotUpToDate = Class.new(StandardError)
+ JobReplicaNotUpToDate = Class.new(::Gitlab::SidekiqMiddleware::RetryError)
MINIMUM_DELAY_INTERVAL_SECONDS = 0.8
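
Note: re-parenting JobReplicaNotUpToDate under
::Gitlab::SidekiqMiddleware::RetryError lets retry-handling middleware rescue
it generically instead of matching this class by name. A minimal sketch with a
stand-in error hierarchy:

    RetryError = Class.new(StandardError) # stand-in for ::Gitlab::SidekiqMiddleware::RetryError
    JobReplicaNotUpToDate = Class.new(RetryError)

    begin
      raise JobReplicaNotUpToDate, 'replica has not caught up with the primary yet'
    rescue RetryError => e
      # one rescue now covers every retryable middleware error
      puts "re-enqueue job: #{e.message}"
    end
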
diff --git a/lib/gitlab/database/lock_writes_manager.rb b/lib/gitlab/database/lock_writes_manager.rb
index fe75cd763b4..2594ee04b35 100644
--- a/lib/gitlab/database/lock_writes_manager.rb
+++ b/lib/gitlab/database/lock_writes_manager.rb
@@ -5,6 +5,11 @@ module Gitlab
class LockWritesManager
TRIGGER_FUNCTION_NAME = 'gitlab_schema_prevent_write'
+ # Triggers to block INSERT / UPDATE / DELETE
+ # Triggers on TRUNCATE are not added to the information_schema.triggers
+ # See https://www.postgresql.org/message-id/16934.1568989957%40sss.pgh.pa.us
+ EXPECTED_TRIGGER_RECORD_COUNT = 3
+
def initialize(table_name:, connection:, database_name:, logger: nil, dry_run: false)
@table_name = table_name
@connection = connection
@@ -20,7 +25,7 @@ module Gitlab
AND trigger_name = '#{write_trigger_name(table_name)}'
SQL
- connection.select_value(query) == 3
+ connection.select_value(query) == EXPECTED_TRIGGER_RECORD_COUNT
end
def lock_writes
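
Note: the magic number 3 gains a name and an explanation. The view
information_schema.triggers reports one row per (trigger, event) pair; the
write-blocking trigger fires on INSERT, UPDATE, DELETE and TRUNCATE, but
TRUNCATE events never appear in that view, so a fully installed trigger shows
exactly three rows. A sketch of the check (table and trigger names
hypothetical):

    count_sql = <<~SQL
      SELECT COUNT(*)
      FROM information_schema.triggers
      WHERE event_object_table = 'ci_builds'
        AND trigger_name = 'gitlab_schema_write_trigger_for_ci_builds'
    SQL

    connection.select_value(count_sql) == EXPECTED_TRIGGER_RECORD_COUNT # => true once locked
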
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index df40e3b3868..16416dd2507 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -6,6 +6,10 @@ module Gitlab
include Migrations::ReestablishedConnectionStack
include Migrations::BackgroundMigrationHelpers
include Migrations::BatchedBackgroundMigrationHelpers
+ include Migrations::LockRetriesHelpers
+ include Migrations::TimeoutHelpers
+ include Migrations::ConstraintsHelpers
+ include Migrations::ExtensionHelpers
include DynamicModelHelpers
include RenameTableHelpers
include AsyncIndexes::MigrationHelpers
@@ -22,8 +26,6 @@ module Gitlab
super(table_name, connection: connection, **kwargs)
end
- # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
- MAX_IDENTIFIER_NAME_LENGTH = 63
DEFAULT_TIMESTAMP_COLUMNS = %i[created_at updated_at].freeze
# Adds `created_at` and `updated_at` columns with timezone information.
@@ -146,6 +148,12 @@ module Gitlab
'in the body of your migration class'
end
+ if !options.delete(:allow_partition) && partition?(table_name)
+ raise ArgumentError, 'add_concurrent_index can not be used on a partitioned ' \
+ 'table. Please use add_concurrent_partitioned_index on the partitioned table ' \
+ 'as we need to create indexes on each partition and an index on the parent table'
+ end
+
options = options.merge({ algorithm: :concurrently })
if index_exists?(table_name, column_name, **options)
@@ -202,6 +210,12 @@ module Gitlab
'in the body of your migration class'
end
+ if partition?(table_name)
+ raise ArgumentError, 'remove_concurrent_index can not be used on a partitioned ' \
+ 'table. Please use remove_concurrent_partitioned_index_by_name on the partitioned table ' \
+ 'as we need to remove the index on the parent table'
+ end
+
options = options.merge({ algorithm: :concurrently })
unless index_exists?(table_name, column_name, **options)
@@ -231,6 +245,12 @@ module Gitlab
'in the body of your migration class'
end
+ if partition?(table_name)
+ raise ArgumentError, 'remove_concurrent_index_by_name can not be used on a partitioned ' \
+ 'table. Please use remove_concurrent_partitioned_index_by_name on the partitioned table ' \
+ 'as we need to remove the index on the parent table'
+ end
+
index_name = index_name[:name] if index_name.is_a?(Hash)
raise 'remove_concurrent_index_by_name must get an index name as the second argument' if index_name.blank?
@@ -360,97 +380,6 @@ module Gitlab
"#{prefix}#{hashed_identifier}"
end
- # Long-running migrations may take more than the timeout allowed by
- # the database. Disable the session's statement timeout to ensure
- # migrations don't get killed prematurely.
- #
- # There are two possible ways to disable the statement timeout:
- #
- # - Per transaction (this is the preferred and default mode)
- # - Per connection (requires a cleanup after the execution)
- #
- # When using a per connection disable statement, code must be inside
- # a block so we can automatically execute `RESET statement_timeout` after block finishes
- # otherwise the statement will still be disabled until connection is dropped
- # or `RESET statement_timeout` is executed
- def disable_statement_timeout
- if block_given?
- if statement_timeout_disabled?
- # Don't do anything if the statement_timeout is already disabled
- # Allows for nested calls of disable_statement_timeout without
- # resetting the timeout too early (before the outer call ends)
- yield
- else
- begin
- execute('SET statement_timeout TO 0')
-
- yield
- ensure
- execute('RESET statement_timeout')
- end
- end
- else
- unless transaction_open?
- raise <<~ERROR
- Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block.
- If you don't want to use a transaction wrap your code in a block call:
-
- disable_statement_timeout { # code that requires disabled statement here }
-
- This will make sure statement_timeout is disabled before and reset after the block execution is finished.
- ERROR
- end
-
- execute('SET LOCAL statement_timeout TO 0')
- end
- end
-
- # Executes the block with a retry mechanism that alters the +lock_timeout+ and +sleep_time+ between attempts.
- # The timings can be controlled via the +timing_configuration+ parameter.
- # If the lock was not acquired within the retry period, a last attempt is made without using +lock_timeout+.
- #
- # Note this helper uses subtransactions when run inside an already open transaction.
- #
- # ==== Examples
- # # Invoking without parameters
- # with_lock_retries do
- # drop_table :my_table
- # end
- #
- # # Invoking with custom +timing_configuration+
- # t = [
- # [1.second, 1.second],
- # [2.seconds, 2.seconds]
- # ]
- #
- # with_lock_retries(timing_configuration: t) do
- # drop_table :my_table # this will be retried twice
- # end
- #
- # # Disabling the retries using an environment variable
- # > export DISABLE_LOCK_RETRIES=true
- #
- # with_lock_retries do
- # drop_table :my_table # one invocation, it will not retry at all
- # end
- #
- # ==== Parameters
- # * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the block, sleep time before the next iteration, defaults to `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION`
- # * +logger+ - [Gitlab::JsonLogger]
- # * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
- def with_lock_retries(*args, **kwargs, &block)
- raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion)
- merged_args = {
- connection: connection,
- klass: self.class,
- logger: Gitlab::BackgroundMigration::Logger,
- allow_savepoints: true
- }.merge(kwargs)
-
- Gitlab::Database::WithLockRetries.new(**merged_args)
- .run(raise_on_exhaustion: raise_on_exhaustion, &block)
- end
-
def true_value
Database.true_value
end
@@ -796,6 +725,10 @@ module Gitlab
install_rename_triggers(table, old, new)
end
+ def convert_to_type_column(column, from_type, to_type)
+ "#{column}_convert_#{from_type}_to_#{to_type}"
+ end
+
def convert_to_bigint_column(column)
"#{column}_convert_to_bigint"
end
@@ -826,7 +759,22 @@ module Gitlab
# columns - The name, or array of names, of the column(s) that we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
def initialize_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
- create_temporary_columns_and_triggers(table, columns, primary_key: primary_key, data_type: :bigint)
+ mappings = Array(columns).map do |c|
+ {
+ c => {
+ from_type: :int,
+ to_type: :bigint,
+ default_value: 0
+ }
+ }
+ end.reduce(&:merge)
+
+ create_temporary_columns_and_triggers(
+ table,
+ mappings,
+ primary_key: primary_key,
+ old_bigint_column_naming: true
+ )
end
# Reverts `initialize_conversion_of_integer_to_bigint`
@@ -849,9 +797,23 @@ module Gitlab
# table - The name of the database table containing the columns
# columns - The name, or array of names, of the column(s) that we have converted to bigint.
# primary_key - The name of the primary key column (most often :id)
-
def restore_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
- create_temporary_columns_and_triggers(table, columns, primary_key: primary_key, data_type: :int)
+ mappings = Array(columns).map do |c|
+ {
+ c => {
+ from_type: :bigint,
+ to_type: :int,
+ default_value: 0
+ }
+ }
+ end.reduce(&:merge)
+
+ create_temporary_columns_and_triggers(
+ table,
+ mappings,
+ primary_key: primary_key,
+ old_bigint_column_naming: true
+ )
end
# Backfills the new columns used in an integer-to-bigint conversion using background migrations.
@@ -947,43 +909,6 @@ module Gitlab
execute("DELETE FROM batched_background_migrations WHERE #{conditions}")
end
- def ensure_batched_background_migration_is_finished(job_class_name:, table_name:, column_name:, job_arguments:, finalize: true)
- Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode!
-
- Gitlab::Database::BackgroundMigration::BatchedMigration.reset_column_information
- migration = Gitlab::Database::BackgroundMigration::BatchedMigration.find_for_configuration(
- Gitlab::Database.gitlab_schemas_for_connection(connection),
- job_class_name, table_name, column_name, job_arguments
- )
-
- configuration = {
- job_class_name: job_class_name,
- table_name: table_name,
- column_name: column_name,
- job_arguments: job_arguments
- }
-
- return Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}" if migration.nil?
-
- return if migration.finished?
-
- finalize_batched_background_migration(job_class_name: job_class_name, table_name: table_name, column_name: column_name, job_arguments: job_arguments) if finalize
-
- unless migration.reload.finished? # rubocop:disable Cop/ActiveRecordAssociationReload
- raise "Expected batched background migration for the given configuration to be marked as 'finished', " \
- "but it is '#{migration.status_name}':" \
- "\t#{configuration}" \
- "\n\n" \
- "Finalize it manually by running the following command in a `bash` or `sh` shell:" \
- "\n\n" \
- "\tsudo gitlab-rake gitlab:background_migrations:finalize[#{job_class_name},#{table_name},#{column_name},'#{job_arguments.to_json.gsub(',', '\,')}']" \
- "\n\n" \
- "For more information, check the documentation" \
- "\n\n" \
- "\thttps://docs.gitlab.com/ee/user/admin_area/monitoring/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished"
- end
- end
-
# Returns an Array containing the indexes for the given column
def indexes_for(table, column)
column = column.to_s
@@ -1102,6 +1027,24 @@ module Gitlab
rescue ArgumentError
end
+ # Remove any instances of deprecated job classes lingering in queues.
+ #
+ # rubocop:disable Cop/SidekiqApiUsage
+ def sidekiq_remove_jobs(job_klass:)
+ Sidekiq::Queue.new(job_klass.queue).each do |job|
+ job.delete if job.klass == job_klass.to_s
+ end
+
+ Sidekiq::RetrySet.new.each do |retri|
+ retri.delete if retri.klass == job_klass.to_s
+ end
+
+ Sidekiq::ScheduledSet.new.each do |scheduled|
+ scheduled.delete if scheduled.klass == job_klass.to_s
+ end
+ end
+ # rubocop:enable Cop/SidekiqApiUsage
+
def sidekiq_queue_migrate(queue_from, to:)
while sidekiq_queue_length(queue_from) > 0
Sidekiq.redis do |conn|
@@ -1194,320 +1137,6 @@ into similar problems in the future (e.g. when new tables are created).
execute(sql)
end
- # Returns the name for a check constraint
- #
- # type:
- # - Any value, as long as it is unique
- # - Constraint names are unique per table in Postgres, and, additionally,
- # we can have multiple check constraints over a column
- # So we use the (table, column, type) triplet as a unique name
- # - e.g. we use 'max_length' when adding checks for text limits
- # or 'not_null' when adding a NOT NULL constraint
- #
- def check_constraint_name(table, column, type)
- identifier = "#{table}_#{column}_check_#{type}"
- # Check concurrent_foreign_key_name() for info on why we use a hash
- hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10)
-
- "check_#{hashed_identifier}"
- end
-
- def check_constraint_exists?(table, constraint_name)
- # Constraint names are unique per table in Postgres, not per schema
- # Two tables can have constraints with the same name, so we filter by
- # the table name in addition to using the constraint_name
- check_sql = <<~SQL
- SELECT COUNT(*)
- FROM pg_catalog.pg_constraint con
- INNER JOIN pg_catalog.pg_class rel
- ON rel.oid = con.conrelid
- INNER JOIN pg_catalog.pg_namespace nsp
- ON nsp.oid = con.connamespace
- WHERE con.contype = 'c'
- AND con.conname = #{connection.quote(constraint_name)}
- AND nsp.nspname = #{connection.quote(current_schema)}
- AND rel.relname = #{connection.quote(table)}
- SQL
-
- connection.select_value(check_sql) > 0
- end
-
- # Adds a check constraint to a table
- #
- # This method is the generic helper for adding any check constraint
- # More specialized helpers may use it (e.g. add_text_limit or add_not_null)
- #
- # This method only requires minimal locking:
- # - The constraint is added using NOT VALID
- # This allows us to add the check constraint without validating it
- # - The check will be enforced for new data (inserts) coming in
- # - If `validate: true` the constraint is also validated
- # Otherwise, validate_check_constraint() can be used at a later stage
- # - Check comments on add_concurrent_foreign_key for more info
- #
- # table - The table the constraint will be added to
- # check - The check clause to add
- # e.g. 'char_length(name) <= 5' or 'store IS NOT NULL'
- # constraint_name - The name of the check constraint (otherwise auto-generated)
- # Should be unique per table (not per column)
- # validate - Whether to validate the constraint in this call
- #
- def add_check_constraint(table, check, constraint_name, validate: true)
- # Transactions would result in ALTER TABLE locks being held for the
- # duration of the transaction, defeating the purpose of this method.
- validate_not_in_transaction!(:add_check_constraint)
-
- validate_check_constraint_name!(constraint_name)
-
- if check_constraint_exists?(table, constraint_name)
- warning_message = <<~MESSAGE
- Check constraint was not created because it exists already
- (this may be due to an aborted migration or similar)
- table: #{table}, check: #{check}, constraint name: #{constraint_name}
- MESSAGE
-
- Gitlab::AppLogger.warn warning_message
- else
- # Only add the constraint without validating it
- # Even though it is fast, ADD CONSTRAINT requires an EXCLUSIVE lock
- # Use with_lock_retries to make sure that this operation
- # will not timeout on tables accessed by many processes
- with_lock_retries do
- execute <<-EOF.strip_heredoc
- ALTER TABLE #{table}
- ADD CONSTRAINT #{constraint_name}
- CHECK ( #{check} )
- NOT VALID;
- EOF
- end
- end
-
- if validate
- validate_check_constraint(table, constraint_name)
- end
- end
-
- def validate_check_constraint(table, constraint_name)
- validate_check_constraint_name!(constraint_name)
-
- unless check_constraint_exists?(table, constraint_name)
- raise missing_schema_object_message(table, "check constraint", constraint_name)
- end
-
- disable_statement_timeout do
- # VALIDATE CONSTRAINT only requires a SHARE UPDATE EXCLUSIVE LOCK
- # It only conflicts with other validations and creating indexes
- execute("ALTER TABLE #{table} VALIDATE CONSTRAINT #{constraint_name};")
- end
- end
-
- def remove_check_constraint(table, constraint_name)
- # This is technically not necessary, but aligned with add_check_constraint
- # and allows us to continue use with_lock_retries here
- validate_not_in_transaction!(:remove_check_constraint)
-
- validate_check_constraint_name!(constraint_name)
-
- # DROP CONSTRAINT requires an EXCLUSIVE lock
- # Use with_lock_retries to make sure that this will not timeout
- with_lock_retries do
- execute <<-EOF.strip_heredoc
- ALTER TABLE #{table}
- DROP CONSTRAINT IF EXISTS #{constraint_name}
- EOF
- end
- end
-
- # Copies all check constraints for the old column to the new column.
- #
- # table - The table containing the columns.
- # old - The old column.
- # new - The new column.
- # schema - The schema the table is defined for
- # If it is not provided, then the current_schema is used
- def copy_check_constraints(table, old, new, schema: nil)
- if transaction_open?
- raise 'copy_check_constraints can not be run inside a transaction'
- end
-
- unless column_exists?(table, old)
- raise "Column #{old} does not exist on #{table}"
- end
-
- unless column_exists?(table, new)
- raise "Column #{new} does not exist on #{table}"
- end
-
- table_with_schema = schema.present? ? "#{schema}.#{table}" : table
-
- check_constraints_for(table, old, schema: schema).each do |check_c|
- validate = !(check_c["constraint_def"].end_with? "NOT VALID")
-
- # Normalize:
- # - Old constraint definitions:
- # '(char_length(entity_path) <= 5500)'
- # - Definitionss from pg_get_constraintdef(oid):
- # 'CHECK ((char_length(entity_path) <= 5500))'
- # - Definitions from pg_get_constraintdef(oid, pretty_bool):
- # 'CHECK (char_length(entity_path) <= 5500)'
- # - Not valid constraints: 'CHECK (...) NOT VALID'
- # to a single format that we can use:
- # '(char_length(entity_path) <= 5500)'
- check_definition = check_c["constraint_def"]
- .sub(/^\s*(CHECK)?\s*\({0,2}/, '(')
- .sub(/\){0,2}\s*(NOT VALID)?\s*$/, ')')
-
- constraint_name = begin
- if check_definition == "(#{old} IS NOT NULL)"
- not_null_constraint_name(table_with_schema, new)
- elsif check_definition.start_with? "(char_length(#{old}) <="
- text_limit_name(table_with_schema, new)
- else
- check_constraint_name(table_with_schema, new, 'copy_check_constraint')
- end
- end
-
- add_check_constraint(
- table_with_schema,
- check_definition.gsub(old.to_s, new.to_s),
- constraint_name,
- validate: validate
- )
- end
- end
-
- # Migration Helpers for adding limit to text columns
- def add_text_limit(table, column, limit, constraint_name: nil, validate: true)
- add_check_constraint(
- table,
- "char_length(#{column}) <= #{limit}",
- text_limit_name(table, column, name: constraint_name),
- validate: validate
- )
- end
-
- def validate_text_limit(table, column, constraint_name: nil)
- validate_check_constraint(table, text_limit_name(table, column, name: constraint_name))
- end
-
- def remove_text_limit(table, column, constraint_name: nil)
- remove_check_constraint(table, text_limit_name(table, column, name: constraint_name))
- end
-
- def check_text_limit_exists?(table, column, constraint_name: nil)
- check_constraint_exists?(table, text_limit_name(table, column, name: constraint_name))
- end
-
- # Migration Helpers for managing not null constraints
- def add_not_null_constraint(table, column, constraint_name: nil, validate: true)
- if column_is_nullable?(table, column)
- add_check_constraint(
- table,
- "#{column} IS NOT NULL",
- not_null_constraint_name(table, column, name: constraint_name),
- validate: validate
- )
- else
- warning_message = <<~MESSAGE
- NOT NULL check constraint was not created:
- column #{table}.#{column} is already defined as `NOT NULL`
- MESSAGE
-
- Gitlab::AppLogger.warn warning_message
- end
- end
-
- def validate_not_null_constraint(table, column, constraint_name: nil)
- validate_check_constraint(
- table,
- not_null_constraint_name(table, column, name: constraint_name)
- )
- end
-
- def remove_not_null_constraint(table, column, constraint_name: nil)
- remove_check_constraint(
- table,
- not_null_constraint_name(table, column, name: constraint_name)
- )
- end
-
- def check_not_null_constraint_exists?(table, column, constraint_name: nil)
- check_constraint_exists?(
- table,
- not_null_constraint_name(table, column, name: constraint_name)
- )
- end
-
- def create_extension(extension)
- execute('CREATE EXTENSION IF NOT EXISTS %s' % extension)
- rescue ActiveRecord::StatementInvalid => e
- dbname = ApplicationRecord.database.database_name
- user = ApplicationRecord.database.username
-
- warn(<<~MSG) if e.to_s =~ /permission denied/
- GitLab requires the PostgreSQL extension '#{extension}' installed in database '#{dbname}', but
- the database user is not allowed to install the extension.
-
- You can either install the extension manually using a database superuser:
-
- CREATE EXTENSION IF NOT EXISTS #{extension}
-
- Or, you can solve this by logging in to the GitLab
- database (#{dbname}) using a superuser and running:
-
- ALTER #{user} WITH SUPERUSER
-
- This query will grant the user superuser permissions, ensuring any database extensions
- can be installed through migrations.
-
- For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html.
- MSG
-
- raise
- end
-
- def drop_extension(extension)
- execute('DROP EXTENSION IF EXISTS %s' % extension)
- rescue ActiveRecord::StatementInvalid => e
- dbname = ApplicationRecord.database.database_name
- user = ApplicationRecord.database.username
-
- warn(<<~MSG) if e.to_s =~ /permission denied/
- This migration attempts to drop the PostgreSQL extension '#{extension}'
- installed in database '#{dbname}', but the database user is not allowed
- to drop the extension.
-
- You can either drop the extension manually using a database superuser:
-
- DROP EXTENSION IF EXISTS #{extension}
-
- Or, you can solve this by logging in to the GitLab
- database (#{dbname}) using a superuser and running:
-
- ALTER #{user} WITH SUPERUSER
-
- This query will grant the user superuser permissions, ensuring any database extensions
- can be dropped through migrations.
-
- For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html.
- MSG
-
- raise
- end
-
- def rename_constraint(table_name, old_name, new_name)
- execute <<~SQL
- ALTER TABLE #{quote_table_name(table_name)}
- RENAME CONSTRAINT #{quote_column_name(old_name)} TO #{quote_column_name(new_name)}
- SQL
- end
-
- def drop_constraint(table_name, constraint_name, cascade: false)
- execute <<~SQL
- ALTER TABLE #{quote_table_name(table_name)} DROP CONSTRAINT #{quote_column_name(constraint_name)} #{cascade_statement(cascade)}
- SQL
- end
-
def add_primary_key_using_index(table_name, pk_name, index_to_use)
execute <<~SQL
ALTER TABLE #{quote_table_name(table_name)} ADD CONSTRAINT #{quote_table_name(pk_name)} PRIMARY KEY USING INDEX #{quote_table_name(index_to_use)}
@@ -1536,17 +1165,20 @@ into similar problems in the future (e.g. when new tables are created).
SQL
end
- private
+ # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
+ def create_temporary_columns_and_triggers(table, mappings, primary_key: :id, old_bigint_column_naming: false)
+ raise ArgumentError, "No mappings for column conversion provided" if mappings.blank?
- def multiple_columns(columns, separator: ', ')
- Array.wrap(columns).join(separator)
- end
+ unless mappings.values.all? { |values| mapping_has_required_columns?(values) }
+ raise ArgumentError, "Some mappings don't have required keys provided"
+ end
- def cascade_statement(cascade)
- cascade ? 'CASCADE' : ''
- end
+ neutral_values_for_type = {
+ int: 0,
+ bigint: 0,
+ uuid: '00000000-0000-0000-0000-000000000000'
+ }
- def create_temporary_columns_and_triggers(table, columns, primary_key: :id, data_type: :bigint)
unless table_exists?(table)
raise "Table #{table} does not exist"
end
@@ -1555,7 +1187,7 @@ into similar problems in the future (e.g. when new tables are created).
raise "Column #{primary_key} does not exist on #{table}"
end
- columns = Array.wrap(columns)
+ columns = mappings.keys
columns.each do |column|
next if column_exists?(table, column)
@@ -1564,67 +1196,88 @@ into similar problems in the future (e.g. when new tables are created).
check_trigger_permissions!(table)
- conversions = columns.to_h { |column| [column, convert_to_bigint_column(column)] }
+ if old_bigint_column_naming
+ mappings.each do |column, params|
+ params.merge!(
+ temporary_column_name: convert_to_bigint_column(column)
+ )
+ end
+ else
+ mappings.each do |column, params|
+ params.merge!(
+ temporary_column_name: convert_to_type_column(column, params[:from_type], params[:to_type])
+ )
+ end
+ end
with_lock_retries do
- conversions.each do |(source_column, temporary_name)|
- column = column_for(table, source_column)
+ mappings.each do |(column_name, params)|
+ column = column_for(table, column_name)
+ temporary_name = params[:temporary_column_name]
+ data_type = params[:to_type]
+ default_value = params[:default_value]
if (column.name.to_s == primary_key.to_s) || !column.null
# If the column to be converted is either a PK or is defined as NOT NULL,
# set it to `NOT NULL DEFAULT 0` and we'll copy-paste the correct values below
# That way, we skip the expensive validation step required to add
# a NOT NULL constraint at the end of the process
- add_column(table, temporary_name, data_type, default: column.default || 0, null: false)
+ add_column(
+ table,
+ temporary_name,
+ data_type,
+ default: column.default || default_value || neutral_values_for_type.fetch(data_type),
+ null: false
+ )
else
- add_column(table, temporary_name, data_type, default: column.default)
+ add_column(
+ table,
+ temporary_name,
+ data_type,
+ default: column.default
+ )
end
end
- install_rename_triggers(table, conversions.keys, conversions.values)
+ old_column_names = mappings.keys
+ temporary_column_names = mappings.values.map { |v| v[:temporary_column_name] }
+ install_rename_triggers(table, old_column_names, temporary_column_names)
end
end
+ # rubocop:enable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
- def validate_check_constraint_name!(constraint_name)
- if constraint_name.to_s.length > MAX_IDENTIFIER_NAME_LENGTH
- raise "The maximum allowed constraint name is #{MAX_IDENTIFIER_NAME_LENGTH} characters"
+ def partition?(table_name)
+ if view_exists?(:postgres_partitions)
+ Gitlab::Database::PostgresPartition.partition_exists?(table_name)
+ else
+ Gitlab::Database::PostgresPartition.legacy_partition_exists?(table_name)
end
end
- # Returns an ActiveRecord::Result containing the check constraints
- # defined for the given column.
- #
- # If the schema is not provided, then the current_schema is used
- def check_constraints_for(table, column, schema: nil)
- check_sql = <<~SQL
- SELECT
- ccu.table_schema as schema_name,
- ccu.table_name as table_name,
- ccu.column_name as column_name,
- con.conname as constraint_name,
- pg_get_constraintdef(con.oid) as constraint_def
- FROM pg_catalog.pg_constraint con
- INNER JOIN pg_catalog.pg_class rel
- ON rel.oid = con.conrelid
- INNER JOIN pg_catalog.pg_namespace nsp
- ON nsp.oid = con.connamespace
- INNER JOIN information_schema.constraint_column_usage ccu
- ON con.conname = ccu.constraint_name
- AND nsp.nspname = ccu.constraint_schema
- AND rel.relname = ccu.table_name
- WHERE nsp.nspname = #{connection.quote(schema.presence || current_schema)}
- AND rel.relname = #{connection.quote(table)}
- AND ccu.column_name = #{connection.quote(column)}
- AND con.contype = 'c'
- ORDER BY constraint_name
- SQL
+ private
+
+ def multiple_columns(columns, separator: ', ')
+ Array.wrap(columns).join(separator)
+ end
+
+ def cascade_statement(cascade)
+ cascade ? 'CASCADE' : ''
+ end
- connection.exec_query(check_sql)
+ def validate_check_constraint_name!(constraint_name)
+ if constraint_name.to_s.length > MAX_IDENTIFIER_NAME_LENGTH
+ raise "The maximum allowed constraint name is #{MAX_IDENTIFIER_NAME_LENGTH} characters"
+ end
end
- def statement_timeout_disabled?
- # This is a string of the form "100ms" or "0" when disabled
- connection.select_value('SHOW statement_timeout') == "0"
+ # mappings => {} where keys are column names and values are hashes with the following keys:
+ # from_type - from which type we're migrating
+ # to_type - to which type we're migrating
+ # default_value - custom default value, if not provided will be taken from neutral_values_for_type
+ def mapping_has_required_columns?(mapping)
+ %i[from_type to_type].map do |required_key|
+ mapping.has_key?(required_key)
+ end.all?
end
def column_is_nullable?(table, column)
@@ -1640,14 +1293,6 @@ into similar problems in the future (e.g. when new tables are created).
connection.select_value(check_sql) == 'YES'
end
- def text_limit_name(table, column, name: nil)
- name.presence || check_constraint_name(table, column, 'max_length')
- end
-
- def not_null_constraint_name(table, column, name: nil)
- name.presence || check_constraint_name(table, column, 'not_null')
- end
-
def missing_schema_object_message(table, type, name)
<<~MESSAGE
Could not find #{type} "#{name}" on table "#{table}" which was referenced during the migration.
@@ -1717,17 +1362,6 @@ into similar problems in the future (e.g. when new tables are created).
Must end with `_at`}
MESSAGE
end
-
- def validate_not_in_transaction!(method_name, modifier = nil)
- return unless transaction_open?
-
- raise <<~ERROR
- #{["`#{method_name}`", modifier].compact.join(' ')} cannot be run inside a transaction.
-
- You can disable transactions by calling `disable_ddl_transaction!` in the body of
- your migration class
- ERROR
- end
end
end
end
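
Note: most of the removals above are straight moves into the new
Migrations::ConstraintsHelpers, ExtensionHelpers, LockRetriesHelpers and
TimeoutHelpers modules included at the top of the file. The substantive change
is that create_temporary_columns_and_triggers becomes public and
mapping-driven, so it can convert between arbitrary column types rather than
only int to bigint. A sketch of the generalized API inside a migration (table
and columns hypothetical):

    mappings = {
      id:        { from_type: :int, to_type: :bigint, default_value: 0 },
      author_id: { from_type: :int, to_type: :bigint, default_value: 0 }
    }

    create_temporary_columns_and_triggers(
      :events,
      mappings,
      primary_key: :id,
      old_bigint_column_naming: true # keeps the <column>_convert_to_bigint naming
    )
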
diff --git a/lib/gitlab/database/migration_helpers/v2.rb b/lib/gitlab/database/migration_helpers/v2.rb
index dd426962033..b5b8b58681c 100644
--- a/lib/gitlab/database/migration_helpers/v2.rb
+++ b/lib/gitlab/database/migration_helpers/v2.rb
@@ -205,8 +205,8 @@ module Gitlab
raise "Column #{old_column} does not exist on #{table}"
end
- if column.default
- raise "#{calling_operation} does not currently support columns with default values"
+ if column.default_function
+ raise "#{calling_operation} does not currently support columns with default functions"
end
unless column_exists?(table, batch_column_name)
@@ -269,17 +269,20 @@ module Gitlab
def create_insert_trigger(trigger_name, quoted_table, quoted_old_column, quoted_new_column)
function_name = function_name_for_trigger(trigger_name)
+ column = columns(quoted_table.delete('"').to_sym).find { |column| column.name == quoted_old_column.delete('"') }
+ quoted_default_value = connection.quote(column.default)
+
execute(<<~SQL)
CREATE OR REPLACE FUNCTION #{function_name}()
RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
- IF NEW.#{quoted_old_column} IS NULL AND NEW.#{quoted_new_column} IS NOT NULL THEN
+ IF NEW.#{quoted_old_column} IS NOT DISTINCT FROM #{quoted_default_value} AND NEW.#{quoted_new_column} IS DISTINCT FROM #{quoted_default_value} THEN
NEW.#{quoted_old_column} = NEW.#{quoted_new_column};
END IF;
- IF NEW.#{quoted_new_column} IS NULL AND NEW.#{quoted_old_column} IS NOT NULL THEN
+ IF NEW.#{quoted_new_column} IS NOT DISTINCT FROM #{quoted_default_value} AND NEW.#{quoted_old_column} IS DISTINCT FROM #{quoted_default_value} THEN
NEW.#{quoted_new_column} = NEW.#{quoted_old_column};
END IF;
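
Note: the old trigger only copied values when one column was NULL, which
breaks for columns whose "unset" value is a non-NULL default. Comparing
against the quoted default with IS NOT DISTINCT FROM handles both cases, and
is why the guard above now rejects only default functions instead of any
default. The NULL-safe semantics, for reference (a plain-SQL sketch wrapped in
a heredoc):

    connection.select_rows(<<~SQL)
      SELECT NULL IS NOT DISTINCT FROM NULL, -- true (plain '=' would yield NULL)
             1    IS NOT DISTINCT FROM NULL, -- false
             1    IS NOT DISTINCT FROM 1     -- true
    SQL
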
diff --git a/lib/gitlab/database/migrations/batched_background_migration_helpers.rb b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb
index 363fd0598f9..e958ce2aba4 100644
--- a/lib/gitlab/database/migrations/batched_background_migration_helpers.rb
+++ b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb
@@ -196,6 +196,43 @@ module Gitlab
:gitlab_main
end
end
+
+ def ensure_batched_background_migration_is_finished(job_class_name:, table_name:, column_name:, job_arguments:, finalize: true)
+ Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode!
+
+ Gitlab::Database::BackgroundMigration::BatchedMigration.reset_column_information
+ migration = Gitlab::Database::BackgroundMigration::BatchedMigration.find_for_configuration(
+ Gitlab::Database.gitlab_schemas_for_connection(connection),
+ job_class_name, table_name, column_name, job_arguments
+ )
+
+ configuration = {
+ job_class_name: job_class_name,
+ table_name: table_name,
+ column_name: column_name,
+ job_arguments: job_arguments
+ }
+
+ return Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}" if migration.nil?
+
+ return if migration.finished?
+
+ finalize_batched_background_migration(job_class_name: job_class_name, table_name: table_name, column_name: column_name, job_arguments: job_arguments) if finalize
+
+ return if migration.reload.finished? # rubocop:disable Cop/ActiveRecordAssociationReload
+
+ raise "Expected batched background migration for the given configuration to be marked as 'finished', " \
+ "but it is '#{migration.status_name}':" \
+ "\t#{configuration}" \
+ "\n\n" \
+ "Finalize it manually by running the following command in a `bash` or `sh` shell:" \
+ "\n\n" \
+ "\tsudo gitlab-rake gitlab:background_migrations:finalize[#{job_class_name},#{table_name},#{column_name},'#{job_arguments.to_json.gsub(',', '\,')}']" \
+ "\n\n" \
+ "For more information, check the documentation" \
+ "\n\n" \
+ "\thttps://docs.gitlab.com/ee/user/admin_area/monitoring/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished"
+ end
end
end
end
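
Note: ensure_batched_background_migration_is_finished moves here from
migration_helpers.rb, rewritten with an early return in place of the old
unless block. A usage sketch in a post-deployment migration (class name and
job arguments hypothetical):

    class EnsureBackfillIsFinished < Gitlab::Database::Migration[2.0]
      disable_ddl_transaction!

      restrict_gitlab_migration gitlab_schema: :gitlab_main

      def up
        ensure_batched_background_migration_is_finished(
          job_class_name: 'BackfillExampleColumn',
          table_name: :events,
          column_name: :id,
          job_arguments: []
        )
      end

      def down
        # no-op: a finished background migration cannot be undone here
      end
    end
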
diff --git a/lib/gitlab/database/migrations/constraints_helpers.rb b/lib/gitlab/database/migrations/constraints_helpers.rb
new file mode 100644
index 00000000000..7b849e3137a
--- /dev/null
+++ b/lib/gitlab/database/migrations/constraints_helpers.rb
@@ -0,0 +1,337 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ module ConstraintsHelpers
+ include LockRetriesHelpers
+ include TimeoutHelpers
+
+ # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
+ MAX_IDENTIFIER_NAME_LENGTH = 63
+
+ # Returns the name for a check constraint
+ #
+ # type:
+ # - Any value, as long as it is unique
+ # - Constraint names are unique per table in Postgres, and, additionally,
+ # we can have multiple check constraints over a column
+ # So we use the (table, column, type) triplet as a unique name
+ # - e.g. we use 'max_length' when adding checks for text limits
+ # or 'not_null' when adding a NOT NULL constraint
+ #
+ def check_constraint_name(table, column, type)
+ identifier = "#{table}_#{column}_check_#{type}"
+ # Check concurrent_foreign_key_name() for info on why we use a hash
+ hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10)
+
+ "check_#{hashed_identifier}"
+ end
+
+ def check_constraint_exists?(table, constraint_name)
+ # Constraint names are unique per table in Postgres, not per schema
+ # Two tables can have constraints with the same name, so we filter by
+ # the table name in addition to using the constraint_name
+
+ check_sql = <<~SQL
+ SELECT COUNT(*)
+ FROM pg_catalog.pg_constraint con
+ INNER JOIN pg_catalog.pg_class rel
+ ON rel.oid = con.conrelid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = con.connamespace
+ WHERE con.contype = 'c'
+ AND con.conname = #{connection.quote(constraint_name)}
+ AND nsp.nspname = #{connection.quote(current_schema)}
+ AND rel.relname = #{connection.quote(table)}
+ SQL
+
+ connection.select_value(check_sql) > 0
+ end
+
+ # Adds a check constraint to a table
+ #
+ # This method is the generic helper for adding any check constraint
+ # More specialized helpers may use it (e.g. add_text_limit or add_not_null)
+ #
+ # This method only requires minimal locking:
+ # - The constraint is added using NOT VALID
+ # This allows us to add the check constraint without validating it
+ # - The check will be enforced for new data (inserts) coming in
+ # - If `validate: true` the constraint is also validated
+ # Otherwise, validate_check_constraint() can be used at a later stage
+ # - Check comments on add_concurrent_foreign_key for more info
+ #
+ # table - The table the constraint will be added to
+ # check - The check clause to add
+ # e.g. 'char_length(name) <= 5' or 'store IS NOT NULL'
+ # constraint_name - The name of the check constraint (otherwise auto-generated)
+ # Should be unique per table (not per column)
+ # validate - Whether to validate the constraint in this call
+ #
+ def add_check_constraint(table, check, constraint_name, validate: true)
+ # Transactions would result in ALTER TABLE locks being held for the
+ # duration of the transaction, defeating the purpose of this method.
+ validate_not_in_transaction!(:add_check_constraint)
+
+ validate_check_constraint_name!(constraint_name)
+
+ if check_constraint_exists?(table, constraint_name)
+ warning_message = <<~MESSAGE
+ Check constraint was not created because it exists already
+ (this may be due to an aborted migration or similar)
+ table: #{table}, check: #{check}, constraint name: #{constraint_name}
+ MESSAGE
+
+ Gitlab::AppLogger.warn warning_message
+ else
+ # Only add the constraint without validating it
+ # Even though it is fast, ADD CONSTRAINT requires an EXCLUSIVE lock
+ # Use with_lock_retries to make sure that this operation
+ # will not timeout on tables accessed by many processes
+ with_lock_retries do
+ execute <<~SQL
+ ALTER TABLE #{table}
+ ADD CONSTRAINT #{constraint_name}
+ CHECK ( #{check} )
+ NOT VALID;
+ SQL
+ end
+ end
+
+ validate_check_constraint(table, constraint_name) if validate
+ end
+
+ def validate_check_constraint(table, constraint_name)
+ validate_check_constraint_name!(constraint_name)
+
+ unless check_constraint_exists?(table, constraint_name)
+ raise missing_schema_object_message(table, "check constraint", constraint_name)
+ end
+
+ disable_statement_timeout do
+ # VALIDATE CONSTRAINT only requires a SHARE UPDATE EXCLUSIVE LOCK
+ # It only conflicts with other validations and creating indexes
+ execute("ALTER TABLE #{table} VALIDATE CONSTRAINT #{constraint_name};")
+ end
+ end
+
+ def remove_check_constraint(table, constraint_name)
+ # This is technically not necessary, but aligned with add_check_constraint
+ # and allows us to continue use with_lock_retries here
+ validate_not_in_transaction!(:remove_check_constraint)
+
+ validate_check_constraint_name!(constraint_name)
+
+ # DROP CONSTRAINT requires an EXCLUSIVE lock
+ # Use with_lock_retries to make sure that this will not timeout
+ with_lock_retries do
+ execute <<-SQL
+ ALTER TABLE #{table}
+ DROP CONSTRAINT IF EXISTS #{constraint_name}
+ SQL
+ end
+ end
+
+ # Copies all check constraints for the old column to the new column.
+ #
+ # table - The table containing the columns.
+ # old - The old column.
+ # new - The new column.
+ # schema - The schema the table is defined for
+ # If it is not provided, then the current_schema is used
+ def copy_check_constraints(table, old, new, schema: nil)
+ raise 'copy_check_constraints can not be run inside a transaction' if transaction_open?
+
+ raise "Column #{old} does not exist on #{table}" unless column_exists?(table, old)
+
+ raise "Column #{new} does not exist on #{table}" unless column_exists?(table, new)
+
+ table_with_schema = schema.present? ? "#{schema}.#{table}" : table
+
+ check_constraints_for(table, old, schema: schema).each do |check_c|
+ validate = !(check_c["constraint_def"].end_with? "NOT VALID")
+
+ # Normalize:
+ # - Old constraint definitions:
+ # '(char_length(entity_path) <= 5500)'
+ # - Definitions from pg_get_constraintdef(oid):
+ # 'CHECK ((char_length(entity_path) <= 5500))'
+ # - Definitions from pg_get_constraintdef(oid, pretty_bool):
+ # 'CHECK (char_length(entity_path) <= 5500)'
+ # - Not valid constraints: 'CHECK (...) NOT VALID'
+ # to a single format that we can use:
+ # '(char_length(entity_path) <= 5500)'
+ check_definition = check_c["constraint_def"]
+ .sub(/^\s*(CHECK)?\s*\({0,2}/, '(')
+ .sub(/\){0,2}\s*(NOT VALID)?\s*$/, ')')
+
+ constraint_name = if check_definition == "(#{old} IS NOT NULL)"
+ not_null_constraint_name(table_with_schema, new)
+ elsif check_definition.start_with? "(char_length(#{old}) <="
+ text_limit_name(table_with_schema, new)
+ else
+ check_constraint_name(table_with_schema, new, 'copy_check_constraint')
+ end
+
+ add_check_constraint(
+ table_with_schema,
+ check_definition.gsub(old.to_s, new.to_s),
+ constraint_name,
+ validate: validate
+ )
+ end
+ end
+
+ # Migration Helpers for adding limit to text columns
+ def add_text_limit(table, column, limit, constraint_name: nil, validate: true)
+ add_check_constraint(
+ table,
+ "char_length(#{column}) <= #{limit}",
+ text_limit_name(table, column, name: constraint_name),
+ validate: validate
+ )
+ end
+
+ def validate_text_limit(table, column, constraint_name: nil)
+ validate_check_constraint(table, text_limit_name(table, column, name: constraint_name))
+ end
+
+ def remove_text_limit(table, column, constraint_name: nil)
+ remove_check_constraint(table, text_limit_name(table, column, name: constraint_name))
+ end
+
+ def check_text_limit_exists?(table, column, constraint_name: nil)
+ check_constraint_exists?(table, text_limit_name(table, column, name: constraint_name))
+ end
+
+ # Migration Helpers for managing not null constraints
+ def add_not_null_constraint(table, column, constraint_name: nil, validate: true)
+ if column_is_nullable?(table, column)
+ add_check_constraint(
+ table,
+ "#{column} IS NOT NULL",
+ not_null_constraint_name(table, column, name: constraint_name),
+ validate: validate
+ )
+ else
+ warning_message = <<~MESSAGE
+ NOT NULL check constraint was not created:
+ column #{table}.#{column} is already defined as `NOT NULL`
+ MESSAGE
+
+ Gitlab::AppLogger.warn warning_message
+ end
+ end
+
+ def validate_not_null_constraint(table, column, constraint_name: nil)
+ validate_check_constraint(
+ table,
+ not_null_constraint_name(table, column, name: constraint_name)
+ )
+ end
+
+ def remove_not_null_constraint(table, column, constraint_name: nil)
+ remove_check_constraint(
+ table,
+ not_null_constraint_name(table, column, name: constraint_name)
+ )
+ end
+
+ def check_not_null_constraint_exists?(table, column, constraint_name: nil)
+ check_constraint_exists?(
+ table,
+ not_null_constraint_name(table, column, name: constraint_name)
+ )
+ end
+
+ def rename_constraint(table_name, old_name, new_name)
+ execute <<~SQL
+ ALTER TABLE #{quote_table_name(table_name)}
+ RENAME CONSTRAINT #{quote_column_name(old_name)} TO #{quote_column_name(new_name)}
+ SQL
+ end
+
+ def drop_constraint(table_name, constraint_name, cascade: false)
+ execute <<~SQL
+ ALTER TABLE #{quote_table_name(table_name)} DROP CONSTRAINT #{quote_column_name(constraint_name)} #{cascade_statement(cascade)}
+ SQL
+ end
+
+ def validate_check_constraint_name!(constraint_name)
+ return unless constraint_name.to_s.length > MAX_IDENTIFIER_NAME_LENGTH
+
+ raise "The maximum allowed constraint name is #{MAX_IDENTIFIER_NAME_LENGTH} characters"
+ end
+
+ def text_limit_name(table, column, name: nil)
+ name.presence || check_constraint_name(table, column, 'max_length')
+ end
+
+ private
+
+ def validate_not_in_transaction!(method_name, modifier = nil)
+ return unless transaction_open?
+
+ raise <<~ERROR
+ #{["`#{method_name}`", modifier].compact.join(' ')} cannot be run inside a transaction.
+
+ You can disable transactions by calling `disable_ddl_transaction!` in the body of
+ your migration class
+ ERROR
+ end
+
+ # Returns an ActiveRecord::Result containing the check constraints
+ # defined for the given column.
+ #
+ # If the schema is not provided, then the current_schema is used
+ def check_constraints_for(table, column, schema: nil)
+ check_sql = <<~SQL
+ SELECT
+ ccu.table_schema as schema_name,
+ ccu.table_name as table_name,
+ ccu.column_name as column_name,
+ con.conname as constraint_name,
+ pg_get_constraintdef(con.oid) as constraint_def
+ FROM pg_catalog.pg_constraint con
+ INNER JOIN pg_catalog.pg_class rel
+ ON rel.oid = con.conrelid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = con.connamespace
+ INNER JOIN information_schema.constraint_column_usage ccu
+ ON con.conname = ccu.constraint_name
+ AND nsp.nspname = ccu.constraint_schema
+ AND rel.relname = ccu.table_name
+ WHERE nsp.nspname = #{connection.quote(schema.presence || current_schema)}
+ AND rel.relname = #{connection.quote(table)}
+ AND ccu.column_name = #{connection.quote(column)}
+ AND con.contype = 'c'
+ ORDER BY constraint_name
+ SQL
+
+ connection.exec_query(check_sql)
+ end
+
+ def cascade_statement(cascade)
+ cascade ? 'CASCADE' : ''
+ end
+
+ def not_null_constraint_name(table, column, name: nil)
+ name.presence || check_constraint_name(table, column, 'not_null')
+ end
+
+ def missing_schema_object_message(table, type, name)
+ <<~MESSAGE
+ Could not find #{type} "#{name}" on table "#{table}" which was referenced during the migration.
+ This issue could be caused by the database schema straying from the expected state.
+
+ To resolve this issue, please verify:
+ 1. all previous migrations have completed
+ 2. the database objects used in this migration match the Rails definition in schema.rb or structure.sql
+
+ MESSAGE
+ end
+ end
+ end
+ end
+end
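
Note: this module extracts the check-constraint helpers from
migration_helpers.rb essentially verbatim, with begin/end and if/unless blocks
reflowed to satisfy RuboCop. A usage sketch in a migration (table and column
hypothetical):

    class AddNameLimitToExampleTable < Gitlab::Database::Migration[2.0]
      disable_ddl_transaction! # add_check_constraint refuses to run in a transaction

      def up
        add_text_limit :example_table, :name, 255
        add_not_null_constraint :example_table, :name, validate: false # validate later
      end

      def down
        remove_text_limit :example_table, :name
        remove_not_null_constraint :example_table, :name
      end
    end
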
diff --git a/lib/gitlab/database/migrations/extension_helpers.rb b/lib/gitlab/database/migrations/extension_helpers.rb
new file mode 100644
index 00000000000..435e9e0d2dc
--- /dev/null
+++ b/lib/gitlab/database/migrations/extension_helpers.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ module ExtensionHelpers
+ def create_extension(extension)
+ execute("CREATE EXTENSION IF NOT EXISTS #{extension}")
+ rescue ActiveRecord::StatementInvalid => e
+ dbname = ApplicationRecord.database.database_name
+ user = ApplicationRecord.database.username
+
+ warn(<<~MSG) if e.to_s.include?('permission denied')
+ GitLab requires the PostgreSQL extension '#{extension}' installed in database '#{dbname}', but
+ the database user is not allowed to install the extension.
+
+ You can either install the extension manually using a database superuser:
+
+ CREATE EXTENSION IF NOT EXISTS #{extension}
+
+ Or, you can solve this by logging in to the GitLab
+ database (#{dbname}) using a superuser and running:
+
+ ALTER USER #{user} WITH SUPERUSER
+
+ This statement grants the user superuser permissions, ensuring any database extensions
+ can be installed through migrations.
+
+ For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html.
+ MSG
+
+ raise
+ end
+
+ def drop_extension(extension)
+ execute("DROP EXTENSION IF EXISTS #{extension}")
+ rescue ActiveRecord::StatementInvalid => e
+ dbname = ApplicationRecord.database.database_name
+ user = ApplicationRecord.database.username
+
+ warn(<<~MSG) if e.to_s.include?('permission denied')
+ This migration attempts to drop the PostgreSQL extension '#{extension}'
+ installed in database '#{dbname}', but the database user is not allowed
+ to drop the extension.
+
+ You can either drop the extension manually using a database superuser:
+
+ DROP EXTENSION IF EXISTS #{extension}
+
+ Or, you can solve this by logging in to the GitLab
+ database (#{dbname}) using a superuser and running:
+
+ ALTER USER #{user} WITH SUPERUSER
+
+ This statement grants the user superuser permissions, ensuring any database extensions
+ can be dropped through migrations.
+
+ For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html.
+ MSG
+
+ raise
+ end
+ end
+ end
+ end
+end
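A minimal sketch of a migration using these helpers; pg_trgm stands in for whichever extension is actually needed:

  class EnablePgTrgmExtension < Gitlab::Database::Migration[2.0]
    def up
      create_extension(:pg_trgm) # no-op when the extension is already installed
    end

    def down
      drop_extension(:pg_trgm) # no-op when the extension is absent
    end
  end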
diff --git a/lib/gitlab/database/migrations/lock_retries_helpers.rb b/lib/gitlab/database/migrations/lock_retries_helpers.rb
new file mode 100644
index 00000000000..137ef3ab144
--- /dev/null
+++ b/lib/gitlab/database/migrations/lock_retries_helpers.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ module LockRetriesHelpers
+ # Executes the block with a retry mechanism that alters the +lock_timeout+ and +sleep_time+ between attempts.
+ # The timings can be controlled via the +timing_configuration+ parameter.
+ # If the lock was not acquired within the retry period, a last attempt is made without using +lock_timeout+.
+ #
+ # Note this helper uses subtransactions when run inside an already open transaction.
+ #
+ # ==== Examples
+ # # Invoking without parameters
+ # with_lock_retries do
+ # drop_table :my_table
+ # end
+ #
+ # # Invoking with custom +timing_configuration+
+ # t = [
+ # [1.second, 1.second],
+ # [2.seconds, 2.seconds]
+ # ]
+ #
+ # with_lock_retries(timing_configuration: t) do
+ # drop_table :my_table # this will be retried twice
+ # end
+ #
+ # # Disabling the retries using an environment variable
+ # > export DISABLE_LOCK_RETRIES=true
+ #
+ # with_lock_retries do
+ # drop_table :my_table # one invocation, it will not retry at all
+ # end
+ #
+ # ==== Parameters
+ # * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the
+ # block, sleep time before the next iteration, defaults to
+ # `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION`
+ # * +logger+ - [Gitlab::JsonLogger]
+ # * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
+ def with_lock_retries(*args, **kwargs, &block)
+ raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion)
+ merged_args = {
+ connection: connection,
+ klass: self.class,
+ logger: Gitlab::BackgroundMigration::Logger,
+ allow_savepoints: true
+ }.merge(kwargs)
+
+ Gitlab::Database::WithLockRetries.new(**merged_args)
+ .run(raise_on_exhaustion: raise_on_exhaustion, &block)
+ end
+ end
+ end
+ end
+end
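Beyond the documented examples, the implementation also accepts raise_on_exhaustion:, which (in Gitlab::Database::WithLockRetries) raises once the retry budget is used up instead of making the final attempt without a lock timeout. A hedged sketch:

  with_lock_retries(raise_on_exhaustion: true) do
    drop_table :my_table
  end
  # raises Gitlab::Database::WithLockRetries::AttemptsExhaustedError when
  # the lock could not be acquired within the timing configuration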
diff --git a/lib/gitlab/database/migrations/runner.rb b/lib/gitlab/database/migrations/runner.rb
index 85dc6051c7c..27b161419b2 100644
--- a/lib/gitlab/database/migrations/runner.rb
+++ b/lib/gitlab/database/migrations/runner.rb
@@ -7,6 +7,7 @@ module Gitlab
BASE_RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze
METADATA_FILENAME = 'metadata.json'
SCHEMA_VERSION = 4 # Version of the output format produced by the runner
+ POST_MIGRATION_MATCHER = %r{db/post_migrate/}.freeze
class << self
def up(database:, legacy_mode: false)
@@ -116,7 +117,10 @@ module Gitlab
verbose_was = ActiveRecord::Migration.verbose
ActiveRecord::Migration.verbose = true
- sorted_migrations = migrations.sort_by(&:version)
+ sorted_migrations = migrations.sort_by do |m|
+ [m.filename.match?(POST_MIGRATION_MATCHER) ? 1 : 0, m.version]
+ end
+
sorted_migrations.reverse! if direction == :down
instrumentation = Instrumentation.new(result_dir: result_dir)
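The new sort key is a two-element array, so regular migrations always order before post-deployment migrations regardless of version. A standalone illustration (paths and versions are made up):

  require 'ostruct'

  POST_MIGRATION_MATCHER = %r{db/post_migrate/}

  migrations = [
    OpenStruct.new(filename: 'db/post_migrate/20221101000000_cleanup.rb', version: 20221101000000),
    OpenStruct.new(filename: 'db/migrate/20221102000000_add_column.rb', version: 20221102000000)
  ]

  sorted = migrations.sort_by { |m| [m.filename.match?(POST_MIGRATION_MATCHER) ? 1 : 0, m.version] }
  sorted.map(&:filename)
  # => ["db/migrate/20221102000000_add_column.rb", "db/post_migrate/20221101000000_cleanup.rb"]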
diff --git a/lib/gitlab/database/migrations/timeout_helpers.rb b/lib/gitlab/database/migrations/timeout_helpers.rb
new file mode 100644
index 00000000000..423c77452b1
--- /dev/null
+++ b/lib/gitlab/database/migrations/timeout_helpers.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ module TimeoutHelpers
+ # Long-running migrations may take more than the timeout allowed by
+ # the database. Disable the session's statement timeout to ensure
+ # migrations don't get killed prematurely.
+ #
+ # There are two possible ways to disable the statement timeout:
+ #
+ # - Per transaction (this is the preferred and default mode)
+ # - Per connection (requires cleanup after execution)
+ #
+ # When using the per-connection option, the code must be inside
+ # a block so that `RESET statement_timeout` is executed automatically when
+ # the block finishes; otherwise the timeout stays disabled until the
+ # connection is dropped or `RESET statement_timeout` is executed manually.
+ def disable_statement_timeout
+ if block_given?
+ if statement_timeout_disabled?
+ # Don't do anything if the statement_timeout is already disabled
+ # Allows for nested calls of disable_statement_timeout without
+ # resetting the timeout too early (before the outer call ends)
+ yield
+ else
+ begin
+ execute('SET statement_timeout TO 0')
+
+ yield
+ ensure
+ execute('RESET statement_timeout')
+ end
+ end
+ else
+ unless transaction_open?
+ raise <<~ERROR
+ Cannot call disable_statement_timeout() without an open transaction unless a block is given.
+ If you don't want to use a transaction, wrap your code in a block:
+
+ disable_statement_timeout { your_code_here }
+
+ This makes sure statement_timeout is disabled before the block runs and reset after it finishes.
+ ERROR
+ end
+
+ execute('SET LOCAL statement_timeout TO 0')
+ end
+ end
+
+ private
+
+ def statement_timeout_disabled?
+ # This is a string of the form "100ms" or "0" when disabled
+ connection.select_value('SHOW statement_timeout') == "0"
+ end
+ end
+ end
+ end
+end
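A sketch of both modes, with illustrative table names (the block form is what concurrent-index helpers rely on, since CREATE INDEX CONCURRENTLY cannot run inside a transaction):

  # Block mode: usable outside a transaction; the timeout is reset in `ensure`.
  disable_statement_timeout do
    execute('CREATE INDEX CONCURRENTLY index_big_table_on_id ON big_table (id)')
  end

  # Transaction mode: SET LOCAL only lasts until COMMIT/ROLLBACK, so no
  # cleanup is needed, but a transaction must already be open.
  transaction do
    disable_statement_timeout
    execute("DELETE FROM big_table WHERE created_at < now() - interval '1 year'")
  end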
diff --git a/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb b/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb
index 23a8dc0b44f..58447481e60 100644
--- a/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb
+++ b/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb
@@ -10,13 +10,17 @@ module Gitlab
attr_reader :partitioning_column, :table_name, :parent_table_name, :zero_partition_value
- def initialize(migration_context:, table_name:, parent_table_name:, partitioning_column:, zero_partition_value:)
+ def initialize(
+ migration_context:, table_name:, parent_table_name:, partitioning_column:,
+ zero_partition_value:, lock_tables: [])
+
@migration_context = migration_context
@connection = migration_context.connection
@table_name = table_name
@parent_table_name = parent_table_name
@partitioning_column = partitioning_column
@zero_partition_value = zero_partition_value
+ @lock_tables = Array.wrap(lock_tables)
end
def prepare_for_partitioning
@@ -35,7 +39,12 @@ module Gitlab
create_parent_table
attach_foreign_keys_to_parent
- migration_context.with_lock_retries(raise_on_exhaustion: true) do
+ lock_args = {
+ raise_on_exhaustion: true,
+ timing_configuration: lock_timing_configuration
+ }
+
+ migration_context.with_lock_retries(**lock_args) do
migration_context.execute(sql_to_convert_table)
end
end
@@ -74,6 +83,7 @@ module Gitlab
# but they acquire the same locks so it's much faster to include them
# here.
[
+ lock_tables_statement,
attach_table_to_parent_statement,
alter_sequence_statements(old_table: table_name, new_table: parent_table_name),
remove_constraint_statement
@@ -162,6 +172,16 @@ module Gitlab
end
end
+ def lock_tables_statement
+ return if @lock_tables.empty?
+
+ table_names = @lock_tables.map { |name| quote_table_name(name) }.join(', ')
+
+ <<~SQL
+ LOCK #{table_names} IN ACCESS EXCLUSIVE MODE
+ SQL
+ end
+
def attach_table_to_parent_statement
<<~SQL
ALTER TABLE #{quote_table_name(parent_table_name)}
@@ -235,6 +255,13 @@ module Gitlab
ALTER TABLE #{connection.quote_table_name(table_name)} OWNER TO CURRENT_USER
SQL
end
+
+ def lock_timing_configuration
+ iterations = Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION
+ aggressive_iterations = Array.new(5) { [10.seconds, 1.minute] }
+
+ iterations + aggressive_iterations
+ end
end
end
end
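With lock_tables: [:ci_builds] (an illustrative name), the statement batch built by sql_to_convert_table would begin with:

  LOCK "ci_builds" IN ACCESS EXCLUSIVE MODE

followed by the attach-partition, sequence, and constraint statements assembled above, all executed inside the lock-retry loop with the more aggressive timing configuration.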
diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
index 5e32ecad4ca..58c0728b614 100644
--- a/lib/gitlab/database/partitioning/detached_partition_dropper.rb
+++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
@@ -7,7 +7,7 @@ module Gitlab
Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop")
Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition|
- if partition_attached?(qualify_partition_name(detached_partition.table_name))
+ if partition_attached?(detached_partition.fully_qualified_table_name)
unmark_partition(detached_partition)
else
drop_partition(detached_partition)
@@ -41,14 +41,14 @@ module Gitlab
# Another process may have already dropped the table and deleted this entry
next unless try_lock_detached_partition(detached_partition.id)
- drop_detached_partition(detached_partition.table_name)
+ drop_detached_partition(detached_partition)
detached_partition.destroy!
end
end
def remove_foreign_keys(detached_partition)
- partition_identifier = qualify_partition_name(detached_partition.table_name)
+ partition_identifier = detached_partition.fully_qualified_table_name
# We want to load all of these into memory at once to get a consistent view to loop over,
# since we'll be deleting from this list as we go
@@ -65,7 +65,7 @@ module Gitlab
# It is important to only drop one foreign key per transaction.
# Dropping a foreign key takes an ACCESS EXCLUSIVE lock on both tables participating in the foreign key.
- partition_identifier = qualify_partition_name(detached_partition.table_name)
+ partition_identifier = detached_partition.fully_qualified_table_name
with_lock_retries do
connection.transaction(requires_new: false) do
next unless try_lock_detached_partition(detached_partition.id)
@@ -83,16 +83,10 @@ module Gitlab
end
end
- def drop_detached_partition(partition_name)
- partition_identifier = qualify_partition_name(partition_name)
+ def drop_detached_partition(detached_partition)
+ connection.drop_table(detached_partition.fully_qualified_table_name, if_exists: true)
- connection.drop_table(partition_identifier, if_exists: true)
-
- Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: partition_name)
- end
-
- def qualify_partition_name(table_name)
- "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{table_name}"
+ Gitlab::AppLogger.info(message: "Dropped previously detached partition", partition_name: detached_partition.table_name)
end
def partition_attached?(partition_identifier)
diff --git a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
index 15b542cf089..62f33bb56bc 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
@@ -7,6 +7,8 @@ module Gitlab
include Gitlab::Database::MigrationHelpers
include Gitlab::Database::SchemaHelpers
+ DuplicatedIndexesError = Class.new(StandardError)
+
ERROR_SCOPE = 'index'
# Concurrently creates a new index on a partitioned table. In concept this works similarly to
@@ -38,7 +40,7 @@ module Gitlab
partitioned_table.postgres_partitions.order(:name).each do |partition|
partition_index_name = generated_index_name(partition.identifier, options[:name])
- partition_options = options.merge(name: partition_index_name)
+ partition_options = options.merge(name: partition_index_name, allow_partition: true)
add_concurrent_index(partition.identifier, column_names, partition_options)
end
@@ -92,6 +94,42 @@ module Gitlab
.map { |_, indexes| indexes.map { |index| index['index_name'] } }
end
+ # Retrieves a hash of index names for a given table and schema, by index
+ # definition.
+ #
+ # Example:
+ #
+ # indexes_by_definition_for_table('table_name_goes_here')
+ #
+ # Returns:
+ #
+ # {
+ # "CREATE _ btree (created_at)" => "index_on_created_at"
+ # }
+ def indexes_by_definition_for_table(table_name, schema_name: connection.current_schema)
+ duplicate_indexes = find_duplicate_indexes(table_name, schema_name: schema_name)
+
+ unless duplicate_indexes.empty?
+ raise DuplicatedIndexesError, "#{table_name} has duplicate indexes: #{duplicate_indexes}"
+ end
+
+ find_indexes(table_name, schema_name: schema_name)
+ .each_with_object({}) { |row, hash| hash[row['index_id']] = row['index_name'] }
+ end
+
+ # Renames indexes for a given table and schema, mapping by index
+ # definition, to a hash of new index names.
+ #
+ # Example:
+ #
+ # index_names = indexes_by_definition_for_table('source_table_name_goes_here')
+ # drop_table('source_table_name_goes_here')
+ # rename_indexes_for_table('destination_table_name_goes_here', index_names)
+ def rename_indexes_for_table(table_name, new_index_names, schema_name: connection.current_schema)
+ current_index_names = indexes_by_definition_for_table(table_name, schema_name: schema_name)
+ rename_indexes(current_index_names, new_index_names, schema_name: schema_name)
+ end
+
private
def find_indexes(table_name, schema_name: connection.current_schema)
@@ -124,6 +162,18 @@ module Gitlab
def generated_index_name(partition_name, index_name)
object_name("#{partition_name}_#{index_name}", 'index')
end
+
+ def rename_indexes(from, to, schema_name: connection.current_schema)
+ indexes_to_rename = from.select { |index_id, _| to.has_key?(index_id) }
+ statements = indexes_to_rename.map do |index_id, index_name|
+ <<~SQL
+ ALTER INDEX #{connection.quote_table_name("#{schema_name}.#{connection.quote_column_name(index_name)}")}
+ RENAME TO #{connection.quote_column_name(to[index_id])}
+ SQL
+ end
+
+ connection.execute(statements.join(';'))
+ end
end
end
end
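Because two indexes with the same normalized definition would collapse onto one hash key, indexes_by_definition_for_table refuses to proceed and raises DuplicatedIndexesError; callers are expected to deduplicate first. A hedged sketch of the swap flow from the doc comment, with hypothetical table names:

  index_names = indexes_by_definition_for_table('ci_builds_old')
  # may raise DuplicatedIndexesError if two indexes share a definition

  drop_table('ci_builds_old')
  rename_indexes_for_table('ci_builds', index_names)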
diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
index 695a5d7ec77..f9790bf53b9 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
@@ -275,7 +275,7 @@ module Gitlab
).revert_preparation_for_partitioning
end
- def convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:)
+ def convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, lock_tables: [])
validate_not_in_transaction!(:convert_table_to_first_list_partition)
Gitlab::Database::Partitioning::ConvertTableToFirstListPartition
@@ -283,7 +283,8 @@ module Gitlab
table_name: table_name,
parent_table_name: parent_table_name,
partitioning_column: partitioning_column,
- zero_partition_value: initial_partitioning_value
+ zero_partition_value: initial_partitioning_value,
+ lock_tables: lock_tables
).partition
end
diff --git a/lib/gitlab/database/postgres_partition.rb b/lib/gitlab/database/postgres_partition.rb
index eb080904f73..eda11fd8382 100644
--- a/lib/gitlab/database/postgres_partition.rb
+++ b/lib/gitlab/database/postgres_partition.rb
@@ -19,6 +19,20 @@ module Gitlab
scope :for_parent_table, ->(name) { where("parent_identifier = concat(current_schema(), '.', ?)", name).order(:name) }
+ def self.partition_exists?(table_name)
+ where("identifier = concat(current_schema(), '.', ?)", table_name).exists?
+ end
+
+ def self.legacy_partition_exists?(table_name)
+ result = connection.select_value(<<~SQL)
+ SELECT true FROM pg_class
+ WHERE relname = '#{table_name}'
+ AND relispartition = true;
+ SQL
+
+ !!result
+ end
+
def to_s
name
end
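The two predicates answer the same question from different catalogs; a usage sketch with a hypothetical partition name:

  Gitlab::Database::PostgresPartition.partition_exists?('ci_builds_metadata_100')
  # checks the model's backing view, scoped to the current schema

  Gitlab::Database::PostgresPartition.legacy_partition_exists?('ci_builds_metadata_100')
  # consults pg_class.relispartition directly; the name is interpolated into
  # the SQL, so only trusted identifiers should be passed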
diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb
index 6f64d04270f..1280789b30c 100644
--- a/lib/gitlab/database/query_analyzer.rb
+++ b/lib/gitlab/database/query_analyzer.rb
@@ -86,7 +86,11 @@ module Gitlab
analyzers.each do |analyzer|
next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed)
- analyzer.analyze(parsed)
+ if analyzer.raw?
+ analyzer.analyze(sql)
+ else
+ analyzer.analyze(parsed)
+ end
rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e
# We catch all standard errors to prevent validation errors from introducing fatal errors in production
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
diff --git a/lib/gitlab/database/query_analyzers/base.rb b/lib/gitlab/database/query_analyzers/base.rb
index 9a52a4f6e23..9c2c228f869 100644
--- a/lib/gitlab/database/query_analyzers/base.rb
+++ b/lib/gitlab/database/query_analyzers/base.rb
@@ -53,6 +53,10 @@ module Gitlab
Thread.current[self.context_key]
end
+ def self.raw?
+ false
+ end
+
def self.enabled?
raise NotImplementedError
end
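raw? defaults to false here and is consulted by the dispatch in query_analyzer.rb above; an analyzer that wants the raw SQL string overrides it. A hypothetical subclass (the class name and logging are illustrative):

  class SlowQueryLogger < Gitlab::Database::QueryAnalyzers::Base
    class << self
      def raw?
        true # the dispatcher will pass the raw SQL string to analyze
      end

      def enabled?
        true
      end

      def analyze(sql)
        Gitlab::AppLogger.info(message: 'query observed', sql: sql)
      end
    end
  end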
diff --git a/lib/gitlab/database/query_analyzers/ci/partitioning_id_analyzer.rb b/lib/gitlab/database/query_analyzers/ci/partitioning_id_analyzer.rb
new file mode 100644
index 00000000000..47277182d9a
--- /dev/null
+++ b/lib/gitlab/database/query_analyzers/ci/partitioning_id_analyzer.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module QueryAnalyzers
+ module Ci
+ # The purpose of this analyzer is to detect queries missing a partition_id
+ # clause when selecting, inserting, updating or deleting data.
+ class PartitioningIdAnalyzer < Database::QueryAnalyzers::Base
+ PartitionIdMissingError = Class.new(QueryAnalyzerError)
+
+ ROUTING_TABLES = %w[p_ci_builds_metadata].freeze
+
+ class << self
+ def enabled?
+ ::Feature::FlipperFeature.table_exists? &&
+ ::Feature.enabled?(:ci_partitioning_analyze_queries_partition_id_check, type: :ops)
+ end
+
+ def analyze(parsed)
+ analyze_partition_id_presence(parsed)
+ end
+
+ private
+
+ def analyze_partition_id_presence(parsed)
+ detected = ROUTING_TABLES & (parsed.pg.dml_tables + parsed.pg.select_tables)
+ return if detected.none?
+
+ if insert_query?(parsed)
+ return if insert_include_partition_id?(parsed)
+ else
+ detected_with_selected_columns = parsed_detected_tables(parsed, detected)
+ return if partition_id_included?(detected_with_selected_columns)
+ end
+
+ ::Gitlab::ErrorTracking.track_and_raise_for_dev_exception(
+ PartitionIdMissingError.new(
+ "Detected query against a partitioned table without partition id: #{parsed.sql}"
+ )
+ )
+ end
+
+ def parsed_detected_tables(parsed, routing_tables)
+ parsed.pg.filter_columns.each_with_object(Hash.new { |h, k| h[k] = [] }) do |item, hash|
+ table_name = item[0] || routing_tables[0]
+ column_name = item[1]
+
+ hash[table_name] << column_name if routing_tables.include?(table_name)
+ end
+ end
+
+ def partition_id_included?(result)
+ return false if result.empty?
+
+ result.all? { |_routing_table, columns| columns.include?('partition_id') }
+ end
+
+ def insert_query?(parsed)
+ parsed.sql.start_with?('INSERT')
+ end
+
+ def insert_include_partition_id?(parsed)
+ filtered_columns_on_insert(parsed).include?('partition_id')
+ end
+
+ def filtered_columns_on_insert(parsed)
+ result = parsed.pg.tree.to_h.dig(:stmts, 0, :stmt, :insert_stmt, :cols).map do |h|
+ h.dig(:res_target, :name)
+ end
+
+ result || []
+ end
+ end
+ end
+ end
+ end
+ end
+end
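Illustrative statements against the routing table and how this analyzer treats them (values are made up):

  -- flagged: PartitionIdMissingError, no partition_id among the filter columns
  SELECT * FROM p_ci_builds_metadata WHERE build_id = 1;

  -- passes: partition_id appears in the filter columns
  SELECT * FROM p_ci_builds_metadata WHERE partition_id = 100 AND build_id = 1;

  -- passes: the INSERT column list includes partition_id
  INSERT INTO p_ci_builds_metadata (build_id, partition_id) VALUES (1, 100);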
diff --git a/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb b/lib/gitlab/database/query_analyzers/ci/partitioning_routing_analyzer.rb
index c2d5dfc1a15..eb55ebc7619 100644
--- a/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb
+++ b/lib/gitlab/database/query_analyzers/ci/partitioning_routing_analyzer.rb
@@ -5,12 +5,10 @@ module Gitlab
module QueryAnalyzers
module Ci
# The purpose of this analyzer is to detect queries not going through a partitioning routing table
- class PartitioningAnalyzer < Database::QueryAnalyzers::Base
+ class PartitioningRoutingAnalyzer < Database::QueryAnalyzers::Base
RoutingTableNotUsedError = Class.new(QueryAnalyzerError)
- ENABLED_TABLES = %w[
- ci_builds_metadata
- ].freeze
+ ENABLED_TABLES = %w[ci_builds_metadata].freeze
class << self
def enabled?
diff --git a/lib/gitlab/database/query_analyzers/query_recorder.rb b/lib/gitlab/database/query_analyzers/query_recorder.rb
new file mode 100644
index 00000000000..88fe829c3d2
--- /dev/null
+++ b/lib/gitlab/database/query_analyzers/query_recorder.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module QueryAnalyzers
+ class QueryRecorder < Base
+ LOG_FILE = 'rspec/query_recorder.ndjson'
+
+ class << self
+ def raw?
+ true
+ end
+
+ def enabled?
+ # Only enable QueryRecorder in CI
+ ENV['CI'].present?
+ end
+
+ def analyze(sql)
+ payload = {
+ sql: sql
+ }
+
+ log_query(payload)
+ end
+
+ private
+
+ def log_query(payload)
+ log_path = Rails.root.join(LOG_FILE)
+ log_dir = File.dirname(log_path)
+
+ # Create the log directory if it does not exist, since only certain
+ # CI jobs create it ahead of time
+ FileUtils.mkdir_p(log_dir) unless Dir.exist?(log_dir)
+
+ log_line = "#{Gitlab::Json.dump(payload)}\n"
+
+ File.write(log_path, log_line, mode: 'a')
+ end
+ end
+ end
+ end
+ end
+end
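Each analyzed query becomes one JSON object per line in rspec/query_recorder.ndjson; an illustrative line:

  {"sql":"SELECT \"users\".* FROM \"users\" WHERE \"users\".\"id\" = 1"}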
diff --git a/lib/gitlab/database/tables_truncate.rb b/lib/gitlab/database/tables_truncate.rb
index 164520fbab3..8380bf23899 100644
--- a/lib/gitlab/database/tables_truncate.rb
+++ b/lib/gitlab/database/tables_truncate.rb
@@ -14,7 +14,7 @@ module Gitlab
end
def execute
- raise "Cannot truncate legacy tables in single-db setup" unless Gitlab::Database.has_config?(:ci)
+ raise "Cannot truncate legacy tables in single-db setup" if single_database_setup?
raise "database is not supported" unless %w[main ci].include?(database_name)
logger&.info "DRY RUN:" if dry_run
@@ -91,6 +91,13 @@ module Gitlab
end
end
end
+
+ def single_database_setup?
+ return true unless Gitlab::Database.has_config?(:ci)
+
+ ci_base_model = Gitlab::Database.database_base_models[:ci]
+ !!Gitlab::Database.db_config_share_with(ci_base_model.connection_db_config)
+ end
end
end
end
diff --git a/lib/gitlab/database/type/symbolized_jsonb.rb b/lib/gitlab/database/type/symbolized_jsonb.rb
new file mode 100644
index 00000000000..5bec738ec9c
--- /dev/null
+++ b/lib/gitlab/database/type/symbolized_jsonb.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Type
+ # Extends Rails' Jsonb data type to deserialize it into a symbolized Hash.
+ #
+ # Example:
+ #
+ # class SomeModel < ApplicationRecord
+ # # some_model.a_field is of type `jsonb`
+ # attribute :a_field, :sym_jsonb
+ # end
+ class SymbolizedJsonb < ::ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Jsonb
+ def type
+ :sym_jsonb
+ end
+
+ def deserialize(value)
+ data = super
+ return unless data
+
+ ::Gitlab::Utils.deep_symbolized_access(data)
+ end
+ end
+ end
+ end
+end
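For the attribute :a_field, :sym_jsonb example in the doc comment to resolve, the type has to be registered with ActiveRecord somewhere in the app's boot path; that registration is not part of this diff, so the first line below is an assumption:

  # Assumed registration, not shown in this changeset:
  ActiveRecord::Type.register(:sym_jsonb, Gitlab::Database::Type::SymbolizedJsonb)

  record = SomeModel.create!(a_field: { 'key' => 'value' })
  record.reload.a_field
  # => { key: "value" } (keys come back symbolized via
  # Gitlab::Utils.deep_symbolized_access)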