gitlab.com/gitlab-org/gitlab-foss.git

author     GitLab Bot <gitlab-bot@gitlab.com>  2021-07-20 12:55:51 +0300
committer  GitLab Bot <gitlab-bot@gitlab.com>  2021-07-20 12:55:51 +0300
commit     e8d2c2579383897a1dd7f9debd359abe8ae8373d (patch)
tree       c42be41678c2586d49a75cabce89322082698334 /lib/gitlab/database
parent     fc845b37ec3a90aaa719975f607740c22ba6a113 (diff)

Add latest changes from gitlab-org/gitlab@14-1-stable-ee (tag: v14.1.0-rc42)
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--  lib/gitlab/database/as_with_materialized.rb | 2
-rw-r--r--  lib/gitlab/database/background_migration/batched_job.rb | 45
-rw-r--r--  lib/gitlab/database/background_migration/batched_migration.rb | 9
-rw-r--r--  lib/gitlab/database/background_migration/batched_migration_runner.rb | 45
-rw-r--r--  lib/gitlab/database/batch_count.rb | 156
-rw-r--r--  lib/gitlab/database/batch_counter.rb | 159
-rw-r--r--  lib/gitlab/database/custom_structure.rb | 43
-rw-r--r--  lib/gitlab/database/dynamic_model_helpers.rb | 6
-rw-r--r--  lib/gitlab/database/load_balancing.rb | 1
-rw-r--r--  lib/gitlab/database/load_balancing/load_balancer.rb | 15
-rw-r--r--  lib/gitlab/database/load_balancing/rack_middleware.rb | 2
-rw-r--r--  lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb | 26
-rw-r--r--  lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb | 63
-rw-r--r--  lib/gitlab/database/load_balancing/srv_resolver.rb | 2
-rw-r--r--  lib/gitlab/database/load_balancing/sticking.rb | 16
-rw-r--r--  lib/gitlab/database/migration_helpers.rb | 30
-rw-r--r--  lib/gitlab/database/migrations/background_migration_helpers.rb | 5
-rw-r--r--  lib/gitlab/database/partitioning/monthly_strategy.rb | 29
-rw-r--r--  lib/gitlab/database/partitioning/partition_manager.rb (renamed from lib/gitlab/database/partitioning/partition_creator.rb) | 47
-rw-r--r--  lib/gitlab/database/partitioning/partition_monitoring.rb | 2
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb | 129
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key.rb | 13
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key_validator.rb | 28
-rw-r--r--  lib/gitlab/database/postgres_hll/batch_distinct_counter.rb | 8
-rw-r--r--  lib/gitlab/database/postgres_index.rb | 20
-rw-r--r--  lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb | 3
-rw-r--r--  lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb | 2
-rw-r--r--  lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb | 7
-rw-r--r--  lib/gitlab/database/reindexing.rb | 8
-rw-r--r--  lib/gitlab/database/reindexing/concurrent_reindex.rb | 154
-rw-r--r--  lib/gitlab/database/reindexing/coordinator.rb | 2
-rw-r--r--  lib/gitlab/database/reindexing/index_selection.rb | 17
-rw-r--r--  lib/gitlab/database/reindexing/reindex_action.rb | 2
-rw-r--r--  lib/gitlab/database/reindexing/reindex_concurrently.rb | 138
-rw-r--r--  lib/gitlab/database/schema_cleaner.rb | 6
-rw-r--r--  lib/gitlab/database/schema_migrations.rb | 19
-rw-r--r--  lib/gitlab/database/schema_migrations/context.rb | 41
-rw-r--r--  lib/gitlab/database/schema_migrations/migrations.rb | 52
-rw-r--r--  lib/gitlab/database/schema_version_files.rb | 64
39 files changed, 722 insertions, 694 deletions
diff --git a/lib/gitlab/database/as_with_materialized.rb b/lib/gitlab/database/as_with_materialized.rb
index e7e3c1766a9..eda991efbd5 100644
--- a/lib/gitlab/database/as_with_materialized.rb
+++ b/lib/gitlab/database/as_with_materialized.rb
@@ -24,6 +24,8 @@ module Gitlab
end
# Note: to be deleted after the minimum PG version is set to 12.0
+ # Update the documentation together when deleting the method
+ # https://docs.gitlab.com/ee/development/merge_request_performance_guidelines.html#use-ctes-wisely
def self.materialized_if_supported
materialized_supported? ? 'MATERIALIZED' : ''
end
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb
index 9a1dc4ee17d..03bd02d7554 100644
--- a/lib/gitlab/database/background_migration/batched_job.rb
+++ b/lib/gitlab/database/background_migration/batched_job.rb
@@ -44,6 +44,51 @@ module Gitlab
# TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801)
duration.to_f / batched_migration.interval
end
+
+ def split_and_retry!
+ with_lock do
+ raise 'Only failed jobs can be split' unless failed?
+
+ new_batch_size = batch_size / 2
+
+ raise 'Job cannot be split further' if new_batch_size < 1
+
+ batching_strategy = batched_migration.batch_class.new
+ next_batch_bounds = batching_strategy.next_batch(
+ batched_migration.table_name,
+ batched_migration.column_name,
+ batch_min_value: min_value,
+ batch_size: new_batch_size
+ )
+ midpoint = next_batch_bounds.last
+
+ # We don't want the midpoint to go over the existing max_value because
+ # those IDs would already be in the next batched migration job.
+ # This could happen when a lot of records in the current batch are deleted.
+ #
+ # In this case, we just lower the batch size so that future calls to this
+ # method could eventually split the job if it continues to fail.
+ if midpoint >= max_value
+ update!(batch_size: new_batch_size, attempts: 0)
+ else
+ old_max_value = max_value
+
+ update!(
+ batch_size: new_batch_size,
+ max_value: midpoint,
+ attempts: 0,
+ started_at: nil,
+ finished_at: nil,
+ metrics: {}
+ )
+
+ new_record = dup
+ new_record.min_value = midpoint.next
+ new_record.max_value = old_max_value
+ new_record.save!
+ end
+ end
+ end
end
end
end
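
(Illustrative sketch, not part of the diff: how the new split_and_retry! might be driven
from a console, assuming the status enum also exposes a `failed` scope to match the
`failed?` predicate used above.)

  job = Gitlab::Database::BackgroundMigration::BatchedJob.failed.first
  if job
    # Halves the batch size, resets attempts, and creates a sibling job covering the
    # upper half of the range; raises 'Job cannot be split further' once the batch
    # size would drop below 1.
    job.split_and_retry!
  end
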
diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb
index 36e89023c86..9d66824da51 100644
--- a/lib/gitlab/database/background_migration/batched_migration.rb
+++ b/lib/gitlab/database/background_migration/batched_migration.rb
@@ -10,7 +10,7 @@ module Gitlab
self.table_name = :batched_background_migrations
has_many :batched_jobs, foreign_key: :batched_background_migration_id
- has_one :last_job, -> { order(id: :desc) },
+ has_one :last_job, -> { order(max_value: :desc) },
class_name: 'Gitlab::Database::BackgroundMigration::BatchedJob',
foreign_key: :batched_background_migration_id
@@ -29,11 +29,16 @@ module Gitlab
paused: 0,
active: 1,
finished: 3,
- failed: 4
+ failed: 4,
+ finalizing: 5
}
attribute :pause_ms, :integer, default: 100
+ def self.find_for_configuration(job_class_name, table_name, column_name, job_arguments)
+ for_configuration(job_class_name, table_name, column_name, job_arguments).first
+ end
+
def self.active_migration
active.queue_order.first
end
diff --git a/lib/gitlab/database/background_migration/batched_migration_runner.rb b/lib/gitlab/database/background_migration/batched_migration_runner.rb
index 67fe6c536e6..14e3919986e 100644
--- a/lib/gitlab/database/background_migration/batched_migration_runner.rb
+++ b/lib/gitlab/database/background_migration/batched_migration_runner.rb
@@ -4,6 +4,12 @@ module Gitlab
module Database
module BackgroundMigration
class BatchedMigrationRunner
+ FailedToFinalize = Class.new(RuntimeError)
+
+ def self.finalize(job_class_name, table_name, column_name, job_arguments)
+ new.finalize(job_class_name, table_name, column_name, job_arguments)
+ end
+
def initialize(migration_wrapper = BatchedMigrationWrapper.new)
@migration_wrapper = migration_wrapper
end
@@ -37,10 +43,35 @@ module Gitlab
raise 'this method is not intended for use in real environments'
end
- while migration.active?
- run_migration_job(migration)
+ run_migration_while(migration, :active)
+ end
- migration.reload_last_job
+ # Finalize migration for given configuration.
+ #
+ # If the migration is already finished, do nothing. Otherwise change its status to `finalizing`
+ # in order to prevent it being picked up by the background worker. Perform all pending jobs,
+ # then keep running until migration is finished.
+ def finalize(job_class_name, table_name, column_name, job_arguments)
+ migration = BatchedMigration.find_for_configuration(job_class_name, table_name, column_name, job_arguments)
+
+ configuration = {
+ job_class_name: job_class_name,
+ table_name: table_name,
+ column_name: column_name,
+ job_arguments: job_arguments
+ }
+
+ if migration.nil?
+ Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}"
+ elsif migration.finished?
+ Gitlab::AppLogger.warn "Batched background migration for the given configuration is already finished: #{configuration}"
+ else
+ migration.finalizing!
+ migration.batched_jobs.pending.each { |job| migration_wrapper.perform(job) }
+
+ run_migration_while(migration, :finalizing)
+
+ raise FailedToFinalize unless migration.finished?
end
end
@@ -90,6 +121,14 @@ module Gitlab
active_migration.finished!
end
end
+
+ def run_migration_while(migration, status)
+ while migration.status == status.to_s
+ run_migration_job(migration)
+
+ migration.reload_last_job
+ end
+ end
end
end
end
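
(Hedged usage sketch, not part of the diff; the job class, table, column, and arguments
below are placeholders.)

  Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.finalize(
    'CopyColumnUsingBackgroundMigrationJob',   # job_class_name (placeholder)
    :events,                                   # table_name (placeholder)
    :id,                                       # column_name (placeholder)
    [['id'], ['id_convert_to_bigint']]         # job_arguments (placeholder)
  )
  # Raises BatchedMigrationRunner::FailedToFinalize unless the migration ends up 'finished'.
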
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
index 9002d39e1ee..49f56b5be97 100644
--- a/lib/gitlab/database/batch_count.rb
+++ b/lib/gitlab/database/batch_count.rb
@@ -18,7 +18,7 @@
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_count(Namespace.group(:type))
# batch_distinct_count(::Project, :creator_id)
-# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
+# batch_distinct_count(::Project.aimed_for_deletion.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
# batch_distinct_count(Project.group(:visibility_level), :creator_id)
# batch_sum(User, :sign_in_count)
# batch_sum(Issue.group(:state_id), :weight))
@@ -41,159 +41,5 @@ module Gitlab
include BatchCount
end
end
-
- class BatchCounter
- FALLBACK = -1
- MIN_REQUIRED_BATCH_SIZE = 1_250
- DEFAULT_SUM_BATCH_SIZE = 1_000
- MAX_ALLOWED_LOOPS = 10_000
- SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
- ALLOWED_MODES = [:itself, :distinct].freeze
- FALLBACK_FINISH = 0
- OFFSET_BY_ONE = 1
-
- # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
- DEFAULT_DISTINCT_BATCH_SIZE = 10_000
- DEFAULT_BATCH_SIZE = 100_000
-
- def initialize(relation, column: nil, operation: :count, operation_args: nil)
- @relation = relation
- @column = column || relation.primary_key
- @operation = operation
- @operation_args = operation_args
- end
-
- def unwanted_configuration?(finish, batch_size, start)
- (@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) ||
- (@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) ||
- (finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
- start >= finish
- end
-
- def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
- raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
-
- check_mode!(mode)
-
- # non-distinct have better performance
- batch_size ||= batch_size_for_mode_and_operation(mode, @operation)
-
- start = actual_start(start)
- finish = actual_finish(finish)
-
- raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
- return FALLBACK if unwanted_configuration?(finish, batch_size, start)
-
- results = nil
- batch_start = start
-
- while batch_start < finish
- begin
- batch_end = [batch_start + batch_size, finish].min
- batch_relation = build_relation_batch(batch_start, batch_end, mode)
-
- op_args = @operation_args
- if @operation == :count && @operation_args.blank? && use_loose_index_scan_for_distinct_values?(mode)
- op_args = [Gitlab::Database::LooseIndexScanDistinctCount::COLUMN_ALIAS]
- end
-
- results = merge_results(results, batch_relation.send(@operation, *op_args)) # rubocop:disable GitlabSecurity/PublicSend
- batch_start = batch_end
- rescue ActiveRecord::QueryCanceled => error
- # retry with a safe batch size & warmer cache
- if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE
- batch_size /= 2
- else
- log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error)
- return FALLBACK
- end
- rescue Gitlab::Database::LooseIndexScanDistinctCount::ColumnConfigurationError => error
- Gitlab::AppJsonLogger
- .error(
- event: 'batch_count',
- relation: @relation.table_name,
- operation: @operation,
- operation_args: @operation_args,
- mode: mode,
- message: "LooseIndexScanDistinctCount column error: #{error.message}"
- )
-
- return FALLBACK
- end
-
- sleep(SLEEP_TIME_IN_SECONDS)
- end
-
- results
- end
-
- def merge_results(results, object)
- return object unless results
-
- if object.is_a?(Hash)
- results.merge!(object) { |_, a, b| a + b }
- else
- results + object
- end
- end
-
- private
-
- def build_relation_batch(start, finish, mode)
- if use_loose_index_scan_for_distinct_values?(mode)
- Gitlab::Database::LooseIndexScanDistinctCount.new(@relation, @column).build_query(from: start, to: finish)
- else
- @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend
- end
- end
-
- def batch_size_for_mode_and_operation(mode, operation)
- return DEFAULT_SUM_BATCH_SIZE if operation == :sum
-
- mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
- end
-
- def between_condition(start, finish)
- return @column.between(start...finish) if @column.is_a?(Arel::Attributes::Attribute)
-
- { @column => start...finish }
- end
-
- def actual_start(start)
- start || @relation.unscope(:group, :having).minimum(@column) || 0
- end
-
- def actual_finish(finish)
- (finish || @relation.unscope(:group, :having).maximum(@column) || FALLBACK_FINISH) + OFFSET_BY_ONE
- end
-
- def check_mode!(mode)
- raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode)
- raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
- raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
- end
-
- def log_canceled_batch_fetch(batch_start, mode, query, error)
- Gitlab::AppJsonLogger
- .error(
- event: 'batch_count',
- relation: @relation.table_name,
- operation: @operation,
- operation_args: @operation_args,
- start: batch_start,
- mode: mode,
- query: query,
- message: "Query has been canceled with message: #{error.message}"
- )
- end
-
- def use_loose_index_scan_for_distinct_values?(mode)
- Feature.enabled?(:loose_index_scan_for_distinct_values) && not_group_by_query? && mode == :distinct
- end
-
- def not_group_by_query?
- !@relation.is_a?(ActiveRecord::Relation) || @relation.group_values.blank?
- end
- end
end
end
diff --git a/lib/gitlab/database/batch_counter.rb b/lib/gitlab/database/batch_counter.rb
new file mode 100644
index 00000000000..5f2e404c9da
--- /dev/null
+++ b/lib/gitlab/database/batch_counter.rb
@@ -0,0 +1,159 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class BatchCounter
+ FALLBACK = -1
+ MIN_REQUIRED_BATCH_SIZE = 1_250
+ DEFAULT_SUM_BATCH_SIZE = 1_000
+ MAX_ALLOWED_LOOPS = 10_000
+ SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
+ ALLOWED_MODES = [:itself, :distinct].freeze
+ FALLBACK_FINISH = 0
+ OFFSET_BY_ONE = 1
+
+ # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
+ DEFAULT_DISTINCT_BATCH_SIZE = 10_000
+ DEFAULT_BATCH_SIZE = 100_000
+
+ def initialize(relation, column: nil, operation: :count, operation_args: nil)
+ @relation = relation
+ @column = column || relation.primary_key
+ @operation = operation
+ @operation_args = operation_args
+ end
+
+ def unwanted_configuration?(finish, batch_size, start)
+ (@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) ||
+ (@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) ||
+ (finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
+ start >= finish
+ end
+
+ def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
+ raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
+
+ check_mode!(mode)
+
+ # non-distinct have better performance
+ batch_size ||= batch_size_for_mode_and_operation(mode, @operation)
+
+ start = actual_start(start)
+ finish = actual_finish(finish)
+
+ raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
+ return FALLBACK if unwanted_configuration?(finish, batch_size, start)
+
+ results = nil
+ batch_start = start
+
+ while batch_start < finish
+ begin
+ batch_end = [batch_start + batch_size, finish].min
+ batch_relation = build_relation_batch(batch_start, batch_end, mode)
+
+ op_args = @operation_args
+ if @operation == :count && @operation_args.blank? && use_loose_index_scan_for_distinct_values?(mode)
+ op_args = [Gitlab::Database::LooseIndexScanDistinctCount::COLUMN_ALIAS]
+ end
+
+ results = merge_results(results, batch_relation.send(@operation, *op_args)) # rubocop:disable GitlabSecurity/PublicSend
+ batch_start = batch_end
+ rescue ActiveRecord::QueryCanceled => error
+ # retry with a safe batch size & warmer cache
+ if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE
+ batch_size /= 2
+ else
+ log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error)
+ return FALLBACK
+ end
+ rescue Gitlab::Database::LooseIndexScanDistinctCount::ColumnConfigurationError => error
+ Gitlab::AppJsonLogger
+ .error(
+ event: 'batch_count',
+ relation: @relation.table_name,
+ operation: @operation,
+ operation_args: @operation_args,
+ mode: mode,
+ message: "LooseIndexScanDistinctCount column error: #{error.message}"
+ )
+
+ return FALLBACK
+ end
+
+ sleep(SLEEP_TIME_IN_SECONDS)
+ end
+
+ results
+ end
+
+ def merge_results(results, object)
+ return object unless results
+
+ if object.is_a?(Hash)
+ results.merge!(object) { |_, a, b| a + b }
+ else
+ results + object
+ end
+ end
+
+ private
+
+ def build_relation_batch(start, finish, mode)
+ if use_loose_index_scan_for_distinct_values?(mode)
+ Gitlab::Database::LooseIndexScanDistinctCount.new(@relation, @column).build_query(from: start, to: finish)
+ else
+ @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend
+ end
+ end
+
+ def batch_size_for_mode_and_operation(mode, operation)
+ return DEFAULT_SUM_BATCH_SIZE if operation == :sum
+
+ mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
+ end
+
+ def between_condition(start, finish)
+ return @column.between(start...finish) if @column.is_a?(Arel::Attributes::Attribute)
+
+ { @column => start...finish }
+ end
+
+ def actual_start(start)
+ start || @relation.unscope(:group, :having).minimum(@column) || 0
+ end
+
+ def actual_finish(finish)
+ (finish || @relation.unscope(:group, :having).maximum(@column) || FALLBACK_FINISH) + OFFSET_BY_ONE
+ end
+
+ def check_mode!(mode)
+ raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode)
+ raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
+ raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
+ end
+
+ def log_canceled_batch_fetch(batch_start, mode, query, error)
+ Gitlab::AppJsonLogger
+ .error(
+ event: 'batch_count',
+ relation: @relation.table_name,
+ operation: @operation,
+ operation_args: @operation_args,
+ start: batch_start,
+ mode: mode,
+ query: query,
+ message: "Query has been canceled with message: #{error.message}"
+ )
+ end
+
+ def use_loose_index_scan_for_distinct_values?(mode)
+ Feature.enabled?(:loose_index_scan_for_distinct_values) && not_group_by_query? && mode == :distinct
+ end
+
+ def not_group_by_query?
+ !@relation.is_a?(ActiveRecord::Relation) || @relation.group_values.blank?
+ end
+ end
+ end
+end
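
(Note and hedged example: the class body above is extracted verbatim out of batch_count.rb;
callers keep using the BatchCount module helpers shown in that file's header, e.g.)

  Gitlab::Database::BatchCount.batch_count(::Project)
  Gitlab::Database::BatchCount.batch_distinct_count(::Project, :creator_id)
  Gitlab::Database::BatchCount.batch_sum(User, :sign_in_count)
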
diff --git a/lib/gitlab/database/custom_structure.rb b/lib/gitlab/database/custom_structure.rb
deleted file mode 100644
index e4404e73a63..00000000000
--- a/lib/gitlab/database/custom_structure.rb
+++ /dev/null
@@ -1,43 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- class CustomStructure
- CUSTOM_DUMP_FILE = 'db/gitlab_structure.sql'
-
- def dump
- File.open(self.class.custom_dump_filepath, 'wb') do |io|
- io << "-- this file tracks custom GitLab data, such as foreign keys referencing partitioned tables\n"
- io << "-- more details can be found in the issue: https://gitlab.com/gitlab-org/gitlab/-/issues/201872\n\n"
-
- dump_partitioned_foreign_keys(io) if partitioned_foreign_keys_exist?
- end
- end
-
- def self.custom_dump_filepath
- Rails.root.join(CUSTOM_DUMP_FILE)
- end
-
- private
-
- def dump_partitioned_foreign_keys(io)
- io << "COPY partitioned_foreign_keys (#{partitioned_fk_columns.join(", ")}) FROM STDIN;\n"
-
- PartitioningMigrationHelpers::PartitionedForeignKey.find_each do |fk|
- io << fk.attributes.values_at(*partitioned_fk_columns).join("\t") << "\n"
- end
- io << "\\.\n"
- end
-
- def partitioned_foreign_keys_exist?
- return false unless PartitioningMigrationHelpers::PartitionedForeignKey.table_exists?
-
- PartitioningMigrationHelpers::PartitionedForeignKey.exists?
- end
-
- def partitioned_fk_columns
- @partitioned_fk_columns ||= PartitioningMigrationHelpers::PartitionedForeignKey.column_names
- end
- end
- end
-end
diff --git a/lib/gitlab/database/dynamic_model_helpers.rb b/lib/gitlab/database/dynamic_model_helpers.rb
index 7439591be99..220062f1bc6 100644
--- a/lib/gitlab/database/dynamic_model_helpers.rb
+++ b/lib/gitlab/database/dynamic_model_helpers.rb
@@ -3,6 +3,8 @@
module Gitlab
module Database
module DynamicModelHelpers
+ BATCH_SIZE = 1_000
+
def define_batchable_model(table_name)
Class.new(ActiveRecord::Base) do
include EachBatch
@@ -12,7 +14,7 @@ module Gitlab
end
end
- def each_batch(table_name, scope: ->(table) { table.all }, of: 1000)
+ def each_batch(table_name, scope: ->(table) { table.all }, of: BATCH_SIZE)
if transaction_open?
raise <<~MSG.squish
each_batch should not run inside a transaction, you can disable
@@ -25,7 +27,7 @@ module Gitlab
.each_batch(of: of) { |batch| yield batch }
end
- def each_batch_range(table_name, scope: ->(table) { table.all }, of: 1000)
+ def each_batch_range(table_name, scope: ->(table) { table.all }, of: BATCH_SIZE)
each_batch(table_name, scope: scope, of: of) do |batch|
yield batch.pluck('MIN(id), MAX(id)').first
end
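
(Hedged sketch of the helpers above as they might be used from a migration; the class and
table names are placeholders, and the new BATCH_SIZE constant now backs the default `of:`
value of 1_000.)

  class ExampleBackfill < ActiveRecord::Migration[6.1]
    include Gitlab::Database::DynamicModelHelpers

    disable_ddl_transaction!  # each_batch raises when run inside a transaction

    def up
      each_batch_range('my_table', of: 500) do |min_id, max_id|
        # operate on the id range [min_id, max_id]
      end
    end
  end
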
diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb
index 88743cd2e75..31d41a6d6c0 100644
--- a/lib/gitlab/database/load_balancing.rb
+++ b/lib/gitlab/database/load_balancing.rb
@@ -85,7 +85,6 @@ module Gitlab
# Returns true if load balancing is to be enabled.
def self.enable?
return false if Gitlab::Runtime.rake?
- return false if Gitlab::Runtime.sidekiq? && !Gitlab::Utils.to_boolean(ENV['ENABLE_LOAD_BALANCING_FOR_SIDEKIQ'], default: false)
return false unless self.configured?
true
diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb
index a833bb8491f..a5d67ebc050 100644
--- a/lib/gitlab/database/load_balancing/load_balancer.rb
+++ b/lib/gitlab/database/load_balancing/load_balancer.rb
@@ -147,15 +147,15 @@ module Gitlab
raise 'Failed to determine the write location of the primary database'
end
- # Returns true if all hosts have caught up to the given transaction
- # write location.
- def all_caught_up?(location)
- @host_list.hosts.all? { |host| host.caught_up?(location) }
- end
-
# Returns true if there was at least one host that has caught up with the given transaction.
#
# In case of a retry, this method also stores the set of hosts that have caught up.
+ #
+ # UPD: `select_caught_up_hosts` seems to have redundant logic managing host list (`:gitlab_load_balancer_valid_hosts`),
+ # while we only need a single host: https://gitlab.com/gitlab-org/gitlab/-/issues/326125#note_615271604
+ # Also, shuffling the list afterwards doesn't seem to be necessary.
+ # This may be improved by merging this method with `select_up_to_date_host`.
+ # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out
def select_caught_up_hosts(location)
all_hosts = @host_list.hosts
valid_hosts = all_hosts.select { |host| host.caught_up?(location) }
@@ -179,6 +179,8 @@ module Gitlab
# Returns true if there was at least one host that has caught up with the given transaction.
# Similar to `#select_caught_up_hosts`, picks a random host, to rotate replicas we use.
# Unlike `#select_caught_up_hosts`, does not iterate over all hosts if finds any.
+ #
+ # It is going to be merged with `select_caught_up_hosts`, because they intend to do the same.
def select_up_to_date_host(location)
all_hosts = @host_list.hosts.shuffle
host = all_hosts.find { |host| host.caught_up?(location) }
@@ -190,6 +192,7 @@ module Gitlab
true
end
+ # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out
def set_consistent_hosts_for_request(hosts)
RequestStore[VALID_HOSTS_CACHE_KEY] = hosts
end
diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb
index 4734ff99bd3..8e7e6865402 100644
--- a/lib/gitlab/database/load_balancing/rack_middleware.rb
+++ b/lib/gitlab/database/load_balancing/rack_middleware.rb
@@ -39,6 +39,8 @@ module Gitlab
result = @app.call(env)
+ ActiveSupport::Notifications.instrument('web_transaction_completed.load_balancing')
+
stick_if_necessary(env)
result
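
(Hedged illustration: the new instrumentation event can be observed with a standard
ActiveSupport::Notifications subscriber; what the subscriber does is a placeholder.)

  ActiveSupport::Notifications.subscribe('web_transaction_completed.load_balancing') do |*|
    # e.g. increment a web-transaction counter for load-balancing metrics
  end
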
diff --git a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
index 524d69c00c0..0e36ebbc3ee 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
@@ -5,27 +5,29 @@ module Gitlab
module LoadBalancing
class SidekiqClientMiddleware
def call(worker_class, job, _queue, _redis_pool)
+ # Mailers can't be constantized
worker_class = worker_class.to_s.safe_constantize
- mark_data_consistency_location(worker_class, job)
+ if load_balancing_enabled?(worker_class)
+ job['worker_data_consistency'] = worker_class.get_data_consistency
+ set_data_consistency_location!(job) unless location_already_provided?(job)
+ else
+ job['worker_data_consistency'] = ::WorkerAttributes::DEFAULT_DATA_CONSISTENCY
+ end
yield
end
private
- def mark_data_consistency_location(worker_class, job)
- # Mailers can't be constantized
- return unless worker_class
- return unless worker_class.include?(::ApplicationWorker)
- return unless worker_class.get_data_consistency_feature_flag_enabled?
-
- return if location_already_provided?(job)
-
- job['worker_data_consistency'] = worker_class.get_data_consistency
-
- return unless worker_class.utilizes_load_balancing_capabilities?
+ def load_balancing_enabled?(worker_class)
+ worker_class &&
+ worker_class.include?(::ApplicationWorker) &&
+ worker_class.utilizes_load_balancing_capabilities? &&
+ worker_class.get_data_consistency_feature_flag_enabled?
+ end
+ def set_data_consistency_location!(job)
if Session.current.use_primary?
job['database_write_location'] = load_balancer.primary_write_location
else
diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
index 9bd0adf8dbd..0551750568a 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
@@ -7,8 +7,18 @@ module Gitlab
JobReplicaNotUpToDate = Class.new(StandardError)
def call(worker, job, _queue)
- if requires_primary?(worker.class, job)
+ worker_class = worker.class
+ strategy = select_load_balancing_strategy(worker_class, job)
+
+ job['load_balancing_strategy'] = strategy.to_s
+
+ if use_primary?(strategy)
Session.current.use_primary!
+ elsif strategy == :retry
+ raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\
+ " Replica was not up to date."
+ else
+ # this means we selected an up-to-date replica, but there is nothing to do in this case.
end
yield
@@ -23,31 +33,42 @@ module Gitlab
Session.clear_session
end
- def requires_primary?(worker_class, job)
- return true unless worker_class.include?(::ApplicationWorker)
- return true unless worker_class.utilizes_load_balancing_capabilities?
- return true unless worker_class.get_data_consistency_feature_flag_enabled?
-
- location = job['database_write_location'] || job['database_replica_location']
+ def use_primary?(strategy)
+ strategy.start_with?('primary')
+ end
- return true unless location
+ def select_load_balancing_strategy(worker_class, job)
+ return :primary unless load_balancing_available?(worker_class)
- job_data_consistency = worker_class.get_data_consistency
- job[:data_consistency] = job_data_consistency.to_s
+ location = job['database_write_location'] || job['database_replica_location']
+ return :primary_no_wal unless location
if replica_caught_up?(location)
- job[:database_chosen] = 'replica'
- false
- elsif job_data_consistency == :delayed && not_yet_retried?(job)
- job[:database_chosen] = 'retry'
- raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\
- " Replica was not up to date."
+ # Happy case: we can read from a replica.
+ retried_before?(worker_class, job) ? :replica_retried : :replica
+ elsif can_retry?(worker_class, job)
+ # Optimistic case: The worker allows retries and we have retries left.
+ :retry
else
- job[:database_chosen] = 'primary'
- true
+ # Sad case: we need to fall back to the primary.
+ :primary
end
end
+ def load_balancing_available?(worker_class)
+ worker_class.include?(::ApplicationWorker) &&
+ worker_class.utilizes_load_balancing_capabilities? &&
+ worker_class.get_data_consistency_feature_flag_enabled?
+ end
+
+ def can_retry?(worker_class, job)
+ worker_class.get_data_consistency == :delayed && not_yet_retried?(job)
+ end
+
+ def retried_before?(worker_class, job)
+ worker_class.get_data_consistency == :delayed && !not_yet_retried?(job)
+ end
+
def not_yet_retried?(job)
# if `retry_count` is `nil` it indicates that this job was never retried
# the `0` indicates that this is a first retry
@@ -59,11 +80,7 @@ module Gitlab
end
def replica_caught_up?(location)
- if Feature.enabled?(:sidekiq_load_balancing_rotate_up_to_date_replica)
- load_balancer.select_up_to_date_host(location)
- else
- load_balancer.host.caught_up?(location)
- end
+ load_balancer.select_up_to_date_host(location)
end
end
end
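
(Hedged illustration: a worker opts into replica reads through the WorkerAttributes API
referenced above; the worker name is a placeholder. The middleware records the chosen
strategy in job['load_balancing_strategy'] as one of primary, primary_no_wal, replica,
replica_retried, or retry.)

  class SomeBackgroundWorker
    include ApplicationWorker

    data_consistency :delayed   # permits :replica, and :retry when the replica lags behind
  end
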
diff --git a/lib/gitlab/database/load_balancing/srv_resolver.rb b/lib/gitlab/database/load_balancing/srv_resolver.rb
index 20da525f4d2..1f599ef4a27 100644
--- a/lib/gitlab/database/load_balancing/srv_resolver.rb
+++ b/lib/gitlab/database/load_balancing/srv_resolver.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+require 'net/dns'
+
module Gitlab
module Database
module LoadBalancing
diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb
index efbd7099300..8e1aa079216 100644
--- a/lib/gitlab/database/load_balancing/sticking.rb
+++ b/lib/gitlab/database/load_balancing/sticking.rb
@@ -33,8 +33,10 @@ module Gitlab
return true unless location
- load_balancer.all_caught_up?(location).tap do |caught_up|
- unstick(namespace, id) if caught_up
+ load_balancer.select_up_to_date_host(location).tap do |found|
+ ActiveSupport::Notifications.instrument('caught_up_replica_pick.load_balancing', { result: found } )
+
+ unstick(namespace, id) if found
end
end
@@ -51,8 +53,14 @@ module Gitlab
# write location. If no such location exists, err on the side of caution.
return false unless location
- load_balancer.select_caught_up_hosts(location).tap do |selected|
- unstick(namespace, id) if selected
+ if ::Feature.enabled?(:load_balancing_refine_load_balancer_methods)
+ load_balancer.select_up_to_date_host(location).tap do |selected|
+ unstick(namespace, id) if selected
+ end
+ else
+ load_balancer.select_caught_up_hosts(location).tap do |selected|
+ unstick(namespace, id) if selected
+ end
end
end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index d155abefdc8..842ab4f7b80 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -217,11 +217,12 @@ module Gitlab
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
+ # target_column - The name of the referenced column, defaults to "id".
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
# name - The name of the foreign key.
#
- def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, name: nil, validate: true)
+ def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true)
# Transactions would result in ALTER TABLE locks being held for the
# duration of the transaction, defeating the purpose of this method.
if transaction_open?
@@ -231,7 +232,8 @@ module Gitlab
options = {
column: column,
on_delete: on_delete,
- name: name.presence || concurrent_foreign_key_name(source, column)
+ name: name.presence || concurrent_foreign_key_name(source, column),
+ primary_key: target_column
}
if foreign_key_exists?(source, target, **options)
@@ -252,7 +254,7 @@ module Gitlab
ALTER TABLE #{source}
ADD CONSTRAINT #{options[:name]}
FOREIGN KEY (#{options[:column]})
- REFERENCES #{target} (id)
+ REFERENCES #{target} (#{target_column})
#{on_delete_statement(options[:on_delete])}
NOT VALID;
EOF
@@ -389,12 +391,14 @@ module Gitlab
# * +logger+ - [Gitlab::JsonLogger]
# * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
def with_lock_retries(*args, **kwargs, &block)
+ raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion)
merged_args = {
klass: self.class,
logger: Gitlab::BackgroundMigration::Logger
}.merge(kwargs)
- Gitlab::Database::WithLockRetries.new(**merged_args).run(&block)
+ Gitlab::Database::WithLockRetries.new(**merged_args)
+ .run(raise_on_exhaustion: raise_on_exhaustion, &block)
end
def true_value
@@ -1106,7 +1110,16 @@ module Gitlab
Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}"
elsif !migration.finished?
raise "Expected batched background migration for the given configuration to be marked as 'finished', " \
- "but it is '#{migration.status}': #{configuration}"
+ "but it is '#{migration.status}':" \
+ "\t#{configuration}" \
+ "\n\n" \
+ "Finalize it manualy by running" \
+ "\n\n" \
+ "\tsudo gitlab-rake gitlab:background_migrations:finalize[#{job_class_name},#{table_name},#{column_name},'#{job_arguments.inspect.gsub(',', '\,')}']" \
+ "\n\n" \
+ "For more information, check the documentation" \
+ "\n\n" \
+ "\thttps://docs.gitlab.com/ee/user/admin_area/monitoring/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished"
end
end
@@ -1610,6 +1623,13 @@ into similar problems in the future (e.g. when new tables are created).
raise
end
+ def rename_constraint(table_name, old_name, new_name)
+ execute <<~SQL
+ ALTER TABLE #{quote_table_name(table_name)}
+ RENAME CONSTRAINT #{quote_column_name(old_name)} TO #{quote_column_name(new_name)}
+ SQL
+ end
+
private
def validate_check_constraint_name!(constraint_name)
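
(Hedged migration sketch combining the helpers changed above; the table, column, and
constraint names are placeholders.)

  class ExampleMigration < ActiveRecord::Migration[6.1]
    include Gitlab::Database::MigrationHelpers

    disable_ddl_transaction!  # add_concurrent_foreign_key refuses to run inside a transaction

    def up
      # New target_column: option lets the FK reference a column other than id.
      add_concurrent_foreign_key :example_children, :example_parents,
        column: :parent_custom_id, target_column: :custom_id

      # New helper added above.
      rename_constraint :example_children, :check_old_name, :check_new_name
    end
  end
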
diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb
index fa30ffb62f5..28491a934a0 100644
--- a/lib/gitlab/database/migrations/background_migration_helpers.rb
+++ b/lib/gitlab/database/migrations/background_migration_helpers.rb
@@ -107,7 +107,10 @@ module Gitlab
batch_counter = 0
model_class.each_batch(of: batch_size) do |relation, index|
- start_id, end_id = relation.pluck(Arel.sql("MIN(#{primary_column_name}), MAX(#{primary_column_name})")).first
+ max = relation.arel_table[primary_column_name].maximum
+ min = relation.arel_table[primary_column_name].minimum
+
+ start_id, end_id = relation.pluck(min, max).first
# `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
# the same time, which is not helpful in most cases where we wish to
diff --git a/lib/gitlab/database/partitioning/monthly_strategy.rb b/lib/gitlab/database/partitioning/monthly_strategy.rb
index 82ea1ce26fb..4c68399cb68 100644
--- a/lib/gitlab/database/partitioning/monthly_strategy.rb
+++ b/lib/gitlab/database/partitioning/monthly_strategy.rb
@@ -4,16 +4,17 @@ module Gitlab
module Database
module Partitioning
class MonthlyStrategy
- attr_reader :model, :partitioning_key
+ attr_reader :model, :partitioning_key, :retain_for
# We create this many partitions in the future
HEADROOM = 6.months
delegate :table_name, to: :model
- def initialize(model, partitioning_key)
+ def initialize(model, partitioning_key, retain_for: nil)
@model = model
@partitioning_key = partitioning_key
+ @retain_for = retain_for
end
def current_partitions
@@ -27,13 +28,21 @@ module Gitlab
desired_partitions - current_partitions
end
+ def extra_partitions
+ current_partitions - desired_partitions
+ end
+
private
def desired_partitions
[].tap do |parts|
min_date, max_date = relevant_range
- parts << partition_for(upper_bound: min_date)
+ if pruning_old_partitions? && min_date <= oldest_active_date
+ min_date = oldest_active_date.beginning_of_month
+ else
+ parts << partition_for(upper_bound: min_date)
+ end
while min_date < max_date
next_date = min_date.next_month
@@ -52,13 +61,17 @@ module Gitlab
# to start from MINVALUE to a specific date `x`. The range returned
# does not include the range of the first, half-unbounded partition.
def relevant_range
- if first_partition = current_partitions.min
+ if (first_partition = current_partitions.min)
# Case 1: First partition starts with MINVALUE, i.e. from is nil -> start with first real partition
# Case 2: Rather unexpectedly, first partition does not start with MINVALUE, i.e. from is not nil
# In this case, use first partition beginning as a start
min_date = first_partition.from || first_partition.to
end
+ if pruning_old_partitions?
+ min_date ||= oldest_active_date
+ end
+
# In case we don't have a partition yet
min_date ||= Date.today
min_date = min_date.beginning_of_month
@@ -72,6 +85,14 @@ module Gitlab
TimePartition.new(table_name, lower_bound, upper_bound)
end
+ def pruning_old_partitions?
+ Feature.enabled?(:partition_pruning_dry_run) && retain_for.present?
+ end
+
+ def oldest_active_date
+ (Date.today - retain_for).beginning_of_month
+ end
+
def connection
ActiveRecord::Base.connection
end
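
(Hedged example; the model and retention period are placeholders. With the
partition_pruning_dry_run feature flag enabled, extra_partitions lists the current
partitions that fall entirely before the retention window.)

  strategy = Gitlab::Database::Partitioning::MonthlyStrategy.new(MyPartitionedModel, :created_at, retain_for: 6.months)
  strategy.missing_partitions   # partitions to create, up to HEADROOM (6 months) ahead
  strategy.extra_partitions     # current partitions no longer covered by desired_partitions
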
diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_manager.rb
index d4b2b8d50e2..c2a9422a42a 100644
--- a/lib/gitlab/database/partitioning/partition_creator.rb
+++ b/lib/gitlab/database/partitioning/partition_manager.rb
@@ -3,7 +3,7 @@
module Gitlab
module Database
module Partitioning
- class PartitionCreator
+ class PartitionManager
def self.register(model)
raise ArgumentError, "Only models with a #partitioning_strategy can be registered." unless model.respond_to?(:partitioning_strategy)
@@ -15,7 +15,7 @@ module Gitlab
end
LEASE_TIMEOUT = 1.minute
- LEASE_KEY = 'database_partition_creation_%s'
+ MANAGEMENT_LEASE_KEY = 'database_partition_management_%s'
attr_reader :models
@@ -23,23 +23,25 @@ module Gitlab
@models = models
end
- def create_partitions
+ def sync_partitions
Gitlab::AppLogger.info("Checking state of dynamic postgres partitions")
models.each do |model|
# Double-checking before getting the lease:
- # The prevailing situation is no missing partitions
- next if missing_partitions(model).empty?
+ # The prevailing situation is no missing partitions and no extra partitions
+ next if missing_partitions(model).empty? && extra_partitions(model).empty?
- only_with_exclusive_lease(model) do
+ only_with_exclusive_lease(model, lease_key: MANAGEMENT_LEASE_KEY) do
partitions_to_create = missing_partitions(model)
+ create(partitions_to_create) unless partitions_to_create.empty?
- next if partitions_to_create.empty?
-
- create(model, partitions_to_create)
+ if Feature.enabled?(:partition_pruning_dry_run)
+ partitions_to_detach = extra_partitions(model)
+ detach(partitions_to_detach) unless partitions_to_detach.empty?
+ end
end
rescue StandardError => e
- Gitlab::AppLogger.error("Failed to create partition(s) for #{model.table_name}: #{e.class}: #{e.message}")
+ Gitlab::AppLogger.error("Failed to create / detach partition(s) for #{model.table_name}: #{e.class}: #{e.message}")
end
end
@@ -51,15 +53,22 @@ module Gitlab
model.partitioning_strategy.missing_partitions
end
- def only_with_exclusive_lease(model)
- lease = Gitlab::ExclusiveLease.new(LEASE_KEY % model.table_name, timeout: LEASE_TIMEOUT)
+ def extra_partitions(model)
+ return [] unless Feature.enabled?(:partition_pruning_dry_run)
+ return [] unless connection.table_exists?(model.table_name)
+
+ model.partitioning_strategy.extra_partitions
+ end
+
+ def only_with_exclusive_lease(model, lease_key:)
+ lease = Gitlab::ExclusiveLease.new(lease_key % model.table_name, timeout: LEASE_TIMEOUT)
yield if lease.try_obtain
ensure
lease&.cancel
end
- def create(model, partitions)
+ def create(partitions)
connection.transaction do
with_lock_retries do
partitions.each do |partition|
@@ -71,6 +80,18 @@ module Gitlab
end
end
+ def detach(partitions)
+ connection.transaction do
+ with_lock_retries do
+ partitions.each { |p| detach_one_partition(p) }
+ end
+ end
+ end
+
+ def detach_one_partition(partition)
+ Gitlab::AppLogger.info("Planning to detach #{partition.partition_name} for table #{partition.table}")
+ end
+
def with_lock_retries(&block)
Gitlab::Database::WithLockRetries.new(
klass: self.class,
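
(Hedged sketch of the renamed entry point; scheduling/worker wiring is outside this diff.)

  # Previously: Gitlab::Database::Partitioning::PartitionCreator.new.create_partitions
  Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions
  # Creates any missing partitions and, behind the partition_pruning_dry_run feature flag,
  # only logs the partitions it would detach (detach_one_partition just logs for now).
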
diff --git a/lib/gitlab/database/partitioning/partition_monitoring.rb b/lib/gitlab/database/partitioning/partition_monitoring.rb
index 9ec9ae684a5..ad122fd47fe 100644
--- a/lib/gitlab/database/partitioning/partition_monitoring.rb
+++ b/lib/gitlab/database/partitioning/partition_monitoring.rb
@@ -6,7 +6,7 @@ module Gitlab
class PartitionMonitoring
attr_reader :models
- def initialize(models = PartitionCreator.models)
+ def initialize(models = PartitionManager.models)
@models = models
end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb
index 4402c42b136..f1aa7871245 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb
@@ -79,135 +79,6 @@ module Gitlab
"#{prefix}#{hashed_identifier}"
end
-
- # Creates a "foreign key" that references a partitioned table. Because foreign keys referencing partitioned
- # tables are not supported in PG11, this does not create a true database foreign key, but instead implements the
- # same functionality at the database level by using triggers.
- #
- # Example:
- #
- # add_partitioned_foreign_key :issues, :projects
- #
- # Available options:
- #
- # :column - name of the referencing column (otherwise inferred from the referenced table name)
- # :primary_key - name of the primary key in the referenced table (defaults to id)
- # :on_delete - supports either :cascade for ON DELETE CASCADE or :nullify for ON DELETE SET NULL
- #
- def add_partitioned_foreign_key(from_table, to_table, column: nil, primary_key: :id, on_delete: :cascade)
- cascade_delete = extract_cascade_option(on_delete)
-
- update_foreign_keys(from_table, to_table, column, primary_key, cascade_delete) do |current_keys, existing_key, specified_key|
- if existing_key.nil?
- unless specified_key.save
- raise "failed to create foreign key: #{specified_key.errors.full_messages.to_sentence}"
- end
-
- current_keys << specified_key
- else
- Gitlab::AppLogger.warn "foreign key not added because it already exists: #{specified_key}"
- current_keys
- end
- end
- end
-
- # Drops a "foreign key" that references a partitioned table. This method ONLY applies to foreign keys previously
- # created through the `add_partitioned_foreign_key` method. Standard database foreign keys should be managed
- # through the familiar Rails helpers.
- #
- # Example:
- #
- # remove_partitioned_foreign_key :issues, :projects
- #
- # Available options:
- #
- # :column - name of the referencing column (otherwise inferred from the referenced table name)
- # :primary_key - name of the primary key in the referenced table (defaults to id)
- #
- def remove_partitioned_foreign_key(from_table, to_table, column: nil, primary_key: :id)
- update_foreign_keys(from_table, to_table, column, primary_key) do |current_keys, existing_key, specified_key|
- if existing_key
- existing_key.delete
- current_keys.delete(existing_key)
- else
- Gitlab::AppLogger.warn "foreign key not removed because it doesn't exist: #{specified_key}"
- end
-
- current_keys
- end
- end
-
- private
-
- def fk_function_name(table)
- object_name(table, 'fk_cascade_function')
- end
-
- def fk_trigger_name(table)
- object_name(table, 'fk_cascade_trigger')
- end
-
- def fk_from_spec(from_table, to_table, from_column, to_column, cascade_delete)
- PartitionedForeignKey.new(from_table: from_table.to_s, to_table: to_table.to_s, from_column: from_column.to_s,
- to_column: to_column.to_s, cascade_delete: cascade_delete)
- end
-
- def update_foreign_keys(from_table, to_table, from_column, to_column, cascade_delete = nil)
- assert_not_in_transaction_block(scope: 'partitioned foreign key')
-
- from_column ||= "#{to_table.to_s.singularize}_id"
- specified_key = fk_from_spec(from_table, to_table, from_column, to_column, cascade_delete)
-
- current_keys = PartitionedForeignKey.by_referenced_table(to_table).to_a
- existing_key = find_existing_key(current_keys, specified_key)
-
- final_keys = yield current_keys, existing_key, specified_key
-
- fn_name = fk_function_name(to_table)
- trigger_name = fk_trigger_name(to_table)
-
- with_lock_retries do
- drop_trigger(to_table, trigger_name, if_exists: true)
-
- if final_keys.empty?
- drop_function(fn_name, if_exists: true)
- else
- create_or_replace_fk_function(fn_name, final_keys)
- create_trigger(to_table, trigger_name, fn_name, fires: 'AFTER DELETE')
- end
- end
- end
-
- def extract_cascade_option(on_delete)
- case on_delete
- when :cascade then true
- when :nullify then false
- else raise ArgumentError, "invalid option #{on_delete} for :on_delete"
- end
- end
-
- def find_existing_key(keys, key)
- keys.find { |k| k.from_table == key.from_table && k.from_column == key.from_column }
- end
-
- def create_or_replace_fk_function(fn_name, fk_specs)
- create_trigger_function(fn_name, replace: true) do
- cascade_statements = build_cascade_statements(fk_specs)
- cascade_statements << 'RETURN OLD;'
-
- cascade_statements.join("\n")
- end
- end
-
- def build_cascade_statements(foreign_keys)
- foreign_keys.map do |fks|
- if fks.cascade_delete?
- "DELETE FROM #{fks.from_table} WHERE #{fks.from_column} = OLD.#{fks.to_column};"
- else
- "UPDATE #{fks.from_table} SET #{fks.from_column} = NULL WHERE #{fks.from_column} = OLD.#{fks.to_column};"
- end
- end
- end
end
end
end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key.rb b/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key.rb
deleted file mode 100644
index f9a90511f9b..00000000000
--- a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- module PartitioningMigrationHelpers
- class PartitionedForeignKey < ApplicationRecord
- validates_with PartitionedForeignKeyValidator
-
- scope :by_referenced_table, ->(table) { where(to_table: table) }
- end
- end
- end
-end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key_validator.rb b/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key_validator.rb
deleted file mode 100644
index 089cf2b8931..00000000000
--- a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key_validator.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- module PartitioningMigrationHelpers
- class PartitionedForeignKeyValidator < ActiveModel::Validator
- def validate(record)
- validate_key_part(record, :from_table, :from_column)
- validate_key_part(record, :to_table, :to_column)
- end
-
- private
-
- def validate_key_part(record, table_field, column_field)
- if !connection.table_exists?(record[table_field])
- record.errors.add(table_field, 'must be a valid table')
- elsif !connection.column_exists?(record[table_field], record[column_field])
- record.errors.add(column_field, 'must be a valid column')
- end
- end
-
- def connection
- ActiveRecord::Base.connection
- end
- end
- end
- end
-end
diff --git a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
index aa46b98be5d..580cab5622d 100644
--- a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
+++ b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
@@ -11,17 +11,17 @@ module Gitlab
# In order to not use a possible complex time consuming query when calculating min and max values,
# the start and finish can be sent specifically, start and finish should contain max and min values for PRIMARY KEY of
# relation (most cases `id` column) rather than counted attribute eg:
- # estimate_distinct_count(start: ::Project.with_active_services.minimum(:id), finish: ::Project.with_active_services.maximum(:id))
+ # estimate_distinct_count(start: ::Project.aimed_for_deletion.minimum(:id), finish: ::Project.aimed_for_deletion.maximum(:id))
#
# Grouped relations are NOT supported yet.
#
# @example Usage
# ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project, :creator_id).execute
- # ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project.with_active_services.service_desk_enabled.where(time_period))
+ # ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project.aimed_for_deletion.service_desk_enabled.where(time_period))
# .execute(
# batch_size: 1_000,
- # start: ::Project.with_active_services.service_desk_enabled.where(time_period).minimum(:id),
- # finish: ::Project.with_active_services.service_desk_enabled.where(time_period).maximum(:id)
+ # start: ::Project.aimed_for_deletion.service_desk_enabled.where(time_period).minimum(:id),
+ # finish: ::Project.aimed_for_deletion.service_desk_enabled.where(time_period).maximum(:id)
# )
#
# @note HyperLogLog is an PROBABILISTIC algorithm that ESTIMATES distinct count of given attribute value for supplied relation
diff --git a/lib/gitlab/database/postgres_index.rb b/lib/gitlab/database/postgres_index.rb
index 6e734834841..58e4e7e7924 100644
--- a/lib/gitlab/database/postgres_index.rb
+++ b/lib/gitlab/database/postgres_index.rb
@@ -7,6 +7,7 @@ module Gitlab
self.table_name = 'postgres_indexes'
self.primary_key = 'identifier'
+ self.inheritance_column = :_type_disabled
has_one :bloat_estimate, class_name: 'Gitlab::Database::PostgresIndexBloatEstimate', foreign_key: :identifier
has_many :reindexing_actions, class_name: 'Gitlab::Database::Reindexing::ReindexAction', foreign_key: :index_identifier
@@ -17,12 +18,12 @@ module Gitlab
find(identifier)
end
- # A 'regular' index is a non-unique index,
- # that does not serve an exclusion constraint and
- # is defined on a table that is not partitioned.
- scope :regular, -> { where(unique: false, partitioned: false, exclusion: false)}
+ # Indexes with reindexing support
+ scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) }
- scope :not_match, ->(regex) { where("name !~ ?", regex)}
+ scope :not_match, ->(regex) { where("name !~ ?", regex) }
+
+ scope :match, ->(regex) { where("name ~* ?", regex) }
scope :not_recently_reindexed, -> do
recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier')
@@ -30,10 +31,19 @@ module Gitlab
where('NOT EXISTS (?)', recent_actions)
end
+ def reset
+ reload # rubocop:disable Cop/ActiveRecordAssociationReload
+ clear_memoization(:bloat_size)
+ end
+
def bloat_size
strong_memoize(:bloat_size) { bloat_estimate&.bloat_size || 0 }
end
+ def relative_bloat_level
+ bloat_size / ondisk_size_bytes.to_f
+ end
+
def to_s
name
end
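
(Hedged example of the new scopes above, mirroring how Reindexing.candidate_indexes uses
them later in this diff; the ordering and limit are assumptions.)

  Gitlab::Database::PostgresIndex
    .reindexing_support
    .not_recently_reindexed
    .order(ondisk_size_bytes: :desc)
    .limit(2)
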
diff --git a/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb
index 59bd24d3c37..a2e7f4befab 100644
--- a/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb
+++ b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb
@@ -7,8 +7,7 @@ module Gitlab
extend ActiveSupport::Concern
def dump_schema_information # :nodoc:
- versions = schema_migration.all_versions
- Gitlab::Database::SchemaVersionFiles.touch_all(versions) if versions.any?
+ Gitlab::Database::SchemaMigrations.touch_all(self)
nil
end
diff --git a/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb b/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb
index 9f664fa2137..71d2554844e 100644
--- a/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb
+++ b/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb
@@ -22,7 +22,7 @@ module Gitlab
end
def force_disconnect_timer
- @force_disconnect_timer ||= ConnectionTimer.starting_now
+ @force_disconnect_timer ||= ::Gitlab::Database::ConnectionTimer.starting_now
end
end
end
diff --git a/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb
index cf8342941c4..30060ecb34f 100644
--- a/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb
+++ b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb
@@ -6,9 +6,10 @@ module Gitlab
module LoadSchemaVersionsMixin
extend ActiveSupport::Concern
- def structure_load(*args)
- super(*args)
- Gitlab::Database::SchemaVersionFiles.load_all
+ def structure_load(...)
+ super(...)
+
+ Gitlab::Database::SchemaMigrations.load_all(connection)
end
end
end
diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb
index 0cfad690283..841e04ccbd1 100644
--- a/lib/gitlab/database/reindexing.rb
+++ b/lib/gitlab/database/reindexing.rb
@@ -6,6 +6,8 @@ module Gitlab
# Number of indexes to reindex per invocation
DEFAULT_INDEXES_PER_INVOCATION = 2
+ SUPPORTED_TYPES = %w(btree gist).freeze
+
# candidate_indexes: Array of Gitlab::Database::PostgresIndex
def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION)
IndexSelection.new(candidate_indexes).take(how_many).each do |index|
@@ -15,10 +17,8 @@ module Gitlab
def self.candidate_indexes
Gitlab::Database::PostgresIndex
- .regular
- .where('NOT expression')
- .not_match("^#{ConcurrentReindex::TEMPORARY_INDEX_PREFIX}")
- .not_match("^#{ConcurrentReindex::REPLACED_INDEX_PREFIX}")
+ .not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
+ .reindexing_support
end
end
end
diff --git a/lib/gitlab/database/reindexing/concurrent_reindex.rb b/lib/gitlab/database/reindexing/concurrent_reindex.rb
deleted file mode 100644
index 7e2dd55d21b..00000000000
--- a/lib/gitlab/database/reindexing/concurrent_reindex.rb
+++ /dev/null
@@ -1,154 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- module Reindexing
- class ConcurrentReindex
- include Gitlab::Utils::StrongMemoize
-
- ReindexError = Class.new(StandardError)
-
- PG_IDENTIFIER_LENGTH = 63
- TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
- REPLACED_INDEX_PREFIX = 'old_reindex_'
- STATEMENT_TIMEOUT = 9.hours
-
- # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
- # which only conflicts with DDL and vacuum. We therefore execute this with a rather
- # high lock timeout and a long pause in between retries. This is an alternative to
- # setting a high statement timeout, which would lead to a long running query with effects
- # on e.g. vacuum.
- REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
-
- attr_reader :index, :logger
-
- def initialize(index, logger: Gitlab::AppLogger)
- @index = index
- @logger = logger
- end
-
- def perform
- raise ReindexError, 'UNIQUE indexes are currently not supported' if index.unique?
- raise ReindexError, 'partitioned indexes are currently not supported' if index.partitioned?
- raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
- raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name.start_with?(TEMPORARY_INDEX_PREFIX, REPLACED_INDEX_PREFIX)
-
- logger.info "Starting reindex of #{index}"
-
- with_rebuilt_index do |replacement_index|
- swap_index(replacement_index)
- end
- end
-
- private
-
- def with_rebuilt_index
- if Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
- logger.debug("dropping dangling index from previous run (if it exists): #{replacement_index_name}")
- remove_index(index.schema, replacement_index_name)
- end
-
- create_replacement_index_statement = index.definition
- .sub(/CREATE INDEX #{index.name}/, "CREATE INDEX CONCURRENTLY #{replacement_index_name}")
-
- logger.info("creating replacement index #{replacement_index_name}")
- logger.debug("replacement index definition: #{create_replacement_index_statement}")
-
- set_statement_timeout do
- connection.execute(create_replacement_index_statement)
- end
-
- replacement_index = Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
-
- unless replacement_index.valid_index?
- message = 'replacement index was created as INVALID'
- logger.error("#{message}, cleaning up")
- raise ReindexError, "failed to reindex #{index}: #{message}"
- end
-
- # Some expression indexes (aka functional indexes)
- # require additional statistics. The existing statistics
- # are tightly bound to the original index. We have to
- # rebuild statistics for the new index before dropping
- # the original one.
- rebuild_statistics if index.expression?
-
- yield replacement_index
- ensure
- begin
- remove_index(index.schema, replacement_index_name)
- rescue StandardError => e
- logger.error(e)
- end
- end
-
- def swap_index(replacement_index)
- logger.info("swapping replacement index #{replacement_index} with #{index}")
-
- with_lock_retries do
- rename_index(index.schema, index.name, replaced_index_name)
- rename_index(replacement_index.schema, replacement_index.name, index.name)
- rename_index(index.schema, replaced_index_name, replacement_index.name)
- end
- end
-
- def rename_index(schema, old_index_name, new_index_name)
- connection.execute(<<~SQL)
- ALTER INDEX #{quote_table_name(schema)}.#{quote_table_name(old_index_name)}
- RENAME TO #{quote_table_name(new_index_name)}
- SQL
- end
-
- def remove_index(schema, name)
- logger.info("Removing index #{schema}.#{name}")
-
- retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
- timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
- klass: self.class,
- logger: logger
- )
-
- retries.run(raise_on_exhaustion: false) do
- connection.execute(<<~SQL)
- DROP INDEX CONCURRENTLY
- IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
- SQL
- end
- end
-
- def rebuild_statistics
- logger.info("rebuilding table statistics for #{index.schema}.#{index.tablename}")
-
- connection.execute(<<~SQL)
- ANALYZE #{quote_table_name(index.schema)}.#{quote_table_name(index.tablename)}
- SQL
- end
-
- def replacement_index_name
- @replacement_index_name ||= "#{TEMPORARY_INDEX_PREFIX}#{index.indexrelid}"
- end
-
- def replaced_index_name
- @replaced_index_name ||= "#{REPLACED_INDEX_PREFIX}#{index.indexrelid}"
- end
-
- def with_lock_retries(&block)
- arguments = { klass: self.class, logger: logger }
- Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
- end
-
- def set_statement_timeout
- execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
- yield
- ensure
- execute('RESET statement_timeout')
- end
-
- delegate :execute, :quote_table_name, to: :connection
- def connection
- @connection ||= ActiveRecord::Base.connection
- end
- end
- end
- end
-end
diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb
index d68f47b5b6c..13298f67ca9 100644
--- a/lib/gitlab/database/reindexing/coordinator.rb
+++ b/lib/gitlab/database/reindexing/coordinator.rb
@@ -41,7 +41,7 @@ module Gitlab
end
def perform_for(index, action)
- ConcurrentReindex.new(index).perform
+ ReindexConcurrently.new(index).perform
rescue StandardError
action.state = :failed
diff --git a/lib/gitlab/database/reindexing/index_selection.rb b/lib/gitlab/database/reindexing/index_selection.rb
index 406e70791df..2186384e7d7 100644
--- a/lib/gitlab/database/reindexing/index_selection.rb
+++ b/lib/gitlab/database/reindexing/index_selection.rb
@@ -6,6 +6,12 @@ module Gitlab
class IndexSelection
include Enumerable
+ # Only reindex indexes with a relative bloat level (bloat estimate / size) higher than this
+ MINIMUM_RELATIVE_BLOAT = 0.2
+
+ # Only consider indexes with a total ondisk size in this range (before reindexing)
+ INDEX_SIZE_RANGE = (1.gigabyte..100.gigabyte).freeze
+
delegate :each, to: :indexes
def initialize(candidates)
@@ -24,11 +30,12 @@ module Gitlab
# we force a N+1 pattern here and estimate bloat on a per-index
# basis.
- @indexes ||= filter_candidates.sort_by(&:bloat_size).reverse
- end
-
- def filter_candidates
- candidates.not_recently_reindexed
+ @indexes ||= candidates
+ .not_recently_reindexed
+ .where(ondisk_size_bytes: INDEX_SIZE_RANGE)
+ .sort_by(&:relative_bloat_level) # forced N+1
+ .reverse
+ .select { |candidate| candidate.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT }
end
end
end
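
The new filter keys off a relative bloat level, i.e. the estimated bloat divided by the index's on-disk size, and only considers indexes whose size falls inside INDEX_SIZE_RANGE. A worked example with illustrative numbers:

# relative bloat level = estimated bloat bytes / on-disk index size bytes
bloat_estimate = 3 * 1024**3    # ~3 GiB estimated bloat
ondisk_size    = 10 * 1024**3   # ~10 GiB on-disk size

relative_bloat = bloat_estimate.to_f / ondisk_size  # => 0.3

# This index qualifies: its size is inside INDEX_SIZE_RANGE (1 GB..100 GB)
# and 0.3 >= MINIMUM_RELATIVE_BLOAT (0.2). Qualifying candidates are then
# processed from most to least relatively bloated.
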
diff --git a/lib/gitlab/database/reindexing/reindex_action.rb b/lib/gitlab/database/reindexing/reindex_action.rb
index 7e58201889f..ff465fffb74 100644
--- a/lib/gitlab/database/reindexing/reindex_action.rb
+++ b/lib/gitlab/database/reindexing/reindex_action.rb
@@ -10,7 +10,7 @@ module Gitlab
enum state: { started: 0, finished: 1, failed: 2 }
# Amount of time to consider a previous reindexing *recent*
- RECENT_THRESHOLD = 7.days
+ RECENT_THRESHOLD = 10.days
scope :recent, -> { where(state: :finished).where('action_end > ?', Time.zone.now - RECENT_THRESHOLD) }
diff --git a/lib/gitlab/database/reindexing/reindex_concurrently.rb b/lib/gitlab/database/reindexing/reindex_concurrently.rb
new file mode 100644
index 00000000000..8d9f9f5abdd
--- /dev/null
+++ b/lib/gitlab/database/reindexing/reindex_concurrently.rb
@@ -0,0 +1,138 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ # This is a >= PG12 reindexing strategy based on `REINDEX CONCURRENTLY`
+ class ReindexConcurrently
+ ReindexError = Class.new(StandardError)
+
+ TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*'
+ STATEMENT_TIMEOUT = 9.hours
+ PG_MAX_INDEX_NAME_LENGTH = 63
+
+ # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
+ # which only conflicts with DDL and vacuum. We therefore execute this with a rather
+ # high lock timeout and a long pause in between retries. This is an alternative to
+ # setting a high statement timeout, which would lead to a long running query with effects
+ # on e.g. vacuum.
+ REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
+
+ attr_reader :index, :logger
+
+ def initialize(index, logger: Gitlab::AppLogger)
+ @index = index
+ @logger = logger
+ end
+
+ def perform
+ raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
+ raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name =~ /#{TEMPORARY_INDEX_PATTERN}/
+
+ # Expression indexes require additional statistics in `pg_statistic`:
+ # select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index');
+ #
+ # In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96.
+ # Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com
+ # following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885).
+ #
+ # While this has been backpatched, we continue to disable expression indexes until further review.
+ raise ReindexError, 'expression indexes are currently not supported' if index.expression?
+
+ begin
+ with_logging do
+ set_statement_timeout do
+ execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
+ end
+ end
+ ensure
+ cleanup_dangling_indexes
+ end
+ end
+
+ private
+
+ def with_logging
+ bloat_size = index.bloat_size
+ ondisk_size_before = index.ondisk_size_bytes
+
+ logger.info(
+ message: "Starting reindex of #{index}",
+ index: index.identifier,
+ table: index.tablename,
+ estimated_bloat_bytes: bloat_size,
+ index_size_before_bytes: ondisk_size_before,
+ relative_bloat_level: index.relative_bloat_level
+ )
+
+ duration = Benchmark.realtime do
+ yield
+ end
+
+ index.reset
+
+ logger.info(
+ message: "Finished reindex of #{index}",
+ index: index.identifier,
+ table: index.tablename,
+ estimated_bloat_bytes: bloat_size,
+ index_size_before_bytes: ondisk_size_before,
+ index_size_after_bytes: index.ondisk_size_bytes,
+ relative_bloat_level: index.relative_bloat_level,
+ duration_s: duration.round(2)
+ )
+ end
+
+ def cleanup_dangling_indexes
+ Gitlab::Database::PostgresIndex.match("#{TEMPORARY_INDEX_PATTERN}$").each do |lingering_index|
+ # Example lingering index name: some_index_ccnew1
+
+ # Example prefix: 'some_index'
+ prefix = lingering_index.name.gsub(/#{TEMPORARY_INDEX_PATTERN}/, '')
+
+ # Example suffix: '_ccnew1'
+ suffix = lingering_index.name.match(/#{TEMPORARY_INDEX_PATTERN}/)[0]
+
+ # Only remove if the lingering index name could have been chosen
+ # as a result of a REINDEX operation (considering that PostgreSQL
+ # truncates index names to 63 chars and adds a suffix).
+ if index.name[0...PG_MAX_INDEX_NAME_LENGTH - suffix.length] == prefix
+ remove_index(lingering_index)
+ end
+ end
+ end
+
+ def remove_index(index)
+ logger.info("Removing dangling index #{index.identifier}")
+
+ retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
+ timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
+ klass: self.class,
+ logger: logger
+ )
+
+ retries.run(raise_on_exhaustion: false) do
+ execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
+ end
+ end
+
+ def with_lock_retries(&block)
+ arguments = { klass: self.class, logger: logger }
+ Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
+ end
+
+ def set_statement_timeout
+ execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
+ yield
+ ensure
+ execute('RESET statement_timeout')
+ end
+
+ delegate :execute, :quote_table_name, to: :connection
+ def connection
+ @connection ||= ActiveRecord::Base.connection
+ end
+ end
+ end
+ end
+end
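
The cleanup_dangling_indexes logic relies on the fact that an interrupted REINDEX CONCURRENTLY leaves behind an invalid index whose name is the original name (truncated to PostgreSQL's 63-character identifier limit) plus a _ccnew suffix. A worked example of the prefix/suffix check, using an illustrative index name:

# An interrupted REINDEX CONCURRENTLY of `index_users_on_email` can leave
# behind an invalid index named `index_users_on_email_ccnew1`.
lingering = 'index_users_on_email_ccnew1'
original  = 'index_users_on_email'

suffix = lingering.match(/_ccnew[0-9]*/)[0]   # => "_ccnew1"
prefix = lingering.gsub(/_ccnew[0-9]*/, '')   # => "index_users_on_email"

# PostgreSQL truncates identifiers to 63 characters, so the original name is
# compared only up to the space left once the suffix is appended.
original[0...(63 - suffix.length)] == prefix  # => true, safe to drop the leftover
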
diff --git a/lib/gitlab/database/schema_cleaner.rb b/lib/gitlab/database/schema_cleaner.rb
index 8f93da2b66c..c3cdcf1450d 100644
--- a/lib/gitlab/database/schema_cleaner.rb
+++ b/lib/gitlab/database/schema_cleaner.rb
@@ -30,11 +30,7 @@ module Gitlab
structure.gsub!(/\n{3,}/, "\n\n")
io << structure.strip
- io << <<~MSG
- -- schema_migrations.version information is no longer stored in this file,
- -- but instead tracked in the db/schema_migrations directory
- -- see https://gitlab.com/gitlab-org/gitlab/-/issues/218590 for details
- MSG
+ io << "\n"
nil
end
diff --git a/lib/gitlab/database/schema_migrations.rb b/lib/gitlab/database/schema_migrations.rb
new file mode 100644
index 00000000000..1c49ed8d946
--- /dev/null
+++ b/lib/gitlab/database/schema_migrations.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module SchemaMigrations
+ def self.touch_all(connection)
+ context = Gitlab::Database::SchemaMigrations::Context.new(connection)
+
+ Gitlab::Database::SchemaMigrations::Migrations.new(context).touch_all
+ end
+
+ def self.load_all(connection)
+ context = Gitlab::Database::SchemaMigrations::Context.new(connection)
+
+ Gitlab::Database::SchemaMigrations::Migrations.new(context).load_all
+ end
+ end
+ end
+end
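
The module acts as a small facade over Context and Migrations. A minimal usage sketch, assuming a booted Rails environment; the real call sites (the schema dump and load hooks) are not part of this diff:

connection = ActiveRecord::Base.connection

# After dumping the schema: write one file per applied migration version
# into db/schema_migrations (or db/<name>_schema_migrations for other databases).
Gitlab::Database::SchemaMigrations.touch_all(connection)

# After loading structure.sql: repopulate the schema_migrations table from
# the version files on disk.
Gitlab::Database::SchemaMigrations.load_all(connection)
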
diff --git a/lib/gitlab/database/schema_migrations/context.rb b/lib/gitlab/database/schema_migrations/context.rb
new file mode 100644
index 00000000000..bd8b9bed2c1
--- /dev/null
+++ b/lib/gitlab/database/schema_migrations/context.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module SchemaMigrations
+ class Context
+ attr_reader :connection
+
+ def initialize(connection)
+ @connection = connection
+ end
+
+ def schema_directory
+ @schema_directory ||=
+ if ActiveRecord::Base.configurations.primary?(database_name)
+ File.join(db_dir, 'schema_migrations')
+ else
+ File.join(db_dir, "#{database_name}_schema_migrations")
+ end
+ end
+
+ def versions_to_create
+ versions_from_database = @connection.schema_migration.all_versions
+ versions_from_migration_files = @connection.migration_context.migrations.map { |m| m.version.to_s }
+
+ versions_from_database & versions_from_migration_files
+ end
+
+ private
+
+ def database_name
+ @database_name ||= @connection.pool.db_config.name
+ end
+
+ def db_dir
+ @db_dir ||= Rails.application.config.paths["db"].first
+ end
+ end
+ end
+ end
+end
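
versions_to_create is the intersection of the versions recorded in the schema_migrations table and the versions that still have a migration file on disk. An illustrative walk-through with made-up version numbers:

versions_from_database        = %w[20210601000001 20210601000002 20210601000003]
versions_from_migration_files = %w[20210601000001 20210601000003 20210701000009]

# Only versions that are both applied in the database *and* still backed by a
# migration file are written out as version files:
versions_from_database & versions_from_migration_files
# => ["20210601000001", "20210601000003"]
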
diff --git a/lib/gitlab/database/schema_migrations/migrations.rb b/lib/gitlab/database/schema_migrations/migrations.rb
new file mode 100644
index 00000000000..3b16b7f1b81
--- /dev/null
+++ b/lib/gitlab/database/schema_migrations/migrations.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module SchemaMigrations
+ class Migrations
+ MIGRATION_VERSION_GLOB = '20[0-9][0-9]*'
+
+ def initialize(context)
+ @context = context
+ end
+
+ def touch_all
+ return unless @context.versions_to_create.any?
+
+ version_filepaths = version_filenames.map { |f| File.join(schema_directory, f) }
+ FileUtils.rm(version_filepaths)
+
+ @context.versions_to_create.each do |version|
+ version_filepath = File.join(schema_directory, version)
+
+ File.open(version_filepath, 'w') do |file|
+ file << Digest::SHA256.hexdigest(version)
+ end
+ end
+ end
+
+ def load_all
+ return if version_filenames.empty?
+
+ values = version_filenames.map { |vf| "('#{@context.connection.quote_string(vf)}')" }
+
+ @context.connection.execute(<<~SQL)
+ INSERT INTO schema_migrations (version)
+ VALUES #{values.join(',')}
+ ON CONFLICT DO NOTHING
+ SQL
+ end
+
+ private
+
+ def schema_directory
+ @context.schema_directory
+ end
+
+ def version_filenames
+ @version_filenames ||= Dir.glob(MIGRATION_VERSION_GLOB, base: schema_directory)
+ end
+ end
+ end
+ end
+end
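
touch_all writes one file per applied version (the file body is the SHA256 of the version string), and load_all turns the filenames back into schema_migrations rows. A sketch of the resulting layout and SQL, with illustrative version numbers:

require 'digest'

# On-disk layout produced by #touch_all:
#
#   db/schema_migrations/20210601000001   # body: Digest::SHA256.hexdigest("20210601000001")
#   db/schema_migrations/20210601000003   # body: Digest::SHA256.hexdigest("20210601000003")
#
# Loading those two files with #load_all executes, in effect:
#
#   INSERT INTO schema_migrations (version)
#   VALUES ('20210601000001'),('20210601000003')
#   ON CONFLICT DO NOTHING
puts Digest::SHA256.hexdigest('20210601000001')  # the file body written by #touch_all
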
diff --git a/lib/gitlab/database/schema_version_files.rb b/lib/gitlab/database/schema_version_files.rb
deleted file mode 100644
index 27a942404ef..00000000000
--- a/lib/gitlab/database/schema_version_files.rb
+++ /dev/null
@@ -1,64 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- class SchemaVersionFiles
- SCHEMA_DIRECTORY = 'db/schema_migrations'
- MIGRATION_DIRECTORIES = %w[db/migrate db/post_migrate].freeze
- MIGRATION_VERSION_GLOB = '20[0-9][0-9]*'
-
- def self.touch_all(versions_from_database)
- versions_from_migration_files = find_versions_from_migration_files
-
- version_filepaths = find_version_filenames.map { |f| schema_directory.join(f) }
- FileUtils.rm(version_filepaths)
-
- versions_to_create = versions_from_database & versions_from_migration_files
- versions_to_create.each do |version|
- version_filepath = schema_directory.join(version)
-
- File.open(version_filepath, 'w') do |file|
- file << Digest::SHA256.hexdigest(version)
- end
- end
- end
-
- def self.load_all
- version_filenames = find_version_filenames
- return if version_filenames.empty?
-
- values = version_filenames.map { |vf| "('#{connection.quote_string(vf)}')" }
- connection.execute(<<~SQL)
- INSERT INTO schema_migrations (version)
- VALUES #{values.join(',')}
- ON CONFLICT DO NOTHING
- SQL
- end
-
- def self.schema_directory
- @schema_directory ||= Rails.root.join(SCHEMA_DIRECTORY)
- end
-
- def self.migration_directories
- @migration_directories ||= MIGRATION_DIRECTORIES.map { |dir| Rails.root.join(dir) }
- end
-
- def self.find_version_filenames
- Dir.glob(MIGRATION_VERSION_GLOB, base: schema_directory)
- end
-
- def self.find_versions_from_migration_files
- migration_directories.each_with_object([]) do |directory, migration_versions|
- directory_migrations = Dir.glob(MIGRATION_VERSION_GLOB, base: directory)
- directory_versions = directory_migrations.map! { |m| m.split('_').first }
-
- migration_versions.concat(directory_versions)
- end
- end
-
- def self.connection
- ActiveRecord::Base.connection
- end
- end
- end
-end