Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--lib/gitlab/database/background_migration/batched_migration.rb14
-rw-r--r--lib/gitlab/database/background_migration/health_status.rb2
-rw-r--r--lib/gitlab/database/batch_average_counter.rb103
-rw-r--r--lib/gitlab/database/batch_count.rb6
-rw-r--r--lib/gitlab/database/batch_counter.rb31
-rw-r--r--lib/gitlab/database/gitlab_schemas.yml8
-rw-r--r--lib/gitlab/database/lock_writes_manager.rb45
-rw-r--r--lib/gitlab/database/migration_helpers.rb15
-rw-r--r--lib/gitlab/database/migrations/base_background_runner.rb2
-rw-r--r--lib/gitlab/database/migrations/test_background_runner.rb1
-rw-r--r--lib/gitlab/database/migrations/test_batched_background_runner.rb75
-rw-r--r--lib/gitlab/database/partitioning.rb12
-rw-r--r--lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb214
-rw-r--r--lib/gitlab/database/partitioning/partition_manager.rb25
-rw-r--r--lib/gitlab/database/partitioning/single_numeric_list_partition.rb20
-rw-r--r--lib/gitlab/database/partitioning/sliding_list_strategy.rb17
-rw-r--r--lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb34
-rw-r--r--lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb48
-rw-r--r--lib/gitlab/database/postgres_constraint.rb29
-rw-r--r--lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb41
-rw-r--r--lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb2
-rw-r--r--lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb2
-rw-r--r--lib/gitlab/database/reflection.rb2
-rw-r--r--lib/gitlab/database/reindexing.rb2
-rw-r--r--lib/gitlab/database/tables_sorted_by_foreign_keys.rb41
-rw-r--r--lib/gitlab/database/tables_truncate.rb96
26 files changed, 820 insertions, 67 deletions
diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb
index 6aed1eed994..45f52765d0f 100644
--- a/lib/gitlab/database/background_migration/batched_migration.rb
+++ b/lib/gitlab/database/background_migration/batched_migration.rb
@@ -8,6 +8,7 @@ module Gitlab
BATCH_CLASS_MODULE = "#{JOB_CLASS_MODULE}::BatchingStrategies"
MAXIMUM_FAILED_RATIO = 0.5
MINIMUM_JOBS = 50
+ FINISHED_PROGRESS_VALUE = 100
self.table_name = :batched_background_migrations
@@ -24,6 +25,7 @@ module Gitlab
scope :queue_order, -> { order(id: :asc) }
scope :queued, -> { with_statuses(:active, :paused) }
+ scope :ordered_by_created_at_desc, -> { order(created_at: :desc) }
# on_hold_until is a temporary runtime status which puts execution "on hold"
scope :executable, -> { with_status(:active).where('on_hold_until IS NULL OR on_hold_until < NOW()') }
@@ -57,11 +59,11 @@ module Gitlab
state :finalizing, value: 5
event :pause do
- transition any => :paused
+ transition [:active, :paused] => :paused
end
event :execute do
- transition any => :active
+ transition [:active, :paused, :failed] => :active
end
event :finish do
@@ -231,7 +233,15 @@ module Gitlab
"BatchedMigration[id: #{id}]"
end
+ # Computes an estimation of the progress of the migration in percents.
+ #
+ # Because `total_tuple_count` is an estimation of the tuples based on DB statistics
+ # when the migration is complete there can actually be more or less tuples that initially
+ # estimated as `total_tuple_count` so the progress may not show 100%. For that reason when
+ # we know migration completed successfully, we just return the 100 value
def progress
+ return FINISHED_PROGRESS_VALUE if finished?
+
return unless total_tuple_count.to_i > 0
100 * migrated_tuple_count / total_tuple_count
diff --git a/lib/gitlab/database/background_migration/health_status.rb b/lib/gitlab/database/background_migration/health_status.rb
index 9a283074b32..506d2996ad5 100644
--- a/lib/gitlab/database/background_migration/health_status.rb
+++ b/lib/gitlab/database/background_migration/health_status.rb
@@ -18,7 +18,7 @@ module Gitlab
indicator.new(migration.health_context).evaluate
rescue StandardError => e
Gitlab::ErrorTracking.track_exception(e, migration_id: migration.id,
- job_class_name: migration.job_class_name)
+ job_class_name: migration.job_class_name)
Signals::Unknown.new(indicator, reason: "unexpected error: #{e.message} (#{e.class})")
end
diff --git a/lib/gitlab/database/batch_average_counter.rb b/lib/gitlab/database/batch_average_counter.rb
new file mode 100644
index 00000000000..9cb1e34ab67
--- /dev/null
+++ b/lib/gitlab/database/batch_average_counter.rb
@@ -0,0 +1,103 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class BatchAverageCounter
+ COLUMN_FALLBACK = 0
+ DEFAULT_BATCH_SIZE = 1_000
+ FALLBACK = -1
+ MAX_ALLOWED_LOOPS = 10_000
+ OFFSET_BY_ONE = 1
+ SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
+
+ attr_reader :relation, :column
+
+ def initialize(relation, column)
+ @relation = relation
+ @column = wrap_column(relation, column)
+ end
+
+ def count(batch_size: nil)
+ raise 'BatchAverageCounter can not be run inside a transaction' if transaction_open?
+
+ batch_size = batch_size.presence || DEFAULT_BATCH_SIZE
+
+ start = column_start
+ finish = column_finish
+
+ total_sum = 0
+ total_records = 0
+
+ batch_start = start
+
+ while batch_start < finish
+ begin
+ batch_end = [batch_start + batch_size, finish].min
+ batch_relation = build_relation_batch(batch_start, batch_end)
+
+ # We use `sum` and `count` instead of `average` here to not run into an "average of averages"
+ # problem as batches will have different sizes, so we are essentially summing up the values for
+ # each batch separately, and then dividing that result on the total number of records.
+ batch_sum, batch_count = batch_relation.pick(column.sum, column.count)
+
+ total_sum += batch_sum.to_i
+ total_records += batch_count
+
+ batch_start = batch_end
+ rescue ActiveRecord::QueryCanceled => error # rubocop:disable Database/RescueQueryCanceled
+ # retry with a safe batch size & warmer cache
+ if batch_size >= 2 * DEFAULT_BATCH_SIZE
+ batch_size /= 2
+ else
+ log_canceled_batch_fetch(batch_start, batch_relation.to_sql, error)
+
+ return FALLBACK
+ end
+ end
+
+ sleep(SLEEP_TIME_IN_SECONDS)
+ end
+
+ return FALLBACK if total_records == 0
+
+ total_sum.to_f / total_records
+ end
+
+ private
+
+ def column_start
+ relation.unscope(:group, :having).minimum(column) || COLUMN_FALLBACK
+ end
+
+ def column_finish
+ (relation.unscope(:group, :having).maximum(column) || COLUMN_FALLBACK) + OFFSET_BY_ONE
+ end
+
+ def build_relation_batch(start, finish)
+ relation.where(column.between(start...finish))
+ end
+
+ def log_canceled_batch_fetch(batch_start, query, error)
+ Gitlab::AppJsonLogger
+ .error(
+ event: 'batch_count',
+ relation: relation.table_name,
+ operation: 'average',
+ start: batch_start,
+ query: query,
+ message: "Query has been canceled with message: #{error.message}"
+ )
+ end
+
+ def transaction_open?
+ relation.connection.transaction_open?
+ end
+
+ def wrap_column(relation, column)
+ return column if column.is_a?(Arel::Attributes::Attribute)
+
+ relation.arel_table[column]
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
index 92a41bb36ee..7a064fb4005 100644
--- a/lib/gitlab/database/batch_count.rb
+++ b/lib/gitlab/database/batch_count.rb
@@ -35,6 +35,10 @@ module Gitlab
BatchCounter.new(relation, column: column).count(batch_size: batch_size, start: start, finish: finish)
end
+ def batch_count_with_timeout(relation, column = nil, batch_size: nil, start: nil, finish: nil, timeout: nil, partial_results: nil)
+ BatchCounter.new(relation, column: column).count_with_timeout(batch_size: batch_size, start: start, finish: finish, timeout: timeout, partial_results: partial_results)
+ end
+
def batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size, start: start, finish: finish)
end
@@ -44,7 +48,7 @@ module Gitlab
end
def batch_average(relation, column, batch_size: nil, start: nil, finish: nil)
- BatchCounter.new(relation, column: nil, operation: :average, operation_args: [column]).count(batch_size: batch_size, start: start, finish: finish)
+ BatchAverageCounter.new(relation, column).count(batch_size: batch_size)
end
class << self
diff --git a/lib/gitlab/database/batch_counter.rb b/lib/gitlab/database/batch_counter.rb
index 522b598cd9d..abb62140503 100644
--- a/lib/gitlab/database/batch_counter.rb
+++ b/lib/gitlab/database/batch_counter.rb
@@ -6,7 +6,6 @@ module Gitlab
FALLBACK = -1
MIN_REQUIRED_BATCH_SIZE = 1_250
DEFAULT_SUM_BATCH_SIZE = 1_000
- DEFAULT_AVERAGE_BATCH_SIZE = 1_000
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
ALLOWED_MODES = [:itself, :distinct].freeze
@@ -27,12 +26,19 @@ module Gitlab
def unwanted_configuration?(finish, batch_size, start)
(@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) ||
(@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) ||
- (@operation == :average && batch_size < DEFAULT_AVERAGE_BATCH_SIZE) ||
(finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
start >= finish
end
def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
+ result = count_with_timeout(batch_size: batch_size, mode: mode, start: start, finish: finish, timeout: nil)
+
+ return FALLBACK if result[:status] != :completed
+
+ result[:count]
+ end
+
+ def count_with_timeout(batch_size: nil, mode: :itself, start: nil, finish: nil, timeout: nil, partial_results: nil)
raise 'BatchCount can not be run inside a transaction' if transaction_open?
check_mode!(mode)
@@ -44,12 +50,20 @@ module Gitlab
finish = actual_finish(finish)
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
- return FALLBACK if unwanted_configuration?(finish, batch_size, start)
+ return { status: :bad_config } if unwanted_configuration?(finish, batch_size, start)
- results = nil
+ results = partial_results
batch_start = start
+ start_time = ::Gitlab::Metrics::System.monotonic_time.seconds
+
while batch_start < finish
+
+ # Timeout elapsed, return partial result so the caller can continue later
+ if timeout && ::Gitlab::Metrics::System.monotonic_time.seconds - start_time > timeout
+ return { status: :timeout, partial_results: results, continue_from: batch_start }
+ end
+
begin
batch_end = [batch_start + batch_size, finish].min
batch_relation = build_relation_batch(batch_start, batch_end, mode)
@@ -62,14 +76,14 @@ module Gitlab
batch_size /= 2
else
log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error)
- return FALLBACK
+ return { status: :cancelled }
end
end
sleep(SLEEP_TIME_IN_SECONDS)
end
- results
+ { status: :completed, count: results }
end
def transaction_open?
@@ -94,7 +108,6 @@ module Gitlab
def batch_size_for_mode_and_operation(mode, operation)
return DEFAULT_SUM_BATCH_SIZE if operation == :sum
- return DEFAULT_AVERAGE_BATCH_SIZE if operation == :average
mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
end
@@ -132,10 +145,6 @@ module Gitlab
message: "Query has been canceled with message: #{error.message}"
)
end
-
- def not_group_by_query?
- !@relation.is_a?(ActiveRecord::Relation) || @relation.group_values.blank?
- end
end
end
end
diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml
index d05eee7d6e6..5725d7a4503 100644
--- a/lib/gitlab/database/gitlab_schemas.yml
+++ b/lib/gitlab/database/gitlab_schemas.yml
@@ -91,6 +91,7 @@ ci_job_artifact_states: :gitlab_ci
ci_minutes_additional_packs: :gitlab_ci
ci_namespace_monthly_usages: :gitlab_ci
ci_namespace_mirrors: :gitlab_ci
+ci_partitions: :gitlab_ci
ci_pending_builds: :gitlab_ci
ci_pipeline_artifacts: :gitlab_ci
ci_pipeline_chat_data: :gitlab_ci
@@ -182,6 +183,7 @@ design_management_versions: :gitlab_main
design_user_mentions: :gitlab_main
detached_partitions: :gitlab_shared
diff_note_positions: :gitlab_main
+dora_configurations: :gitlab_main
dora_daily_metrics: :gitlab_main
draft_notes: :gitlab_main
elastic_index_settings: :gitlab_main
@@ -228,6 +230,7 @@ geo_repository_deleted_events: :gitlab_main
geo_repository_renamed_events: :gitlab_main
geo_repository_updated_events: :gitlab_main
geo_reset_checksum_events: :gitlab_main
+ghost_user_migrations: :gitlab_main
gitlab_subscription_histories: :gitlab_main
gitlab_subscriptions: :gitlab_main
gpg_keys: :gitlab_main
@@ -315,6 +318,7 @@ merge_request_diff_details: :gitlab_main
merge_request_diff_files: :gitlab_main
merge_request_diffs: :gitlab_main
merge_request_metrics: :gitlab_main
+merge_request_predictions: :gitlab_main
merge_request_reviewers: :gitlab_main
merge_requests_closing_issues: :gitlab_main
merge_requests: :gitlab_main
@@ -380,6 +384,7 @@ packages_events: :gitlab_main
packages_helm_file_metadata: :gitlab_main
packages_maven_metadata: :gitlab_main
packages_npm_metadata: :gitlab_main
+packages_rpm_metadata: :gitlab_main
packages_nuget_dependency_link_metadata: :gitlab_main
packages_nuget_metadata: :gitlab_main
packages_package_file_build_infos: :gitlab_main
@@ -399,6 +404,7 @@ plans: :gitlab_main
pool_repositories: :gitlab_main
postgres_async_indexes: :gitlab_shared
postgres_autovacuum_activity: :gitlab_shared
+postgres_constraints: :gitlab_shared
postgres_foreign_keys: :gitlab_shared
postgres_index_bloat_estimates: :gitlab_shared
postgres_indexes: :gitlab_shared
@@ -479,6 +485,7 @@ sbom_components: :gitlab_main
sbom_occurrences: :gitlab_main
sbom_component_versions: :gitlab_main
sbom_sources: :gitlab_main
+sbom_vulnerable_component_versions: :gitlab_main
schema_migrations: :gitlab_internal
scim_identities: :gitlab_main
scim_oauth_access_tokens: :gitlab_main
@@ -549,6 +556,7 @@ user_statuses: :gitlab_main
user_synced_attributes_metadata: :gitlab_main
verification_codes: :gitlab_main
vulnerabilities: :gitlab_main
+vulnerability_advisories: :gitlab_main
vulnerability_exports: :gitlab_main
vulnerability_external_issue_links: :gitlab_main
vulnerability_feedback: :gitlab_main
diff --git a/lib/gitlab/database/lock_writes_manager.rb b/lib/gitlab/database/lock_writes_manager.rb
index cd483d616bb..fe75cd763b4 100644
--- a/lib/gitlab/database/lock_writes_manager.rb
+++ b/lib/gitlab/database/lock_writes_manager.rb
@@ -5,42 +5,63 @@ module Gitlab
class LockWritesManager
TRIGGER_FUNCTION_NAME = 'gitlab_schema_prevent_write'
- def initialize(table_name:, connection:, database_name:, logger: nil)
+ def initialize(table_name:, connection:, database_name:, logger: nil, dry_run: false)
@table_name = table_name
@connection = connection
@database_name = database_name
@logger = logger
+ @dry_run = dry_run
+ end
+
+ def table_locked_for_writes?(table_name)
+ query = <<~SQL
+ SELECT COUNT(*) from information_schema.triggers
+ WHERE event_object_table = '#{table_name}'
+ AND trigger_name = '#{write_trigger_name(table_name)}'
+ SQL
+
+ connection.select_value(query) == 3
end
def lock_writes
+ if table_locked_for_writes?(table_name)
+ logger&.info "Skipping lock_writes, because #{table_name} is already locked for writes"
+ return
+ end
+
logger&.info "Database: '#{database_name}', Table: '#{table_name}': Lock Writes".color(:yellow)
- sql = <<-SQL
- DROP TRIGGER IF EXISTS #{write_trigger_name(table_name)} ON #{table_name};
+ sql_statement = <<~SQL
CREATE TRIGGER #{write_trigger_name(table_name)}
BEFORE INSERT OR UPDATE OR DELETE OR TRUNCATE
ON #{table_name}
FOR EACH STATEMENT EXECUTE FUNCTION #{TRIGGER_FUNCTION_NAME}();
SQL
- with_retries(connection) do
- connection.execute(sql)
- end
+ execute_sql_statement(sql_statement)
end
def unlock_writes
logger&.info "Database: '#{database_name}', Table: '#{table_name}': Allow Writes".color(:green)
- sql = <<-SQL
- DROP TRIGGER IF EXISTS #{write_trigger_name(table_name)} ON #{table_name}
+ sql_statement = <<~SQL
+ DROP TRIGGER IF EXISTS #{write_trigger_name(table_name)} ON #{table_name};
SQL
- with_retries(connection) do
- connection.execute(sql)
- end
+ execute_sql_statement(sql_statement)
end
private
- attr_reader :table_name, :connection, :database_name, :logger
+ attr_reader :table_name, :connection, :database_name, :logger, :dry_run
+
+ def execute_sql_statement(sql)
+ if dry_run
+ logger&.info sql
+ else
+ with_retries(connection) do
+ connection.execute(sql)
+ end
+ end
+ end
def with_retries(connection, &block)
with_statement_timeout_retries do
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index db39524f4f6..e574422ce11 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -936,13 +936,14 @@ module Gitlab
def revert_backfill_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
columns = Array.wrap(columns)
- conditions = ActiveRecord::Base.sanitize_sql([
- 'job_class_name = :job_class_name AND table_name = :table_name AND column_name = :column_name AND job_arguments = :job_arguments',
- job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
- table_name: table,
- column_name: primary_key,
- job_arguments: [columns, columns.map { |column| convert_to_bigint_column(column) }].to_json
- ])
+ conditions = ActiveRecord::Base.sanitize_sql(
+ [
+ 'job_class_name = :job_class_name AND table_name = :table_name AND column_name = :column_name AND job_arguments = :job_arguments',
+ job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
+ table_name: table,
+ column_name: primary_key,
+ job_arguments: [columns, columns.map { |column| convert_to_bigint_column(column) }].to_json
+ ])
execute("DELETE FROM batched_background_migrations WHERE #{conditions}")
end
diff --git a/lib/gitlab/database/migrations/base_background_runner.rb b/lib/gitlab/database/migrations/base_background_runner.rb
index a9440cafd30..76982a9da9b 100644
--- a/lib/gitlab/database/migrations/base_background_runner.rb
+++ b/lib/gitlab/database/migrations/base_background_runner.rb
@@ -40,7 +40,7 @@ module Gitlab
instrumentation = Instrumentation.new(result_dir: per_background_migration_result_dir)
batch_names = (1..).each.lazy.map { |i| "batch_#{i}" }
- jobs.shuffle.each do |j|
+ jobs.each do |j|
break if run_until <= Time.current
instrumentation.observe(version: nil,
diff --git a/lib/gitlab/database/migrations/test_background_runner.rb b/lib/gitlab/database/migrations/test_background_runner.rb
index f7713237b38..6da2e098d43 100644
--- a/lib/gitlab/database/migrations/test_background_runner.rb
+++ b/lib/gitlab/database/migrations/test_background_runner.rb
@@ -15,6 +15,7 @@ module Gitlab
def jobs_by_migration_name
traditional_background_migrations.group_by { |j| class_name_for_job(j) }
+ .transform_values(&:shuffle)
end
private
diff --git a/lib/gitlab/database/migrations/test_batched_background_runner.rb b/lib/gitlab/database/migrations/test_batched_background_runner.rb
index f38d847b0e8..c27ae6a2c5d 100644
--- a/lib/gitlab/database/migrations/test_batched_background_runner.rb
+++ b/lib/gitlab/database/migrations/test_batched_background_runner.rb
@@ -4,6 +4,7 @@ module Gitlab
module Database
module Migrations
class TestBatchedBackgroundRunner < BaseBackgroundRunner
+ include Gitlab::Database::DynamicModelHelpers
attr_reader :connection
def initialize(result_dir:, connection:)
@@ -18,31 +19,81 @@ module Gitlab
.to_h do |migration|
batching_strategy = migration.batch_class.new(connection: connection)
- all_migration_jobs = []
+ smallest_batch_start = migration.next_min_value
- min_value = migration.next_min_value
+ table_max_value = define_batchable_model(migration.table_name, connection: connection)
+ .maximum(migration.column_name)
- while (next_bounds = batching_strategy.next_batch(
- migration.table_name,
- migration.column_name,
- batch_min_value: min_value,
- batch_size: migration.batch_size,
- job_arguments: migration.job_arguments
- ))
+ largest_batch_start = table_max_value - migration.batch_size
+
+ # variance is the portion of the batch range that we shrink between variance * 0 and variance * 1
+ # to pick actual batches to sample.
+ variance = largest_batch_start - smallest_batch_start
+
+ batch_starts = uniform_fractions
+ .lazy # frac varies from 0 to 1, values in smallest_batch_start..largest_batch_start
+ .map { |frac| (variance * frac).to_i + smallest_batch_start }
+
+ # Track previously run batches so that we stop sampling if a new batch would intersect an older one
+ completed_batches = []
+
+ jobs_to_sample = batch_starts
+ # Stop sampling if a batch would intersect a previous batch
+ .take_while { |start| completed_batches.none? { |batch| batch.cover?(start) } }
+ .map do |batch_start|
+ next_bounds = batching_strategy.next_batch(
+ migration.table_name,
+ migration.column_name,
+ batch_min_value: batch_start,
+ batch_size: migration.batch_size,
+ job_arguments: migration.job_arguments
+ )
batch_min, batch_max = next_bounds
- all_migration_jobs << migration.create_batched_job!(batch_min, batch_max)
- min_value = batch_max + 1
+ job = migration.create_batched_job!(batch_min, batch_max)
+
+ completed_batches << (batch_min..batch_max)
+
+ job
end
- [migration.job_class_name, all_migration_jobs]
+ [migration.job_class_name, jobs_to_sample]
end
end
def run_job(job)
Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new(connection: connection).perform(job)
end
+
+ def uniform_fractions
+ Enumerator.new do |y|
+ # Generates equally distributed fractions between 0 and 1, with increasing detail as more are pulled from
+ # the enumerator.
+ # 0, 1 (special case)
+ # 1/2
+ # 1/4, 3/4
+ # 1/8, 3/8, 5/8, 7/8
+ # etc.
+ # The pattern here is at each outer loop, the denominator multiplies by 2, and at each inner loop,
+ # the numerator counts up all odd numbers 1 <= n < denominator.
+ y << 0
+ y << 1
+
+ # denominators are each increasing power of 2
+ denominators = (1..).lazy.map { |exponent| 2**exponent }
+
+ denominators.each do |denominator|
+ # Numerators at the current step are all odd numbers between 1 and the denominator
+ numerators = (1..denominator).step(2)
+
+ numerators.each do |numerator|
+ next_frac = numerator.fdiv(denominator)
+ y << next_frac
+ end
+ end
+ end
+ end
end
end
end
diff --git a/lib/gitlab/database/partitioning.rb b/lib/gitlab/database/partitioning.rb
index 92825d41599..6314aff9914 100644
--- a/lib/gitlab/database/partitioning.rb
+++ b/lib/gitlab/database/partitioning.rb
@@ -33,6 +33,18 @@ module Gitlab
PartitionManager.new(model).sync_partitions
end
+ unless only_on
+ models_to_sync.each do |model|
+ next if model < ::Gitlab::Database::SharedModel && !(model < TableWithoutModel)
+
+ Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name|
+ if connection_name != model.connection_db_config.name
+ PartitionManager.new(model, connection: connection).sync_partitions
+ end
+ end
+ end
+ end
+
Gitlab::AppLogger.info(message: 'Finished sync of dynamic postgres partitions')
end
diff --git a/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb b/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb
new file mode 100644
index 00000000000..f45cf02ec9b
--- /dev/null
+++ b/lib/gitlab/database/partitioning/convert_table_to_first_list_partition.rb
@@ -0,0 +1,214 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Partitioning
+ class ConvertTableToFirstListPartition
+ UnableToPartition = Class.new(StandardError)
+
+ include Gitlab::Database::MigrationHelpers
+
+ SQL_STATEMENT_SEPARATOR = ";\n\n"
+
+ attr_reader :partitioning_column, :table_name, :parent_table_name, :zero_partition_value
+
+ def initialize(migration_context:, table_name:, parent_table_name:, partitioning_column:, zero_partition_value:)
+ @migration_context = migration_context
+ @connection = migration_context.connection
+ @table_name = table_name
+ @parent_table_name = parent_table_name
+ @partitioning_column = partitioning_column
+ @zero_partition_value = zero_partition_value
+ end
+
+ def prepare_for_partitioning
+ assert_existing_constraints_partitionable
+
+ add_partitioning_check_constraint
+ end
+
+ def revert_preparation_for_partitioning
+ migration_context.remove_check_constraint(table_name, partitioning_constraint.name)
+ end
+
+ def partition
+ assert_existing_constraints_partitionable
+ assert_partitioning_constraint_present
+ create_parent_table
+ attach_foreign_keys_to_parent
+
+ migration_context.with_lock_retries(raise_on_exhaustion: true) do
+ migration_context.execute(sql_to_convert_table)
+ end
+ end
+
+ def revert_partitioning
+ migration_context.with_lock_retries(raise_on_exhaustion: true) do
+ migration_context.execute(<<~SQL)
+ ALTER TABLE #{connection.quote_table_name(parent_table_name)}
+ DETACH PARTITION #{connection.quote_table_name(table_name)};
+ SQL
+
+ alter_sequences_sql = alter_sequence_statements(old_table: parent_table_name, new_table: table_name)
+ .join(SQL_STATEMENT_SEPARATOR)
+
+ migration_context.execute(alter_sequences_sql)
+
+ # This takes locks for all the foreign keys that the parent table had.
+ # However, those same locks were taken while detaching the partition, and we can't avoid that.
+ # If we dropped the foreign key before detaching the partition to avoid this locking,
+ # the drop would cascade to the child partitions and drop their foreign keys as well
+ migration_context.drop_table(parent_table_name)
+ end
+
+ add_partitioning_check_constraint
+ end
+
+ private
+
+ attr_reader :connection, :migration_context
+
+ delegate :quote_table_name, :quote_column_name, to: :connection
+
+ def sql_to_convert_table
+ # The critical statement here is the attach_table_to_parent statement.
+ # The following statements could be run in a later transaction,
+ # but they acquire the same locks so it's much faster to incude them
+ # here.
+ [
+ attach_table_to_parent_statement,
+ alter_sequence_statements(old_table: table_name, new_table: parent_table_name),
+ remove_constraint_statement
+ ].flatten.join(SQL_STATEMENT_SEPARATOR)
+ end
+
+ def table_identifier
+ "#{connection.current_schema}.#{table_name}"
+ end
+
+ def assert_existing_constraints_partitionable
+ violating_constraints = Gitlab::Database::PostgresConstraint
+ .by_table_identifier(table_identifier)
+ .primary_or_unique_constraints
+ .not_including_column(partitioning_column)
+ .to_a
+
+ return if violating_constraints.empty?
+
+ violation_messages = violating_constraints.map { |c| "#{c.name} on (#{c.column_names.join(', ')})" }
+
+ raise UnableToPartition, <<~MSG
+ Constraints on #{table_name} are incompatible with partitioning on #{partitioning_column}
+
+ All primary key and unique constraints must include the partitioning column.
+ Violations:
+ #{violation_messages.join("\n")}
+ MSG
+ end
+
+ def partitioning_constraint
+ constraints_on_column = Gitlab::Database::PostgresConstraint
+ .by_table_identifier(table_identifier)
+ .check_constraints
+ .valid
+ .including_column(partitioning_column)
+
+ constraints_on_column.to_a.find do |constraint|
+ constraint.definition == "CHECK ((#{partitioning_column} = #{zero_partition_value}))"
+ end
+ end
+
+ def assert_partitioning_constraint_present
+ return if partitioning_constraint
+
+ raise UnableToPartition, <<~MSG
+ Table #{table_name} is not ready for partitioning.
+ Before partitioning, a check constraint must enforce that (#{partitioning_column} = #{zero_partition_value})
+ MSG
+ end
+
+ def add_partitioning_check_constraint
+ return if partitioning_constraint.present?
+
+ check_body = "#{partitioning_column} = #{connection.quote(zero_partition_value)}"
+ # Any constraint name would work. The constraint is found based on its definition before partitioning
+ migration_context.add_check_constraint(table_name, check_body, 'partitioning_constraint')
+
+ raise UnableToPartition, 'Error adding partitioning constraint' unless partitioning_constraint.present?
+ end
+
+ def create_parent_table
+ migration_context.execute(<<~SQL)
+ CREATE TABLE IF NOT EXISTS #{quote_table_name(parent_table_name)} (
+ LIKE #{quote_table_name(table_name)} INCLUDING ALL
+ ) PARTITION BY LIST(#{quote_column_name(partitioning_column)})
+ SQL
+ end
+
+ def attach_foreign_keys_to_parent
+ migration_context.foreign_keys(table_name).each do |fk|
+ # At this point no other connection knows about the parent table.
+ # Thus the only contended lock in the following transaction is on fk.to_table.
+ # So a deadlock is impossible.
+
+ # If we're rerunning this migration after a failure to acquire a lock, the foreign key might already exist.
+ # Don't try to recreate it in that case
+ if migration_context.foreign_keys(parent_table_name)
+ .any? { |p_fk| p_fk.options[:name] == fk.options[:name] }
+ next
+ end
+
+ migration_context.with_lock_retries(raise_on_exhaustion: true) do
+ migration_context.add_foreign_key(parent_table_name, fk.to_table, **fk.options)
+ end
+ end
+ end
+
+ def attach_table_to_parent_statement
+ <<~SQL
+ ALTER TABLE #{quote_table_name(parent_table_name)}
+ ATTACH PARTITION #{table_name}
+ FOR VALUES IN (#{zero_partition_value})
+ SQL
+ end
+
+ def alter_sequence_statements(old_table:, new_table:)
+ sequences_owned_by(old_table).map do |seq_info|
+ seq_name, column_name = seq_info.values_at(:name, :column_name)
+ <<~SQL.chomp
+ ALTER SEQUENCE #{quote_table_name(seq_name)} OWNED BY #{quote_table_name(new_table)}.#{quote_column_name(column_name)}
+ SQL
+ end
+ end
+
+ def remove_constraint_statement
+ <<~SQL
+ ALTER TABLE #{quote_table_name(parent_table_name)}
+ DROP CONSTRAINT #{quote_table_name(partitioning_constraint.name)}
+ SQL
+ end
+
+ # TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/373887
+ def sequences_owned_by(table_name)
+ sequence_data = connection.exec_query(<<~SQL, nil, [table_name])
+ SELECT seq_pg_class.relname AS seq_name,
+ dep_pg_class.relname AS table_name,
+ pg_attribute.attname AS col_name
+ FROM pg_class seq_pg_class
+ INNER JOIN pg_depend ON seq_pg_class.oid = pg_depend.objid
+ INNER JOIN pg_class dep_pg_class ON pg_depend.refobjid = dep_pg_class.oid
+ INNER JOIN pg_attribute ON dep_pg_class.oid = pg_attribute.attrelid
+ AND pg_depend.refobjsubid = pg_attribute.attnum
+ WHERE seq_pg_class.relkind = 'S'
+ AND dep_pg_class.relname = $1
+ SQL
+
+ sequence_data.map do |seq_info|
+ name, column_name = seq_info.values_at('seq_name', 'col_name')
+ { name: name, column_name: column_name }
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/partitioning/partition_manager.rb b/lib/gitlab/database/partitioning/partition_manager.rb
index aac91eaadb1..55ca9ff8645 100644
--- a/lib/gitlab/database/partitioning/partition_manager.rb
+++ b/lib/gitlab/database/partitioning/partition_manager.rb
@@ -10,12 +10,15 @@ module Gitlab
MANAGEMENT_LEASE_KEY = 'database_partition_management_%s'
RETAIN_DETACHED_PARTITIONS_FOR = 1.week
- def initialize(model)
+ def initialize(model, connection: nil)
@model = model
- @connection_name = model.connection.pool.db_config.name
+ @connection = connection || model.connection
+ @connection_name = @connection.pool.db_config.name
end
def sync_partitions
+ return skip_synching_partitions unless table_partitioned?
+
Gitlab::AppLogger.info(
message: "Checking state of dynamic postgres partitions",
table_name: model.table_name,
@@ -43,9 +46,7 @@ module Gitlab
private
- attr_reader :model
-
- delegate :connection, to: :model
+ attr_reader :model, :connection
def missing_partitions
return [] unless connection.table_exists?(model.table_name)
@@ -129,6 +130,20 @@ module Gitlab
connection: connection
).run(&block)
end
+
+ def table_partitioned?
+ Gitlab::Database::SharedModel.using_connection(connection) do
+ Gitlab::Database::PostgresPartitionedTable.find_by_name_in_current_schema(model.table_name).present?
+ end
+ end
+
+ def skip_synching_partitions
+ Gitlab::AppLogger.warn(
+ message: "Skipping synching partitions",
+ table_name: model.table_name,
+ connection_name: @connection_name
+ )
+ end
end
end
end
diff --git a/lib/gitlab/database/partitioning/single_numeric_list_partition.rb b/lib/gitlab/database/partitioning/single_numeric_list_partition.rb
index 23ac73a0e53..4e38eea963b 100644
--- a/lib/gitlab/database/partitioning/single_numeric_list_partition.rb
+++ b/lib/gitlab/database/partitioning/single_numeric_list_partition.rb
@@ -8,7 +8,7 @@ module Gitlab
def self.from_sql(table, partition_name, definition)
# A list partition can support multiple values, but we only support a single number
- matches = definition.match(/FOR VALUES IN \('(?<value>\d+)'\)/)
+ matches = definition.match(/FOR VALUES IN \('?(?<value>\d+)'?\)/)
raise ArgumentError, 'Unknown partition definition' unless matches
@@ -29,17 +29,21 @@ module Gitlab
@partition_name || "#{table}_#{value}"
end
+ def data_size
+ execute("SELECT pg_table_size(#{quote(full_partition_name)})").first['pg_table_size']
+ end
+
def to_sql
<<~SQL
CREATE TABLE IF NOT EXISTS #{fully_qualified_partition}
- PARTITION OF #{conn.quote_table_name(table)}
- FOR VALUES IN (#{conn.quote(value)})
+ PARTITION OF #{quote_table_name(table)}
+ FOR VALUES IN (#{quote(value)})
SQL
end
def to_detach_sql
<<~SQL
- ALTER TABLE #{conn.quote_table_name(table)}
+ ALTER TABLE #{quote_table_name(table)}
DETACH PARTITION #{fully_qualified_partition}
SQL
end
@@ -63,8 +67,14 @@ module Gitlab
private
+ delegate :execute, :quote, :quote_table_name, to: :conn, private: true
+
+ def full_partition_name
+ "%s.%s" % [Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA, partition_name]
+ end
+
def fully_qualified_partition
- "%s.%s" % [conn.quote_table_name(Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA), conn.quote_table_name(partition_name)]
+ quote_table_name(full_partition_name)
end
def conn
diff --git a/lib/gitlab/database/partitioning/sliding_list_strategy.rb b/lib/gitlab/database/partitioning/sliding_list_strategy.rb
index 4b5349f0327..5bb34a86d43 100644
--- a/lib/gitlab/database/partitioning/sliding_list_strategy.rb
+++ b/lib/gitlab/database/partitioning/sliding_list_strategy.rb
@@ -14,7 +14,7 @@ module Gitlab
@next_partition_if = next_partition_if
@detach_partition_if = detach_partition_if
- ensure_partitioning_column_ignored!
+ ensure_partitioning_column_ignored_or_readonly!
end
def current_partitions
@@ -26,7 +26,7 @@ module Gitlab
def missing_partitions
if no_partitions_exist?
[initial_partition]
- elsif next_partition_if.call(active_partition.value)
+ elsif next_partition_if.call(active_partition)
[next_partition]
else
[]
@@ -44,7 +44,7 @@ module Gitlab
def extra_partitions
possibly_extra = current_partitions[0...-1] # Never consider the most recent partition
- extra = possibly_extra.take_while { |p| detach_partition_if.call(p.value) }
+ extra = possibly_extra.take_while { |p| detach_partition_if.call(p) }
default_value = current_default_value
if extra.any? { |p| p.value == default_value }
@@ -128,12 +128,17 @@ module Gitlab
Integer(value)
end
- def ensure_partitioning_column_ignored!
- unless model.ignored_columns.include?(partitioning_key.to_s)
- raise "Add #{partitioning_key} to #{model.name}.ignored_columns to use it with SlidingListStrategy"
+ def ensure_partitioning_column_ignored_or_readonly!
+ unless key_ignored_or_readonly?
+ raise "Add #{partitioning_key} to #{model.name}.ignored_columns or " \
+ "mark it as readonly to use it with SlidingListStrategy"
end
end
+ def key_ignored_or_readonly?
+ model.ignored_columns.include?(partitioning_key.to_s) || model.readonly_attribute?(partitioning_key.to_s)
+ end
+
def with_lock_retries(&block)
Gitlab::Database::WithLockRetries.new(
klass: self.class,
diff --git a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
index c9a3b5caf79..15b542cf089 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
@@ -77,8 +77,42 @@ module Gitlab
end
end
+ # Finds duplicate indexes for a given schema and table. This finds
+ # indexes where the index definition is identical but the names are
+ # different. Returns an array of arrays containing duplicate index name
+ # pairs.
+ #
+ # Example:
+ #
+ # find_duplicate_indexes('table_name_goes_here')
+ def find_duplicate_indexes(table_name, schema_name: connection.current_schema)
+ find_indexes(table_name, schema_name: schema_name)
+ .group_by { |r| r['index_id'] }
+ .select { |_, v| v.size > 1 }
+ .map { |_, indexes| indexes.map { |index| index['index_name'] } }
+ end
+
private
+ def find_indexes(table_name, schema_name: connection.current_schema)
+ indexes = connection.select_all(<<~SQL, 'SQL', [schema_name, table_name])
+ SELECT n.nspname AS schema_name,
+ c.relname AS table_name,
+ i.relname AS index_name,
+ regexp_replace(pg_get_indexdef(i.oid), 'INDEX .*? USING', '_') AS index_id
+ FROM pg_index x
+ JOIN pg_class c ON c.oid = x.indrelid
+ JOIN pg_class i ON i.oid = x.indexrelid
+ LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
+ WHERE (c.relkind = ANY (ARRAY['r'::"char", 'm'::"char", 'p'::"char"]))
+ AND (i.relkind = ANY (ARRAY['i'::"char", 'I'::"char"]))
+ AND n.nspname = $1
+ AND c.relname = $2;
+ SQL
+
+ indexes.to_a
+ end
+
def find_partitioned_table(table_name)
partitioned_table = Gitlab::Database::PostgresPartitionedTable.find_by_name_in_current_schema(table_name)
diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
index a541ecf5316..695a5d7ec77 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
@@ -251,6 +251,54 @@ module Gitlab
create_sync_trigger(source_table_name, trigger_name, function_name)
end
+ def prepare_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:)
+ validate_not_in_transaction!(:prepare_constraint_for_list_partitioning)
+
+ Gitlab::Database::Partitioning::ConvertTableToFirstListPartition
+ .new(migration_context: self,
+ table_name: table_name,
+ parent_table_name: parent_table_name,
+ partitioning_column: partitioning_column,
+ zero_partition_value: initial_partitioning_value
+ ).prepare_for_partitioning
+ end
+
+ def revert_preparing_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:)
+ validate_not_in_transaction!(:revert_preparing_constraint_for_list_partitioning)
+
+ Gitlab::Database::Partitioning::ConvertTableToFirstListPartition
+ .new(migration_context: self,
+ table_name: table_name,
+ parent_table_name: parent_table_name,
+ partitioning_column: partitioning_column,
+ zero_partition_value: initial_partitioning_value
+ ).revert_preparation_for_partitioning
+ end
+
+ def convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:)
+ validate_not_in_transaction!(:convert_table_to_first_list_partition)
+
+ Gitlab::Database::Partitioning::ConvertTableToFirstListPartition
+ .new(migration_context: self,
+ table_name: table_name,
+ parent_table_name: parent_table_name,
+ partitioning_column: partitioning_column,
+ zero_partition_value: initial_partitioning_value
+ ).partition
+ end
+
+ def revert_converting_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:)
+ validate_not_in_transaction!(:revert_converting_table_to_first_list_partition)
+
+ Gitlab::Database::Partitioning::ConvertTableToFirstListPartition
+ .new(migration_context: self,
+ table_name: table_name,
+ parent_table_name: parent_table_name,
+ partitioning_column: partitioning_column,
+ zero_partition_value: initial_partitioning_value
+ ).revert_partitioning
+ end
+
private
def assert_table_is_allowed(table_name)
diff --git a/lib/gitlab/database/postgres_constraint.rb b/lib/gitlab/database/postgres_constraint.rb
new file mode 100644
index 00000000000..fa590914332
--- /dev/null
+++ b/lib/gitlab/database/postgres_constraint.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ # Backed by the postgres_constraints view
+ class PostgresConstraint < SharedModel
+ IDENTIFIER_REGEX = /^\w+\.\w+$/.freeze
+ self.primary_key = :oid
+
+ scope :check_constraints, -> { where(constraint_type: 'c') }
+ scope :primary_key_constraints, -> { where(constraint_type: 'p') }
+ scope :unique_constraints, -> { where(constraint_type: 'u') }
+ scope :primary_or_unique_constraints, -> { where(constraint_type: %w[u p]) }
+
+ scope :including_column, ->(column) { where("? = ANY(column_names)", column) }
+ scope :not_including_column, ->(column) { where.not("? = ANY(column_names)", column) }
+
+ scope :valid, -> { where(constraint_valid: true) }
+
+ scope :by_table_identifier, ->(identifier) do
+ unless identifier =~ IDENTIFIER_REGEX
+ raise ArgumentError, "Table name is not fully qualified with a schema: #{identifier}"
+ end
+
+ where(table_identifier: identifier)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb b/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb
new file mode 100644
index 00000000000..c2d5dfc1a15
--- /dev/null
+++ b/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module QueryAnalyzers
+ module Ci
+ # The purpose of this analyzer is to detect queries not going through a partitioning routing table
+ class PartitioningAnalyzer < Database::QueryAnalyzers::Base
+ RoutingTableNotUsedError = Class.new(QueryAnalyzerError)
+
+ ENABLED_TABLES = %w[
+ ci_builds_metadata
+ ].freeze
+
+ class << self
+ def enabled?
+ ::Feature::FlipperFeature.table_exists? &&
+ ::Feature.enabled?(:ci_partitioning_analyze_queries, type: :ops)
+ end
+
+ def analyze(parsed)
+ analyze_legacy_tables_usage(parsed)
+ end
+
+ private
+
+ def analyze_legacy_tables_usage(parsed)
+ detected = ENABLED_TABLES & (parsed.pg.dml_tables + parsed.pg.select_tables)
+
+ return if detected.none?
+
+ ::Gitlab::ErrorTracking.track_and_raise_for_dev_exception(
+ RoutingTableNotUsedError.new("Detected non-partitioned table use #{detected.inspect}: #{parsed.sql}")
+ )
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb
index 06e2b114c91..b4b9161f0c2 100644
--- a/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb
+++ b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb
@@ -14,7 +14,7 @@ module Gitlab
class << self
def enabled?
::Feature::FlipperFeature.table_exists? &&
- Feature.enabled?(:query_analyzer_gitlab_schema_metrics)
+ Feature.enabled?(:query_analyzer_gitlab_schema_metrics, type: :ops)
end
def analyze(parsed)
diff --git a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb
index e0cb803b872..3b1751c863d 100644
--- a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb
+++ b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb
@@ -33,7 +33,7 @@ module Gitlab
def self.enabled?
::Feature::FlipperFeature.table_exists? &&
- Feature.enabled?(:detect_cross_database_modification)
+ Feature.enabled?(:detect_cross_database_modification, type: :ops)
end
def self.requires_tracking?(parsed)
diff --git a/lib/gitlab/database/reflection.rb b/lib/gitlab/database/reflection.rb
index 3ea7277571f..33c965cb150 100644
--- a/lib/gitlab/database/reflection.rb
+++ b/lib/gitlab/database/reflection.rb
@@ -114,7 +114,7 @@ module Gitlab
'PostgreSQL on Amazon RDS' => { statement: 'SHOW rds.extensions', error: /PG::UndefinedObject/ },
# Based on https://cloud.google.com/sql/docs/postgres/flags#postgres-c this should be specific
# to Cloud SQL for PostgreSQL
- 'Cloud SQL for PostgreSQL' => { statement: 'SHOW cloudsql.iam_authentication', error: /PG::UndefinedObject/ },
+ 'Cloud SQL for PostgreSQL' => { statement: 'SHOW cloudsql.iam_authentication', error: /PG::UndefinedObject/ },
# Based on
# - https://docs.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-extensions
# - https://docs.microsoft.com/en-us/azure/postgresql/concepts-extensions
diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb
index b96dffc99ac..aba45fcc57b 100644
--- a/lib/gitlab/database/reindexing.rb
+++ b/lib/gitlab/database/reindexing.rb
@@ -27,7 +27,7 @@ module Gitlab
# Hack: Before we do actual reindexing work, create async indexes
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
- Gitlab::Database::AsyncIndexes.drop_pending_indexes! if Feature.enabled?(:database_async_index_destruction, type: :ops)
+ Gitlab::Database::AsyncIndexes.drop_pending_indexes!
automatic_reindexing
end
diff --git a/lib/gitlab/database/tables_sorted_by_foreign_keys.rb b/lib/gitlab/database/tables_sorted_by_foreign_keys.rb
new file mode 100644
index 00000000000..9f096904d31
--- /dev/null
+++ b/lib/gitlab/database/tables_sorted_by_foreign_keys.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class TablesSortedByForeignKeys
+ include TSort
+
+ def initialize(connection, tables)
+ @connection = connection
+ @tables = tables
+ end
+
+ def execute
+ strongly_connected_components
+ end
+
+ private
+
+ def tsort_each_node(&block)
+ tables_dependencies.each_key(&block)
+ end
+
+ def tsort_each_child(node, &block)
+ tables_dependencies[node].each(&block)
+ end
+
+ # it maps the tables to the tables that depend on it
+ def tables_dependencies
+ @tables.to_h do |table_name|
+ [table_name, all_foreign_keys[table_name]&.map(&:from_table).to_a]
+ end
+ end
+
+ def all_foreign_keys
+ @all_foreign_keys ||= @tables.flat_map do |table_name|
+ @connection.foreign_keys(table_name)
+ end.group_by(&:to_table)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/tables_truncate.rb b/lib/gitlab/database/tables_truncate.rb
new file mode 100644
index 00000000000..164520fbab3
--- /dev/null
+++ b/lib/gitlab/database/tables_truncate.rb
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class TablesTruncate
+ GITLAB_SCHEMAS_TO_IGNORE = %i[gitlab_geo].freeze
+
+ def initialize(database_name:, min_batch_size:, logger: nil, until_table: nil, dry_run: false)
+ @database_name = database_name
+ @min_batch_size = min_batch_size
+ @logger = logger
+ @until_table = until_table
+ @dry_run = dry_run
+ end
+
+ def execute
+ raise "Cannot truncate legacy tables in single-db setup" unless Gitlab::Database.has_config?(:ci)
+ raise "database is not supported" unless %w[main ci].include?(database_name)
+
+ logger&.info "DRY RUN:" if dry_run
+
+ connection = Gitlab::Database.database_base_models[database_name].connection
+
+ schemas_for_connection = Gitlab::Database.gitlab_schemas_for_connection(connection)
+ tables_to_truncate = Gitlab::Database::GitlabSchema.tables_to_schema.reject do |_, schema_name|
+ (GITLAB_SCHEMAS_TO_IGNORE.union(schemas_for_connection)).include?(schema_name)
+ end.keys
+
+ tables_sorted = Gitlab::Database::TablesSortedByForeignKeys.new(connection, tables_to_truncate).execute
+ # Checking if all the tables have the write-lock triggers
+ # to make sure we are deleting the right tables on the right database.
+ tables_sorted.flatten.each do |table_name|
+ query = <<~SQL
+ SELECT COUNT(*) from information_schema.triggers
+ WHERE event_object_table = '#{table_name}'
+ AND trigger_name = 'gitlab_schema_write_trigger_for_#{table_name}'
+ SQL
+
+ if connection.select_value(query) == 0
+ raise "Table '#{table_name}' is not locked for writes. Run the rake task gitlab:db:lock_writes first"
+ end
+ end
+
+ if until_table
+ table_index = tables_sorted.find_index { |tables_group| tables_group.include?(until_table) }
+ raise "The table '#{until_table}' is not within the truncated tables" if table_index.nil?
+
+ tables_sorted = tables_sorted[0..table_index]
+ end
+
+ # min_batch_size is the minimum number of new tables to truncate at each stage.
+ # But in each stage we have also have to truncate the already truncated tables in the previous stages
+ logger&.info "Truncating legacy tables for the database #{database_name}"
+ truncate_tables_in_batches(connection, tables_sorted, min_batch_size)
+ end
+
+ private
+
+ attr_accessor :database_name, :min_batch_size, :logger, :dry_run, :until_table
+
+ def truncate_tables_in_batches(connection, tables_sorted, min_batch_size)
+ truncated_tables = []
+
+ tables_sorted.flatten.each do |table|
+ sql_statement = "SELECT set_config('lock_writes.#{table}', 'false', false)"
+ logger&.info(sql_statement)
+ connection.execute(sql_statement) unless dry_run
+ end
+
+ # We do the truncation in stages to avoid high IO
+ # In each stage, we truncate the new tables along with the already truncated
+ # tables before. That's because PostgreSQL doesn't allow to truncate any table (A)
+ # without truncating any other table (B) that has a Foreign Key pointing to the table (A).
+ # even if table (B) is empty, because it has been already truncated in a previous stage.
+ tables_sorted.in_groups_of(min_batch_size, false).each do |tables_groups|
+ new_tables_to_truncate = tables_groups.flatten
+ logger&.info "= New tables to truncate: #{new_tables_to_truncate.join(', ')}"
+ truncated_tables.push(*new_tables_to_truncate).tap(&:sort!)
+ sql_statements = [
+ "SET LOCAL statement_timeout = 0",
+ "SET LOCAL lock_timeout = 0",
+ "TRUNCATE TABLE #{truncated_tables.join(', ')} RESTRICT"
+ ]
+
+ sql_statements.each { |sql_statement| logger&.info(sql_statement) }
+
+ next if dry_run
+
+ connection.transaction do
+ sql_statements.each { |sql_statement| connection.execute(sql_statement) }
+ end
+ end
+ end
+ end
+ end
+end