Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2020-09-19 04:45:44 +0300
committerGitLab Bot <gitlab-bot@gitlab.com>2020-09-19 04:45:44 +0300
commit85dc423f7090da0a52c73eb66faf22ddb20efff9 (patch)
tree9160f299afd8c80c038f08e1545be119f5e3f1e1 /lib/gitlab/database
parent15c2c8c66dbe422588e5411eee7e68f1fa440bb8 (diff)
Add latest changes from gitlab-org/gitlab@13-4-stable-ee
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--lib/gitlab/database/background_migration_job.rb2
-rw-r--r--lib/gitlab/database/concurrent_reindex.rb143
-rw-r--r--lib/gitlab/database/custom_structure.rb3
-rw-r--r--lib/gitlab/database/migration_helpers.rb19
-rw-r--r--lib/gitlab/database/partitioning/partition_monitoring.rb34
-rw-r--r--lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb4
-rw-r--r--lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb85
-rw-r--r--lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb2
-rw-r--r--lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb2
-rw-r--r--lib/gitlab/database/schema_cleaner.rb11
10 files changed, 272 insertions, 33 deletions
diff --git a/lib/gitlab/database/background_migration_job.rb b/lib/gitlab/database/background_migration_job.rb
index 445735b232a..1b9d7cbc9a1 100644
--- a/lib/gitlab/database/background_migration_job.rb
+++ b/lib/gitlab/database/background_migration_job.rb
@@ -3,6 +3,8 @@
module Gitlab
module Database
class BackgroundMigrationJob < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord
+ include EachBatch
+
self.table_name = :background_migration_jobs
scope :for_migration_class, -> (class_name) { where(class_name: normalize_class_name(class_name)) }
diff --git a/lib/gitlab/database/concurrent_reindex.rb b/lib/gitlab/database/concurrent_reindex.rb
new file mode 100644
index 00000000000..485ab35e55d
--- /dev/null
+++ b/lib/gitlab/database/concurrent_reindex.rb
@@ -0,0 +1,143 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class ConcurrentReindex
+ include Gitlab::Utils::StrongMemoize
+ include MigrationHelpers
+
+ ReindexError = Class.new(StandardError)
+
+ PG_IDENTIFIER_LENGTH = 63
+ TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
+ REPLACED_INDEX_PREFIX = 'old_reindex_'
+
+ attr_reader :index_name, :logger
+
+ def initialize(index_name, logger:)
+ @index_name = index_name
+ @logger = logger
+ end
+
+ def execute
+ raise ReindexError, "index #{index_name} does not exist" unless index_exists?
+
+ raise ReindexError, 'UNIQUE indexes are currently not supported' if index_unique?
+
+ logger.debug("dropping dangling index from previous run: #{replacement_index_name}")
+ remove_replacement_index
+
+ begin
+ create_replacement_index
+
+ unless replacement_index_valid?
+ message = 'replacement index was created as INVALID'
+ logger.error("#{message}, cleaning up")
+ raise ReindexError, "failed to reindex #{index_name}: #{message}"
+ end
+
+ swap_replacement_index
+ rescue Gitlab::Database::WithLockRetries::AttemptsExhaustedError => e
+ logger.error('failed to obtain the required database locks to swap the indexes, cleaning up')
+ raise ReindexError, e.message
+ rescue ActiveRecord::ActiveRecordError, PG::Error => e
+ logger.error("database error while attempting reindex of #{index_name}: #{e.message}")
+ raise ReindexError, e.message
+ ensure
+ logger.info("dropping unneeded replacement index: #{replacement_index_name}")
+ remove_replacement_index
+ end
+ end
+
+ private
+
+ def connection
+ @connection ||= ActiveRecord::Base.connection
+ end
+
+ def replacement_index_name
+ @replacement_index_name ||= constrained_index_name(TEMPORARY_INDEX_PREFIX)
+ end
+
+ def index
+ strong_memoize(:index) do
+ find_index(index_name)
+ end
+ end
+
+ def index_exists?
+ !index.nil?
+ end
+
+ def index_unique?
+ index.indisunique
+ end
+
+ def constrained_index_name(prefix)
+ "#{prefix}#{index_name}".slice(0, PG_IDENTIFIER_LENGTH)
+ end
+
+ def create_replacement_index
+ create_replacement_index_statement = index.indexdef
+ .sub(/CREATE INDEX/, 'CREATE INDEX CONCURRENTLY')
+ .sub(/#{index_name}/, replacement_index_name)
+
+ logger.info("creating replacement index #{replacement_index_name}")
+ logger.debug("replacement index definition: #{create_replacement_index_statement}")
+
+ disable_statement_timeout do
+ connection.execute(create_replacement_index_statement)
+ end
+ end
+
+ def replacement_index_valid?
+ find_index(replacement_index_name).indisvalid
+ end
+
+ def find_index(index_name)
+ record = connection.select_one(<<~SQL)
+ SELECT
+ pg_index.indisunique,
+ pg_index.indisvalid,
+ pg_indexes.indexdef
+ FROM pg_index
+ INNER JOIN pg_class ON pg_class.oid = pg_index.indexrelid
+ INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
+ INNER JOIN pg_indexes ON pg_class.relname = pg_indexes.indexname
+ WHERE pg_namespace.nspname = 'public'
+ AND pg_class.relname = #{connection.quote(index_name)}
+ SQL
+
+ OpenStruct.new(record) if record
+ end
+
+ def swap_replacement_index
+ replaced_index_name = constrained_index_name(REPLACED_INDEX_PREFIX)
+
+ logger.info("swapping replacement index #{replacement_index_name} with #{index_name}")
+
+ with_lock_retries do
+ rename_index(index_name, replaced_index_name)
+ rename_index(replacement_index_name, index_name)
+ rename_index(replaced_index_name, replacement_index_name)
+ end
+ end
+
+ def rename_index(old_index_name, new_index_name)
+ connection.execute("ALTER INDEX #{old_index_name} RENAME TO #{new_index_name}")
+ end
+
+ def remove_replacement_index
+ disable_statement_timeout do
+ connection.execute("DROP INDEX CONCURRENTLY IF EXISTS #{replacement_index_name}")
+ end
+ end
+
+ def with_lock_retries(&block)
+ arguments = { klass: self.class, logger: logger }
+
+ Gitlab::Database::WithLockRetries.new(arguments).run(raise_on_exhaustion: true, &block)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/custom_structure.rb b/lib/gitlab/database/custom_structure.rb
index c5a76c5a787..e4404e73a63 100644
--- a/lib/gitlab/database/custom_structure.rb
+++ b/lib/gitlab/database/custom_structure.rb
@@ -8,8 +8,7 @@ module Gitlab
def dump
File.open(self.class.custom_dump_filepath, 'wb') do |io|
io << "-- this file tracks custom GitLab data, such as foreign keys referencing partitioned tables\n"
- io << "-- more details can be found in the issue: https://gitlab.com/gitlab-org/gitlab/-/issues/201872\n"
- io << "SET search_path=public;\n\n"
+ io << "-- more details can be found in the issue: https://gitlab.com/gitlab-org/gitlab/-/issues/201872\n\n"
dump_partitioned_foreign_keys(io) if partitioned_foreign_keys_exist?
end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index b62b6e20dd5..723f0f6a308 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -87,7 +87,7 @@ module Gitlab
options = options.merge({ algorithm: :concurrently })
if index_exists?(table_name, column_name, options)
- Rails.logger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
+ Gitlab::AppLogger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"
return
end
@@ -113,7 +113,7 @@ module Gitlab
options = options.merge({ algorithm: :concurrently })
unless index_exists?(table_name, column_name, options)
- Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
+ Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"
return
end
@@ -143,7 +143,7 @@ module Gitlab
options = options.merge({ algorithm: :concurrently })
unless index_exists_by_name?(table_name, index_name)
- Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, index_name: #{index_name}" # rubocop:disable Gitlab/RailsLogger
+ Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, index_name: #{index_name}"
return
end
@@ -163,7 +163,6 @@ module Gitlab
# defaults to "CASCADE".
# name - The name of the foreign key.
#
- # rubocop:disable Gitlab/RailsLogger
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, name: nil, validate: true)
# Transactions would result in ALTER TABLE locks being held for the
# duration of the transaction, defeating the purpose of this method.
@@ -183,7 +182,7 @@ module Gitlab
"source: #{source}, target: #{target}, column: #{options[:column]}, "\
"name: #{options[:name]}, on_delete: #{options[:on_delete]}"
- Rails.logger.warn warning_message
+ Gitlab::AppLogger.warn warning_message
else
# Using NOT VALID allows us to create a key without immediately
# validating it. This means we keep the ALTER TABLE lock only for a
@@ -217,7 +216,6 @@ module Gitlab
end
end
end
- # rubocop:enable Gitlab/RailsLogger
def validate_foreign_key(source, column, name: nil)
fk_name = name || concurrent_foreign_key_name(source, column)
@@ -540,10 +538,10 @@ module Gitlab
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
- def change_column_type_concurrently(table, column, new_type, type_cast_function: nil)
+ def change_column_type_concurrently(table, column, new_type, type_cast_function: nil, batch_column_name: :id)
temp_column = "#{column}_for_type_change"
- rename_column_concurrently(table, column, temp_column, type: new_type, type_cast_function: type_cast_function)
+ rename_column_concurrently(table, column, temp_column, type: new_type, type_cast_function: type_cast_function, batch_column_name: batch_column_name)
end
# Performs cleanup of a concurrent type change.
@@ -1085,7 +1083,6 @@ into similar problems in the future (e.g. when new tables are created).
# Should be unique per table (not per column)
# validate - Whether to validate the constraint in this call
#
- # rubocop:disable Gitlab/RailsLogger
def add_check_constraint(table, check, constraint_name, validate: true)
validate_check_constraint_name!(constraint_name)
@@ -1102,7 +1099,7 @@ into similar problems in the future (e.g. when new tables are created).
table: #{table}, check: #{check}, constraint name: #{constraint_name}
MESSAGE
- Rails.logger.warn warning_message
+ Gitlab::AppLogger.warn warning_message
else
# Only add the constraint without validating it
# Even though it is fast, ADD CONSTRAINT requires an EXCLUSIVE lock
@@ -1187,7 +1184,7 @@ into similar problems in the future (e.g. when new tables are created).
column #{table}.#{column} is already defined as `NOT NULL`
MESSAGE
- Rails.logger.warn warning_message
+ Gitlab::AppLogger.warn warning_message
end
end
diff --git a/lib/gitlab/database/partitioning/partition_monitoring.rb b/lib/gitlab/database/partitioning/partition_monitoring.rb
new file mode 100644
index 00000000000..9ec9ae684a5
--- /dev/null
+++ b/lib/gitlab/database/partitioning/partition_monitoring.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Partitioning
+ class PartitionMonitoring
+ attr_reader :models
+
+ def initialize(models = PartitionCreator.models)
+ @models = models
+ end
+
+ def report_metrics
+ models.each do |model|
+ strategy = model.partitioning_strategy
+
+ gauge_present.set({ table: model.table_name }, strategy.current_partitions.size)
+ gauge_missing.set({ table: model.table_name }, strategy.missing_partitions.size)
+ end
+ end
+
+ private
+
+ def gauge_present
+ @gauge_present ||= Gitlab::Metrics.gauge(:db_partitions_present, 'Number of database partitions present')
+ end
+
+ def gauge_missing
+ @gauge_missing ||= Gitlab::Metrics.gauge(:db_partitions_missing, 'Number of database partitions currently expected, but not present')
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb
index 1fb9476b7d9..2def3a4d3a9 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb
@@ -31,7 +31,7 @@ module Gitlab
current_keys << specified_key
else
- Rails.logger.warn "foreign key not added because it already exists: #{specified_key}" # rubocop:disable Gitlab/RailsLogger
+ Gitlab::AppLogger.warn "foreign key not added because it already exists: #{specified_key}"
current_keys
end
end
@@ -56,7 +56,7 @@ module Gitlab
existing_key.delete
current_keys.delete(existing_key)
else
- Rails.logger.warn "foreign key not removed because it doesn't exist: #{specified_key}" # rubocop:disable Gitlab/RailsLogger
+ Gitlab::AppLogger.warn "foreign key not removed because it doesn't exist: #{specified_key}"
end
current_keys
diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
index 84b6fb9f76e..f7b0306b769 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
@@ -6,6 +6,7 @@ module Gitlab
module TableManagementHelpers
include ::Gitlab::Database::SchemaHelpers
include ::Gitlab::Database::DynamicModelHelpers
+ include ::Gitlab::Database::MigrationHelpers
include ::Gitlab::Database::Migrations::BackgroundMigrationHelpers
ALLOWED_TABLES = %w[audit_events].freeze
@@ -15,6 +16,12 @@ module Gitlab
BATCH_INTERVAL = 2.minutes.freeze
BATCH_SIZE = 50_000
+ JobArguments = Struct.new(:start_id, :stop_id, :source_table_name, :partitioned_table_name, :source_column) do
+ def self.from_array(arguments)
+ self.new(*arguments)
+ end
+ end
+
# Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column.
# One partition is created per month between the given `min_date` and `max_date`. Also installs a trigger on
# the original table to copy writes into the partitioned table. To copy over historic data from before creation
@@ -134,6 +141,42 @@ module Gitlab
end
end
+ # Executes cleanup tasks from a previous BackgroundMigration to backfill a partitioned table by finishing
+ # pending jobs and performing a final data synchronization.
+ # This performs two steps:
+ # 1. Wait to finish any pending BackgroundMigration jobs that have not succeeded
+ # 2. Inline copy any missed rows from the original table to the partitioned table
+ #
+ # **NOTE** Migrations using this method cannot be scheduled in the same release as the migration that
+ # schedules the background migration using the `enqueue_background_migration` helper, or else the
+ # background migration jobs will be force-executed.
+ #
+ # Example:
+ #
+ # finalize_backfilling_partitioned_table :audit_events
+ #
+ def finalize_backfilling_partitioned_table(table_name)
+ assert_table_is_allowed(table_name)
+ assert_not_in_transaction_block(scope: ERROR_SCOPE)
+
+ partitioned_table_name = make_partitioned_table_name(table_name)
+ unless table_exists?(partitioned_table_name)
+ raise "could not find partitioned table for #{table_name}, " \
+ "this could indicate the previous partitioning migration has been rolled back."
+ end
+
+ Gitlab::BackgroundMigration.steal(MIGRATION_CLASS_NAME) do |raw_arguments|
+ JobArguments.from_array(raw_arguments).source_table_name == table_name.to_s
+ end
+
+ primary_key = connection.primary_key(table_name)
+ copy_missed_records(table_name, partitioned_table_name, primary_key)
+
+ disable_statement_timeout do
+ execute("VACUUM FREEZE ANALYZE #{partitioned_table_name}")
+ end
+ end
+
private
def assert_table_is_allowed(table_name)
@@ -161,10 +204,8 @@ module Gitlab
def create_range_partitioned_copy(source_table_name, partitioned_table_name, partition_column, primary_key)
if table_exists?(partitioned_table_name)
- # rubocop:disable Gitlab/RailsLogger
- Rails.logger.warn "Partitioned table not created because it already exists" \
+ Gitlab::AppLogger.warn "Partitioned table not created because it already exists" \
" (this may be due to an aborted migration or similar): table_name: #{partitioned_table_name} "
- # rubocop:enable Gitlab/RailsLogger
return
end
@@ -217,10 +258,8 @@ module Gitlab
def create_range_partition_safely(partition_name, table_name, lower_bound, upper_bound)
if table_exists?(table_for_range_partition(partition_name))
- # rubocop:disable Gitlab/RailsLogger
- Rails.logger.warn "Partition not created because it already exists" \
+ Gitlab::AppLogger.warn "Partition not created because it already exists" \
" (this may be due to an aborted migration or similar): partition_name: #{partition_name}"
- # rubocop:enable Gitlab/RailsLogger
return
end
@@ -241,10 +280,8 @@ module Gitlab
def create_sync_function(name, partitioned_table_name, unique_key)
if function_exists?(name)
- # rubocop:disable Gitlab/RailsLogger
- Rails.logger.warn "Partitioning sync function not created because it already exists" \
+ Gitlab::AppLogger.warn "Partitioning sync function not created because it already exists" \
" (this may be due to an aborted migration or similar): function name: #{name}"
- # rubocop:enable Gitlab/RailsLogger
return
end
@@ -276,17 +313,15 @@ module Gitlab
def create_sync_trigger(table_name, trigger_name, function_name)
if trigger_exists?(table_name, trigger_name)
- # rubocop:disable Gitlab/RailsLogger
- Rails.logger.warn "Partitioning sync trigger not created because it already exists" \
+ Gitlab::AppLogger.warn "Partitioning sync trigger not created because it already exists" \
" (this may be due to an aborted migration or similar): trigger name: #{trigger_name}"
- # rubocop:enable Gitlab/RailsLogger
return
end
create_trigger(table_name, trigger_name, function_name, fires: 'AFTER INSERT OR UPDATE OR DELETE')
end
- def enqueue_background_migration(source_table_name, partitioned_table_name, source_key)
+ def enqueue_background_migration(source_table_name, partitioned_table_name, source_column)
source_model = define_batchable_model(source_table_name)
queue_background_migration_jobs_by_range_at_intervals(
@@ -294,13 +329,35 @@ module Gitlab
MIGRATION_CLASS_NAME,
BATCH_INTERVAL,
batch_size: BATCH_SIZE,
- other_job_arguments: [source_table_name.to_s, partitioned_table_name, source_key],
+ other_job_arguments: [source_table_name.to_s, partitioned_table_name, source_column],
track_jobs: true)
end
def cleanup_migration_jobs(table_name)
::Gitlab::Database::BackgroundMigrationJob.for_partitioning_migration(MIGRATION_CLASS_NAME, table_name).delete_all
end
+
+ def copy_missed_records(source_table_name, partitioned_table_name, source_column)
+ backfill_table = BackfillPartitionedTable.new
+ relation = ::Gitlab::Database::BackgroundMigrationJob.pending
+ .for_partitioning_migration(MIGRATION_CLASS_NAME, source_table_name)
+
+ relation.each_batch do |batch|
+ batch.each do |pending_migration_job|
+ job_arguments = JobArguments.from_array(pending_migration_job.arguments)
+ start_id = job_arguments.start_id
+ stop_id = job_arguments.stop_id
+
+ say("Backfilling data into partitioned table for ids from #{start_id} to #{stop_id}")
+ job_updated_count = backfill_table.perform(start_id, stop_id, source_table_name,
+ partitioned_table_name, source_column)
+
+ unless job_updated_count > 0
+ raise "failed to update tracking record for ids from #{start_id} to #{stop_id}"
+ end
+ end
+ end
+ end
end
end
end
diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
index 4fbbfdc4914..562e651cabc 100644
--- a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
+++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
@@ -71,7 +71,7 @@ module Gitlab
unless gitlab_shell.mv_namespace(repository_storage, old_full_path, new_full_path)
message = "Exception moving on shard #{repository_storage} from #{old_full_path} to #{new_full_path}"
- Rails.logger.error message # rubocop:disable Gitlab/RailsLogger
+ Gitlab::AppLogger.error message
end
end
end
diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb
index 8b92b296408..5dbf30bad4e 100644
--- a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb
+++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb
@@ -56,7 +56,7 @@ module Gitlab
unless gitlab_shell.mv_repository(project.repository_storage,
old_path,
new_path)
- Rails.logger.error "Error moving #{old_path} to #{new_path}" # rubocop:disable Gitlab/RailsLogger
+ Gitlab::AppLogger.error "Error moving #{old_path} to #{new_path}"
end
end
diff --git a/lib/gitlab/database/schema_cleaner.rb b/lib/gitlab/database/schema_cleaner.rb
index 7c415287878..8f93da2b66c 100644
--- a/lib/gitlab/database/schema_cleaner.rb
+++ b/lib/gitlab/database/schema_cleaner.rb
@@ -18,11 +18,18 @@ module Gitlab
structure.gsub!(/^SELECT pg_catalog\.set_config\('search_path'.+/, '')
structure.gsub!(/^--.*/, "\n")
- structure = "SET search_path=public;\n" + structure
+ # We typically don't assume we're working with the public schema.
+ # pg_dump uses fully qualified object names though, since we have multiple schemas
+ # in the database.
+ #
+ # The intention here is to not introduce an assumption about the standard schema,
+ # unless we have a good reason to do so.
+ structure.gsub!(/public\.(\w+)/, '\1')
+ structure.gsub!(/CREATE EXTENSION IF NOT EXISTS (\w+) WITH SCHEMA public;/, 'CREATE EXTENSION IF NOT EXISTS \1;')
structure.gsub!(/\n{3,}/, "\n\n")
- io << structure
+ io << structure.strip
io << <<~MSG
-- schema_migrations.version information is no longer stored in this file,
-- but instead tracked in the db/schema_migrations directory