gitlab.com/gitlab-org/gitlab-foss.git

author    GitLab Bot <gitlab-bot@gitlab.com>  2020-10-21 10:08:36 +0300
committer GitLab Bot <gitlab-bot@gitlab.com>  2020-10-21 10:08:36 +0300
commit    48aff82709769b098321c738f3444b9bdaa694c6 (patch)
tree      e00c7c43e2d9b603a5a6af576b1685e400410dee /lib/gitlab/database
parent    879f5329ee916a948223f8f43d77fba4da6cd028 (diff)

Add latest changes from gitlab-org/gitlab@13-5-stable-ee (v13.5.0-rc42)
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--  lib/gitlab/database/batch_count.rb                                               |  48
-rw-r--r--  lib/gitlab/database/bulk_update.rb                                               | 168
-rw-r--r--  lib/gitlab/database/concurrent_reindex.rb                                        | 143
-rw-r--r--  lib/gitlab/database/count/reltuples_count_strategy.rb                            |   3
-rw-r--r--  lib/gitlab/database/migration_helpers.rb                                         | 196
-rw-r--r--  lib/gitlab/database/partitioning/partition_creator.rb                            |   4
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb |   2
-rw-r--r--  lib/gitlab/database/postgres_index.rb                                            |  31
-rw-r--r--  lib/gitlab/database/reindexing.rb                                                |  18
-rw-r--r--  lib/gitlab/database/reindexing/concurrent_reindex.rb                             | 127
-rw-r--r--  lib/gitlab/database/reindexing/coordinator.rb                                    |  41
-rw-r--r--  lib/gitlab/database/reindexing/reindex_action.rb                                 |  35
-rw-r--r--  lib/gitlab/database/schema_helpers.rb                                            |  17
-rw-r--r--  lib/gitlab/database/similarity_score.rb                                          |  11
-rw-r--r--  lib/gitlab/database/with_lock_retries.rb                                         |   8
15 files changed, 669 insertions(+), 183 deletions(-)
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
index 1762b81b7d8..11d9881aac2 100644
--- a/lib/gitlab/database/batch_count.rb
+++ b/lib/gitlab/database/batch_count.rb
@@ -8,15 +8,20 @@
# To avoid a potentially complex, time-consuming query when calculating min and max for batch_distinct_count,
# the start and finish can be passed explicitly
#
+# Grouped relations can be used as well. However, prefer a batch size of around 10,000, because a grouped count is more expensive.
+#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
#
# Examples:
# extend ::Gitlab::Database::BatchCount
# batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
+# batch_count(Namespace.group(:type))
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
+# batch_distinct_count(Project.group(:visibility_level), :creator_id)
# batch_sum(User, :sign_in_count)
+# batch_sum(Issue.group(:state_id), :weight)
module Gitlab
module Database
module BatchCount
@@ -77,34 +82,45 @@ module Gitlab
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
- counter = 0
+ results = nil
batch_start = start
while batch_start <= finish
+ batch_relation = build_relation_batch(batch_start, batch_start + batch_size, mode)
begin
- counter += batch_fetch(batch_start, batch_start + batch_size, mode)
+ results = merge_results(results, batch_relation.send(@operation, *@operation_args)) # rubocop:disable GitlabSecurity/PublicSend
batch_start += batch_size
- rescue ActiveRecord::QueryCanceled
+ rescue ActiveRecord::QueryCanceled => error
# retry with a safe batch size & warmer cache
if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE
batch_size /= 2
else
+ log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error)
return FALLBACK
end
end
sleep(SLEEP_TIME_IN_SECONDS)
end
- counter
+ results
end
- def batch_fetch(start, finish, mode)
- # rubocop:disable GitlabSecurity/PublicSend
- @relation.select(@column).public_send(mode).where(between_condition(start, finish)).send(@operation, *@operation_args)
+ def merge_results(results, object)
+ return object unless results
+
+ if object.is_a?(Hash)
+ results.merge!(object) { |_, a, b| a + b }
+ else
+ results + object
+ end
end
private
+ def build_relation_batch(start, finish, mode)
+ @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend
+ end
+
def batch_size_for_mode_and_operation(mode, operation)
return DEFAULT_SUM_BATCH_SIZE if operation == :sum
@@ -118,11 +134,11 @@ module Gitlab
end
def actual_start(start)
- start || @relation.minimum(@column) || 0
+ start || @relation.unscope(:group, :having).minimum(@column) || 0
end
def actual_finish(finish)
- finish || @relation.maximum(@column) || 0
+ finish || @relation.unscope(:group, :having).maximum(@column) || 0
end
def check_mode!(mode)
@@ -130,6 +146,20 @@ module Gitlab
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
end
+
+ def log_canceled_batch_fetch(batch_start, mode, query, error)
+ Gitlab::AppJsonLogger
+ .error(
+ event: 'batch_count',
+ relation: @relation.table_name,
+ operation: @operation,
+ operation_args: @operation_args,
+ start: batch_start,
+ mode: mode,
+ query: query,
+ message: "Query has been canceled with message: #{error.message}"
+ )
+ end
end
end
end
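
The core change in batch_count.rb: a batch result is no longer always an integer. For grouped relations, each batch yields a Hash keyed by the group value, and per-batch results are accumulated rather than summed. A minimal standalone sketch of the accumulation logic, mirroring merge_results above (runnable outside Rails):

```ruby
# Per-batch results are either plain numbers (ungrouped relations)
# or Hashes keyed by group value (grouped relations).
def merge_results(results, object)
  return object unless results

  if object.is_a?(Hash)
    results.merge!(object) { |_key, a, b| a + b }
  else
    results + object
  end
end

# Ungrouped: numeric totals accumulate across batches.
total = nil
[12, 30, 7].each { |batch| total = merge_results(total, batch) }
p total # => 49

# Grouped: hashes from each batch are merged, summing per group.
grouped = nil
[{ 'public' => 3, 'private' => 1 }, { 'private' => 4 }].each do |batch|
  grouped = merge_results(grouped, batch)
end
p grouped # => {"public"=>3, "private"=>5}
```
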
diff --git a/lib/gitlab/database/bulk_update.rb b/lib/gitlab/database/bulk_update.rb
new file mode 100644
index 00000000000..1403d561890
--- /dev/null
+++ b/lib/gitlab/database/bulk_update.rb
@@ -0,0 +1,168 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ # Constructs queries of the form:
+ #
+ # with cte(a, b, c) as (
+ # select * from (values (:x, :y, :z), (:q, :r, :s)) as t
+ # )
+ # update table set b = cte.b, c = cte.c where a = cte.a
+ #
+ # This is useful if you want to update a set of records in a single query
+ # but cannot express the update as a calculation (i.e. you have arbitrary
+ # updates to perform).
+ #
+ # The only requirement is that the table has an ID column, which is used
+ # to identify the rows to be updated.
+ #
+ # Usage:
+ #
+ # mapping = {
+ # issue_a => { title: 'This title', relative_position: 100 },
+ # issue_b => { title: 'That title', relative_position: 173 }
+ # }
+ #
+ # ::Gitlab::Database::BulkUpdate.execute(%i[title relative_position], mapping)
+ #
+ # Note that this is a very low-level tool that operates on raw column
+ # values. For example, enums/state fields must be translated into their
+ # underlying representations, and no hooks will be called.
+ #
+ module BulkUpdate
+ LIST_SEPARATOR = ', '
+
+ class Setter
+ include Gitlab::Utils::StrongMemoize
+
+ def initialize(model, columns, mapping)
+ @table_name = model.table_name
+ @connection = model.connection
+ @columns = self.class.column_definitions(model, columns)
+ @mapping = self.class.value_mapping(mapping)
+ end
+
+ def update!
+ if without_prepared_statement?
+ # A workaround for https://github.com/rails/rails/issues/24893
+ # When prepared statements are prevented (such as when using the
+ # query counter or in omnibus by default), we cannot call
+ # `exec_update`, since that will discard the bindings.
+ connection.send(:exec_no_cache, sql, log_name, params) # rubocop: disable GitlabSecurity/PublicSend
+ else
+ connection.exec_update(sql, log_name, params)
+ end
+ end
+
+ def self.column_definitions(model, columns)
+ raise ArgumentError, 'invalid columns' if columns.blank? || columns.any? { |c| !c.is_a?(Symbol) }
+ raise ArgumentError, 'cannot set ID' if columns.include?(:id)
+
+ ([:id] | columns).map { |name| column_definition(model, name) }
+ end
+
+ def self.column_definition(model, name)
+ definition = model.column_for_attribute(name)
+ raise ArgumentError, "Unknown column: #{name}" unless definition.type
+
+ definition
+ end
+
+ def self.value_mapping(mapping)
+ raise ArgumentError, 'invalid mapping' if mapping.blank?
+ raise ArgumentError, 'invalid mapping value' if mapping.any? { |_k, v| !v.is_a?(Hash) }
+
+ mapping
+ end
+
+ private
+
+ attr_reader :table_name, :connection, :columns, :mapping
+
+ def log_name
+ strong_memoize(:log_name) do
+ "BulkUpdate #{table_name} #{columns.drop(1).map(&:name)}:#{mapping.size}"
+ end
+ end
+
+ def params
+ mapping.flat_map do |k, v|
+ obj_id = k.try(:id) || k
+ v = v.merge(id: obj_id)
+ columns.map { |c| query_attribute(c, k, v.with_indifferent_access) }
+ end
+ end
+
+ # A workaround for https://github.com/rails/rails/issues/24893
+ # We need to detect if prepared statements have been disabled.
+ def without_prepared_statement?
+ strong_memoize(:without_prepared_statement) do
+ connection.send(:without_prepared_statement?, [1]) # rubocop: disable GitlabSecurity/PublicSend
+ end
+ end
+
+ def query_attribute(column, key, values)
+ value = values[column.name]
+ key[column.name] = value if key.try(:id) # optimistic update
+ ActiveRecord::Relation::QueryAttribute.from_user(nil, value, ActiveModel::Type.lookup(column.type))
+ end
+
+ def values
+ counter = 0
+ typed = false
+
+ mapping.map do |k, v|
+ binds = columns.map do |c|
+ bind = "$#{counter += 1}"
+ # PG is not great at inferring types - help it for the first row.
+ bind += "::#{c.sql_type}" unless typed
+ bind
+ end
+ typed = true
+
+ "(#{list_of(binds)})"
+ end
+ end
+
+ def list_of(list)
+ list.join(LIST_SEPARATOR)
+ end
+
+ def sql
+ <<~SQL
+ WITH cte(#{list_of(cte_columns)}) AS (VALUES #{list_of(values)})
+ UPDATE #{table_name} SET #{list_of(updates)} FROM cte WHERE cte_id = id
+ SQL
+ end
+
+ def column_names
+ strong_memoize(:column_names) { columns.map(&:name) }
+ end
+
+ def cte_columns
+ strong_memoize(:cte_columns) do
+ column_names.map do |c|
+ connection.quote_column_name("cte_#{c}")
+ end
+ end
+ end
+
+ def updates
+ column_names.zip(cte_columns).drop(1).map do |dest, src|
+ "#{connection.quote_column_name(dest)} = cte.#{src}"
+ end
+ end
+ end
+
+ def self.execute(columns, mapping, &to_class)
+ raise ArgumentError if mapping.blank?
+
+ entries_by_class = mapping.group_by { |k, v| block_given? ? to_class.call(k) : k.class }
+
+ entries_by_class.each do |model, entries|
+ Setter.new(model, columns, entries).update!
+ end
+ end
+ end
+ end
+end
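
For reference, a hedged usage sketch following the module's own doc comment (issue_a/issue_b stand for persisted Issue records; the bare-ID form at the end is an assumption based on the to_class block in self.execute):

```ruby
# Update two records in a single statement. Values are raw column values;
# no ActiveRecord callbacks or validations run.
mapping = {
  issue_a => { title: 'This title', relative_position: 100 },
  issue_b => { title: 'That title', relative_position: 173 }
}

::Gitlab::Database::BulkUpdate.execute(%i[title relative_position], mapping)

# With bare IDs as keys, the optional block resolves the model class
# (and therefore the table and connection) for each entry:
::Gitlab::Database::BulkUpdate.execute(%i[title], { 1 => { title: 'A' } }) { Issue }
```
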
diff --git a/lib/gitlab/database/concurrent_reindex.rb b/lib/gitlab/database/concurrent_reindex.rb
deleted file mode 100644
index 485ab35e55d..00000000000
--- a/lib/gitlab/database/concurrent_reindex.rb
+++ /dev/null
@@ -1,143 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- class ConcurrentReindex
- include Gitlab::Utils::StrongMemoize
- include MigrationHelpers
-
- ReindexError = Class.new(StandardError)
-
- PG_IDENTIFIER_LENGTH = 63
- TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
- REPLACED_INDEX_PREFIX = 'old_reindex_'
-
- attr_reader :index_name, :logger
-
- def initialize(index_name, logger:)
- @index_name = index_name
- @logger = logger
- end
-
- def execute
- raise ReindexError, "index #{index_name} does not exist" unless index_exists?
-
- raise ReindexError, 'UNIQUE indexes are currently not supported' if index_unique?
-
- logger.debug("dropping dangling index from previous run: #{replacement_index_name}")
- remove_replacement_index
-
- begin
- create_replacement_index
-
- unless replacement_index_valid?
- message = 'replacement index was created as INVALID'
- logger.error("#{message}, cleaning up")
- raise ReindexError, "failed to reindex #{index_name}: #{message}"
- end
-
- swap_replacement_index
- rescue Gitlab::Database::WithLockRetries::AttemptsExhaustedError => e
- logger.error('failed to obtain the required database locks to swap the indexes, cleaning up')
- raise ReindexError, e.message
- rescue ActiveRecord::ActiveRecordError, PG::Error => e
- logger.error("database error while attempting reindex of #{index_name}: #{e.message}")
- raise ReindexError, e.message
- ensure
- logger.info("dropping unneeded replacement index: #{replacement_index_name}")
- remove_replacement_index
- end
- end
-
- private
-
- def connection
- @connection ||= ActiveRecord::Base.connection
- end
-
- def replacement_index_name
- @replacement_index_name ||= constrained_index_name(TEMPORARY_INDEX_PREFIX)
- end
-
- def index
- strong_memoize(:index) do
- find_index(index_name)
- end
- end
-
- def index_exists?
- !index.nil?
- end
-
- def index_unique?
- index.indisunique
- end
-
- def constrained_index_name(prefix)
- "#{prefix}#{index_name}".slice(0, PG_IDENTIFIER_LENGTH)
- end
-
- def create_replacement_index
- create_replacement_index_statement = index.indexdef
- .sub(/CREATE INDEX/, 'CREATE INDEX CONCURRENTLY')
- .sub(/#{index_name}/, replacement_index_name)
-
- logger.info("creating replacement index #{replacement_index_name}")
- logger.debug("replacement index definition: #{create_replacement_index_statement}")
-
- disable_statement_timeout do
- connection.execute(create_replacement_index_statement)
- end
- end
-
- def replacement_index_valid?
- find_index(replacement_index_name).indisvalid
- end
-
- def find_index(index_name)
- record = connection.select_one(<<~SQL)
- SELECT
- pg_index.indisunique,
- pg_index.indisvalid,
- pg_indexes.indexdef
- FROM pg_index
- INNER JOIN pg_class ON pg_class.oid = pg_index.indexrelid
- INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
- INNER JOIN pg_indexes ON pg_class.relname = pg_indexes.indexname
- WHERE pg_namespace.nspname = 'public'
- AND pg_class.relname = #{connection.quote(index_name)}
- SQL
-
- OpenStruct.new(record) if record
- end
-
- def swap_replacement_index
- replaced_index_name = constrained_index_name(REPLACED_INDEX_PREFIX)
-
- logger.info("swapping replacement index #{replacement_index_name} with #{index_name}")
-
- with_lock_retries do
- rename_index(index_name, replaced_index_name)
- rename_index(replacement_index_name, index_name)
- rename_index(replaced_index_name, replacement_index_name)
- end
- end
-
- def rename_index(old_index_name, new_index_name)
- connection.execute("ALTER INDEX #{old_index_name} RENAME TO #{new_index_name}")
- end
-
- def remove_replacement_index
- disable_statement_timeout do
- connection.execute("DROP INDEX CONCURRENTLY IF EXISTS #{replacement_index_name}")
- end
- end
-
- def with_lock_retries(&block)
- arguments = { klass: self.class, logger: logger }
-
- Gitlab::Database::WithLockRetries.new(arguments).run(raise_on_exhaustion: true, &block)
- end
- end
- end
-end
diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb
index e226ed7613a..89190320cf9 100644
--- a/lib/gitlab/database/count/reltuples_count_strategy.rb
+++ b/lib/gitlab/database/count/reltuples_count_strategy.rb
@@ -74,8 +74,9 @@ module Gitlab
def get_statistics(table_names, check_statistics: true)
time = 6.hours.ago
- query = PgClass.joins("LEFT JOIN pg_stat_user_tables USING (relname)")
+ query = PgClass.joins("LEFT JOIN pg_stat_user_tables ON pg_stat_user_tables.relid = pg_class.oid")
.where(relname: table_names)
+ .where('schemaname = current_schema()')
.select('pg_class.relname AS table_name, reltuples::bigint AS estimate')
if check_statistics
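
The join fix matters because relname is not unique across schemas: the OID join plus the schemaname filter pins the statistics row to the current schema. A hedged snippet showing where the estimate ultimately comes from (table name illustrative; reltuples is maintained by VACUUM/ANALYZE, so it can lag the true count):

```ruby
# Planner row estimate for a table resolved via the current search_path.
estimate = ActiveRecord::Base.connection.select_value(<<~SQL)
  SELECT reltuples::bigint
  FROM pg_class
  WHERE oid = 'projects'::regclass
SQL
```
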
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 723f0f6a308..66b6ce1ec55 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -176,7 +176,7 @@ module Gitlab
name: name.presence || concurrent_foreign_key_name(source, column)
}
- if foreign_key_exists?(source, target, options)
+ if foreign_key_exists?(source, target, **options)
warning_message = "Foreign key not created because it exists already " \
"(this may be due to an aborted migration or similar): " \
"source: #{source}, target: #{target}, column: #{options[:column]}, "\
@@ -330,13 +330,13 @@ module Gitlab
# * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the block, sleep time before the next iteration, defaults to `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION`
# * +logger+ - [Gitlab::JsonLogger]
# * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
- def with_lock_retries(**args, &block)
+ def with_lock_retries(*args, **kwargs, &block)
merged_args = {
klass: self.class,
logger: Gitlab::BackgroundMigration::Logger
- }.merge(args)
+ }.merge(kwargs)
- Gitlab::Database::WithLockRetries.new(merged_args).run(&block)
+ Gitlab::Database::WithLockRetries.new(**merged_args).run(&block)
end
def true_value
@@ -544,6 +544,16 @@ module Gitlab
rename_column_concurrently(table, column, temp_column, type: new_type, type_cast_function: type_cast_function, batch_column_name: batch_column_name)
end
+ # Reverses operations performed by change_column_type_concurrently.
+ #
+ # table - The table containing the column.
+ # column - The name of the column to change.
+ def undo_change_column_type_concurrently(table, column)
+ temp_column = "#{column}_for_type_change"
+
+ undo_rename_column_concurrently(table, column, temp_column)
+ end
+
# Performs cleanup of a concurrent type change.
#
# table - The table containing the column.
@@ -560,6 +570,65 @@ module Gitlab
end
end
+ # Reverses operations performed by cleanup_concurrent_column_type_change.
+ #
+ # table - The table containing the column.
+ # column - The name of the column to change.
+ # old_type - The type of the original column used with change_column_type_concurrently.
+ # type_cast_function - Required if the conversion back to the original type is not automatic
+ # batch_column_name - option for tables without a primary key; in that case
+ # another unique integer column can be used. Example: :user_id
+ def undo_cleanup_concurrent_column_type_change(table, column, old_type, type_cast_function: nil, batch_column_name: :id)
+ temp_column = "#{column}_for_type_change"
+
+ # Using a descriptive name that includes the original column's name risks
+ # exceeding the 63-character identifier limit, so we use a hash
+ identifier = "#{table}_#{column}_for_type_change"
+ hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10)
+ temp_undo_cleanup_column = "tmp_undo_cleanup_column_#{hashed_identifier}"
+
+ unless column_exists?(table, batch_column_name)
+ raise "Column #{batch_column_name} does not exist on #{table}"
+ end
+
+ if transaction_open?
+ raise 'undo_cleanup_concurrent_column_type_change can not be run inside a transaction'
+ end
+
+ check_trigger_permissions!(table)
+
+ begin
+ create_column_from(
+ table,
+ column,
+ temp_undo_cleanup_column,
+ type: old_type,
+ batch_column_name: batch_column_name,
+ type_cast_function: type_cast_function
+ )
+
+ transaction do
+ # This has to be performed in a transaction as otherwise we might
+ # have inconsistent data.
+ rename_column(table, column, temp_column)
+ rename_column(table, temp_undo_cleanup_column, column)
+
+ install_rename_triggers(table, column, temp_column)
+ end
+ rescue
+ # create_column_from cannot run inside a transaction, which means
+ # that there is a risk that if any of the operations that follow it
+ # fail, we'll be left with an inconsistent schema.
+ # For that reason, we make sure to drop temp_undo_cleanup_column
+ # if an error is caught.
+ if column_exists?(table, temp_undo_cleanup_column)
+ remove_column(table, temp_undo_cleanup_column)
+ end
+
+ raise
+ end
+ end
+
# Cleans up a concurrent column name.
#
# This method takes care of removing previously installed triggers as well
@@ -882,7 +951,7 @@ module Gitlab
# column.
opclasses[new] = opclasses.delete(old) if opclasses[old]
- options[:opclasses] = opclasses
+ options[:opclass] = opclasses
end
add_concurrent_index(table, new_columns, options)
@@ -994,10 +1063,10 @@ into similar problems in the future (e.g. when new tables are created).
def postgres_exists_by_name?(table, name)
index_sql = <<~SQL
SELECT COUNT(*)
- FROM pg_index
- JOIN pg_class i ON (indexrelid=i.oid)
- JOIN pg_class t ON (indrelid=t.oid)
- WHERE i.relname = '#{name}' AND t.relname = '#{table}'
+ FROM pg_catalog.pg_indexes
+ WHERE schemaname = #{connection.quote(current_schema)}
+ AND tablename = #{connection.quote(table)}
+ AND indexname = #{connection.quote(name)}
SQL
connection.select_value(index_sql).to_i > 0
@@ -1053,11 +1122,15 @@ into similar problems in the future (e.g. when new tables are created).
# the table name in addition to using the constraint_name
check_sql = <<~SQL
SELECT COUNT(*)
- FROM pg_constraint
- JOIN pg_class ON pg_constraint.conrelid = pg_class.oid
- WHERE pg_constraint.contype = 'c'
- AND pg_constraint.conname = '#{constraint_name}'
- AND pg_class.relname = '#{table}'
+ FROM pg_catalog.pg_constraint con
+ INNER JOIN pg_catalog.pg_class rel
+ ON rel.oid = con.conrelid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = con.connamespace
+ WHERE con.contype = 'c'
+ AND con.conname = #{connection.quote(constraint_name)}
+ AND nsp.nspname = #{connection.quote(current_schema)}
+ AND rel.relname = #{connection.quote(table)}
SQL
connection.select_value(check_sql) > 0
@@ -1147,6 +1220,64 @@ into similar problems in the future (e.g. when new tables are created).
end
end
+ # Copies all check constraints for the old column to the new column.
+ #
+ # table - The table containing the columns.
+ # old - The old column.
+ # new - The new column.
+ # schema - The schema the table is defined for
+ # If it is not provided, then the current_schema is used
+ def copy_check_constraints(table, old, new, schema: nil)
+ if transaction_open?
+ raise 'copy_check_constraints can not be run inside a transaction'
+ end
+
+ unless column_exists?(table, old)
+ raise "Column #{old} does not exist on #{table}"
+ end
+
+ unless column_exists?(table, new)
+ raise "Column #{new} does not exist on #{table}"
+ end
+
+ table_with_schema = schema.present? ? "#{schema}.#{table}" : table
+
+ check_constraints_for(table, old, schema: schema).each do |check_c|
+ validate = !(check_c["constraint_def"].end_with? "NOT VALID")
+
+ # Normalize:
+ # - Old constraint definitions:
+ # '(char_length(entity_path) <= 5500)'
+ # - Definitions from pg_get_constraintdef(oid):
+ # 'CHECK ((char_length(entity_path) <= 5500))'
+ # - Definitions from pg_get_constraintdef(oid, pretty_bool):
+ # 'CHECK (char_length(entity_path) <= 5500)'
+ # - Not valid constraints: 'CHECK (...) NOT VALID'
+ # to a single format that we can use:
+ # '(char_length(entity_path) <= 5500)'
+ check_definition = check_c["constraint_def"]
+ .sub(/^\s*(CHECK)?\s*\({0,2}/, '(')
+ .sub(/\){0,2}\s*(NOT VALID)?\s*$/, ')')
+
+ constraint_name = begin
+ if check_definition == "(#{old} IS NOT NULL)"
+ not_null_constraint_name(table_with_schema, new)
+ elsif check_definition.start_with? "(char_length(#{old}) <="
+ text_limit_name(table_with_schema, new)
+ else
+ check_constraint_name(table_with_schema, new, 'copy_check_constraint')
+ end
+ end
+
+ add_check_constraint(
+ table_with_schema,
+ check_definition.gsub(old.to_s, new.to_s),
+ constraint_name,
+ validate: validate
+ )
+ end
+ end
+
# Migration Helpers for adding limit to text columns
def add_text_limit(table, column, limit, constraint_name: nil, validate: true)
add_check_constraint(
@@ -1274,6 +1405,37 @@ into similar problems in the future (e.g. when new tables are created).
end
end
+ # Returns an ActiveRecord::Result containing the check constraints
+ # defined for the given column.
+ #
+ # If the schema is not provided, then the current_schema is used
+ def check_constraints_for(table, column, schema: nil)
+ check_sql = <<~SQL
+ SELECT
+ ccu.table_schema as schema_name,
+ ccu.table_name as table_name,
+ ccu.column_name as column_name,
+ con.conname as constraint_name,
+ pg_get_constraintdef(con.oid) as constraint_def
+ FROM pg_catalog.pg_constraint con
+ INNER JOIN pg_catalog.pg_class rel
+ ON rel.oid = con.conrelid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = con.connamespace
+ INNER JOIN information_schema.constraint_column_usage ccu
+ ON con.conname = ccu.constraint_name
+ AND nsp.nspname = ccu.constraint_schema
+ AND rel.relname = ccu.table_name
+ WHERE nsp.nspname = #{connection.quote(schema.presence || current_schema)}
+ AND rel.relname = #{connection.quote(table)}
+ AND ccu.column_name = #{connection.quote(column)}
+ AND con.contype = 'c'
+ ORDER BY constraint_name
+ SQL
+
+ connection.exec_query(check_sql)
+ end
+
def statement_timeout_disabled?
# This is a string of the form "100ms" or "0" when disabled
connection.select_value('SHOW statement_timeout') == "0"
@@ -1284,8 +1446,9 @@ into similar problems in the future (e.g. when new tables are created).
check_sql = <<~SQL
SELECT c.is_nullable
FROM information_schema.columns c
- WHERE c.table_name = '#{table}'
- AND c.column_name = '#{column}'
+ WHERE c.table_schema = #{connection.quote(current_schema)}
+ AND c.table_name = #{connection.quote(table)}
+ AND c.column_name = #{connection.quote(column)}
SQL
connection.select_value(check_sql) == 'YES'
@@ -1352,6 +1515,7 @@ into similar problems in the future (e.g. when new tables are created).
copy_indexes(table, old, new)
copy_foreign_keys(table, old, new)
+ copy_check_constraints(table, old, new)
end
def validate_timestamp_column_name!(column_name)
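
Taken together, the new undo_* helpers give concurrent type-change migrations a real down path. A hedged sketch of a post-deployment migration pair that would use them (table/column/type names are illustrative, not from this commit):

```ruby
class CleanupEventsDataTypeChange < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  disable_ddl_transaction!

  def up
    # Drops the old column and renames <column>_for_type_change into place.
    cleanup_concurrent_column_type_change :events, :data
  end

  def down
    # Recreates the old-type column under a hashed temporary name, swaps it
    # back in, and reinstalls the rename triggers (see the helper above).
    undo_cleanup_concurrent_column_type_change :events, :data, :jsonb
  end
end
```
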
diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_creator.rb
index 4c1b13fe3b5..547e0b9b957 100644
--- a/lib/gitlab/database/partitioning/partition_creator.rb
+++ b/lib/gitlab/database/partitioning/partition_creator.rb
@@ -72,10 +72,10 @@ module Gitlab
end
def with_lock_retries(&block)
- Gitlab::Database::WithLockRetries.new({
+ Gitlab::Database::WithLockRetries.new(
klass: self.class,
logger: Gitlab::AppLogger
- }).run(&block)
+ ).run(&block)
end
def connection
diff --git a/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb b/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb
index f9ad1e60776..17a42d997e6 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb
@@ -11,8 +11,6 @@ module Gitlab
PAUSE_SECONDS = 0.25
def perform(start_id, stop_id, source_table, partitioned_table, source_column)
- return unless Feature.enabled?(:backfill_partitioned_audit_events, default_enabled: true)
-
if transaction_open?
raise "Aborting job to backfill partitioned #{source_table} table! Do not run this job in a transaction block!"
end
diff --git a/lib/gitlab/database/postgres_index.rb b/lib/gitlab/database/postgres_index.rb
new file mode 100644
index 00000000000..2a9f23f0098
--- /dev/null
+++ b/lib/gitlab/database/postgres_index.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class PostgresIndex < ActiveRecord::Base
+ self.table_name = 'postgres_indexes'
+ self.primary_key = 'identifier'
+
+ scope :by_identifier, ->(identifier) do
+ raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
+
+ find(identifier)
+ end
+
+ # A 'regular' index is a non-unique index that does not serve
+ # an exclusion constraint and is defined on a table that is not
+ # partitioned.
+ scope :regular, -> { where(unique: false, partitioned: false, exclusion: false) }
+
+ scope :random_few, ->(how_many) do
+ limit(how_many).order(Arel.sql('RANDOM()'))
+ end
+
+ scope :not_match, ->(regex) { where("name !~ ?", regex) }
+
+ def to_s
+ name
+ end
+ end
+ end
+end
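
A short sketch of how these scopes compose (index name illustrative; by_identifier insists on a schema-qualified identifier):

```ruby
# Fetch a single index; raises ArgumentError without the schema prefix.
index = Gitlab::Database::PostgresIndex.by_identifier('public.index_users_on_username')

# Pick two random non-unique, non-partitioned, non-exclusion indexes,
# excluding left-overs from previous reindexing runs by name prefix.
candidates = Gitlab::Database::PostgresIndex
  .regular
  .not_match('^tmp_reindex_')
  .not_match('^old_reindex_')
  .random_few(2)
```
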
diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb
new file mode 100644
index 00000000000..074752fe75b
--- /dev/null
+++ b/lib/gitlab/database/reindexing.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ def self.perform(index_selector)
+ Coordinator.new(index_selector).perform
+ end
+
+ def self.candidate_indexes
+ Gitlab::Database::PostgresIndex
+ .regular
+ .not_match("^#{ConcurrentReindex::TEMPORARY_INDEX_PREFIX}")
+ .not_match("^#{ConcurrentReindex::REPLACED_INDEX_PREFIX}")
+ end
+ end
+ end
+end
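
The module then reads as a one-line entry point. A hedged invocation sketch (in the real application this is driven by a rake task, which is an assumption here):

```ruby
# Reindex two random candidates; Coordinator takes a global lease so only
# one reindexing process runs at a time.
indexes = Gitlab::Database::Reindexing.candidate_indexes.random_few(2)
Gitlab::Database::Reindexing.perform(indexes)
```
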
diff --git a/lib/gitlab/database/reindexing/concurrent_reindex.rb b/lib/gitlab/database/reindexing/concurrent_reindex.rb
new file mode 100644
index 00000000000..fd3dca88567
--- /dev/null
+++ b/lib/gitlab/database/reindexing/concurrent_reindex.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ class ConcurrentReindex
+ include Gitlab::Utils::StrongMemoize
+
+ ReindexError = Class.new(StandardError)
+
+ PG_IDENTIFIER_LENGTH = 63
+ TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
+ REPLACED_INDEX_PREFIX = 'old_reindex_'
+ STATEMENT_TIMEOUT = 6.hours
+
+ attr_reader :index, :logger
+
+ def initialize(index, logger: Gitlab::AppLogger)
+ @index = index
+ @logger = logger
+ end
+
+ def perform
+ raise ReindexError, 'UNIQUE indexes are currently not supported' if index.unique?
+ raise ReindexError, 'partitioned indexes are currently not supported' if index.partitioned?
+ raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
+ raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name.start_with?(TEMPORARY_INDEX_PREFIX, REPLACED_INDEX_PREFIX)
+
+ logger.info "Starting reindex of #{index}"
+
+ with_rebuilt_index do |replacement_index|
+ swap_index(replacement_index)
+ end
+ end
+
+ private
+
+ def with_rebuilt_index
+ if Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
+ logger.debug("dropping dangling index from previous run (if it exists): #{replacement_index_name}")
+ remove_index(index.schema, replacement_index_name)
+ end
+
+ create_replacement_index_statement = index.definition
+ .sub(/CREATE INDEX #{index.name}/, "CREATE INDEX CONCURRENTLY #{replacement_index_name}")
+
+ logger.info("creating replacement index #{replacement_index_name}")
+ logger.debug("replacement index definition: #{create_replacement_index_statement}")
+
+ set_statement_timeout do
+ connection.execute(create_replacement_index_statement)
+ end
+
+ replacement_index = Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
+
+ unless replacement_index.valid_index?
+ message = 'replacement index was created as INVALID'
+ logger.error("#{message}, cleaning up")
+ raise ReindexError, "failed to reindex #{index}: #{message}"
+ end
+
+ yield replacement_index
+ ensure
+ begin
+ remove_index(index.schema, replacement_index_name)
+ rescue => e
+ logger.error(e)
+ end
+ end
+
+ def swap_index(replacement_index)
+ logger.info("swapping replacement index #{replacement_index} with #{index}")
+
+ with_lock_retries do
+ rename_index(index.schema, index.name, replaced_index_name)
+ rename_index(replacement_index.schema, replacement_index.name, index.name)
+ rename_index(index.schema, replaced_index_name, replacement_index.name)
+ end
+ end
+
+ def rename_index(schema, old_index_name, new_index_name)
+ connection.execute(<<~SQL)
+ ALTER INDEX #{quote_table_name(schema)}.#{quote_table_name(old_index_name)}
+ RENAME TO #{quote_table_name(new_index_name)}
+ SQL
+ end
+
+ def remove_index(schema, name)
+ logger.info("Removing index #{schema}.#{name}")
+
+ set_statement_timeout do
+ connection.execute(<<~SQL)
+ DROP INDEX CONCURRENTLY
+ IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
+ SQL
+ end
+ end
+
+ def replacement_index_name
+ @replacement_index_name ||= "#{TEMPORARY_INDEX_PREFIX}#{index.indexrelid}"
+ end
+
+ def replaced_index_name
+ @replaced_index_name ||= "#{REPLACED_INDEX_PREFIX}#{index.indexrelid}"
+ end
+
+ def with_lock_retries(&block)
+ arguments = { klass: self.class, logger: logger }
+
+ Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
+ end
+
+ def set_statement_timeout
+ execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
+ yield
+ ensure
+ execute('RESET statement_timeout')
+ end
+
+ delegate :execute, :quote_table_name, to: :connection
+
+ def connection
+ @connection ||= ActiveRecord::Base.connection
+ end
+ end
+ end
+ end
+end
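
The swap in swap_index is a rename cycle executed under a single with_lock_retries block, so every physical index keeps a unique name at each step and the old index ends up under the temporary name, where the ensure block drops it. Roughly the statements issued (schema, index name, and indexrelid are illustrative):

```ruby
# Given index public.index_users_on_name with indexrelid 12345.
# Each rename is a fast, catalog-only change, but it needs a short
# exclusive lock, hence the with_lock_retries wrapper.
connection.execute(%(ALTER INDEX "public"."index_users_on_name" RENAME TO "old_reindex_12345"))
connection.execute(%(ALTER INDEX "public"."tmp_reindex_12345" RENAME TO "index_users_on_name"))
connection.execute(%(ALTER INDEX "public"."old_reindex_12345" RENAME TO "tmp_reindex_12345"))
```
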
diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb
new file mode 100644
index 00000000000..0957f43e166
--- /dev/null
+++ b/lib/gitlab/database/reindexing/coordinator.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ class Coordinator
+ include ExclusiveLeaseGuard
+
+ # Maximum lease time for the global Redis lease
+ # This should be higher than the maximum time for any
+ # long running step in the reindexing process (compare with
+ # statement timeouts).
+ TIMEOUT_PER_ACTION = 1.day
+
+ attr_reader :indexes
+
+ def initialize(indexes)
+ @indexes = indexes
+ end
+
+ def perform
+ indexes.each do |index|
+ # This obtains a global lease such that there's
+ # only one live reindexing process at a time.
+ try_obtain_lease do
+ ReindexAction.keep_track_of(index) do
+ ConcurrentReindex.new(index).perform
+ end
+ end
+ end
+ end
+
+ private
+
+ def lease_timeout
+ TIMEOUT_PER_ACTION
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/reindexing/reindex_action.rb b/lib/gitlab/database/reindexing/reindex_action.rb
new file mode 100644
index 00000000000..0928ef90e5d
--- /dev/null
+++ b/lib/gitlab/database/reindexing/reindex_action.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ class ReindexAction < ActiveRecord::Base
+ self.table_name = 'postgres_reindex_actions'
+
+ enum state: { started: 0, finished: 1, failed: 2 }
+
+ def self.keep_track_of(index, &block)
+ action = create!(
+ index_identifier: index.identifier,
+ action_start: Time.zone.now,
+ ondisk_size_bytes_start: index.ondisk_size_bytes
+ )
+
+ yield
+
+ action.state = :finished
+ rescue
+ action.state = :failed
+ raise
+ ensure
+ index.reload # rubocop:disable Cop/ActiveRecordAssociationReload
+
+ action.action_end = Time.zone.now
+ action.ondisk_size_bytes_end = index.ondisk_size_bytes
+
+ action.save!
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb
index dda4d8eecdb..3d929c62933 100644
--- a/lib/gitlab/database/schema_helpers.rb
+++ b/lib/gitlab/database/schema_helpers.rb
@@ -32,11 +32,14 @@ module Gitlab
def trigger_exists?(table_name, name)
connection.select_value(<<~SQL)
SELECT 1
- FROM pg_trigger
- INNER JOIN pg_class
- ON pg_trigger.tgrelid = pg_class.oid
- WHERE pg_class.relname = '#{table_name}'
- AND pg_trigger.tgname = '#{name}'
+ FROM pg_catalog.pg_trigger trgr
+ INNER JOIN pg_catalog.pg_class rel
+ ON trgr.tgrelid = rel.oid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = rel.relnamespace
+ WHERE nsp.nspname = #{connection.quote(current_schema)}
+ AND rel.relname = #{connection.quote(table_name)}
+ AND trgr.tgname = #{connection.quote(name)}
SQL
end
@@ -68,10 +71,10 @@ module Gitlab
end
def with_lock_retries(&block)
- Gitlab::Database::WithLockRetries.new({
+ Gitlab::Database::WithLockRetries.new(
klass: self.class,
logger: Gitlab::BackgroundMigration::Logger
- }).run(&block)
+ ).run(&block)
end
def assert_not_in_transaction_block(scope:)
diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb
index 2633c29438a..ff78fd0218c 100644
--- a/lib/gitlab/database/similarity_score.rb
+++ b/lib/gitlab/database/similarity_score.rb
@@ -6,6 +6,11 @@ module Gitlab
EMPTY_STRING = Arel.sql("''").freeze
EXPRESSION_ON_INVALID_INPUT = Arel::Nodes::NamedFunction.new('CAST', [Arel.sql('0').as('integer')]).freeze
DEFAULT_MULTIPLIER = 1
+ DISPLAY_NAME = self.name.underscore.freeze
+
+ # Adds a "magic" comment to the generated SQL expression so we can tell if we're sorting by similarity.
+ # Example: /* gitlab/database/similarity_score */ SIMILARITY(COALESCE...
+ SIMILARITY_FUNCTION_CALL_WITH_ANNOTATION = "/* #{DISPLAY_NAME} */ SIMILARITY".freeze
# This method returns an Arel expression that can be used in an ActiveRecord query to order the resultset by similarity.
#
@@ -74,6 +79,10 @@ module Gitlab
end
end
+ def self.order_by_similarity?(arel_query)
+ arel_query.to_sql.include?(SIMILARITY_FUNCTION_CALL_WITH_ANNOTATION)
+ end
+
# (SIMILARITY(COALESCE(column, ''), 'search_string') * CAST(multiplier AS numeric))
def self.rule_to_arel(search, rule)
Arel::Nodes::Grouping.new(
@@ -91,7 +100,7 @@ module Gitlab
# SIMILARITY(COALESCE(column, ''), 'search_string')
def self.similarity_function_call(search, column)
- Arel::Nodes::NamedFunction.new('SIMILARITY', [column, Arel.sql(search)])
+ Arel::Nodes::NamedFunction.new(SIMILARITY_FUNCTION_CALL_WITH_ANNOTATION, [column, Arel.sql(search)])
end
# CAST(multiplier AS numeric)
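
A hedged sketch of how the marker enables detection (build_expression is this class's public entry point elsewhere in the file; its exact call shape here is an assumption):

```ruby
order = Gitlab::Database::SimilarityScore.build_expression(
  search: 'gitlab',
  rules: [{ column: Project.arel_table[:name], multiplier: 1 }]
)

query = Project.order(order.desc)

# True because the generated SQL now contains the
# "/* gitlab/database/similarity_score */ SIMILARITY" marker.
Gitlab::Database::SimilarityScore.order_by_similarity?(query.arel)
```
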
diff --git a/lib/gitlab/database/with_lock_retries.rb b/lib/gitlab/database/with_lock_retries.rb
index a9c86e4e267..3fb52d786ad 100644
--- a/lib/gitlab/database/with_lock_retries.rb
+++ b/lib/gitlab/database/with_lock_retries.rb
@@ -95,7 +95,7 @@ module Gitlab
run_block_with_transaction
rescue ActiveRecord::LockWaitTimeout
if retry_with_lock_timeout?
- disable_idle_in_transaction_timeout
+ disable_idle_in_transaction_timeout if ActiveRecord::Base.connection.transaction_open?
wait_until_next_retry
reset_db_settings
@@ -149,7 +149,7 @@ module Gitlab
log(message: "Couldn't acquire lock to perform the migration", current_iteration: current_iteration)
log(message: "Executing the migration without lock timeout", current_iteration: current_iteration)
- execute("SET LOCAL lock_timeout TO '0'")
+ disable_lock_timeout if ActiveRecord::Base.connection.transaction_open?
run_block
@@ -184,6 +184,10 @@ module Gitlab
execute("SET LOCAL idle_in_transaction_session_timeout TO '0'")
end
+ def disable_lock_timeout
+ execute("SET LOCAL lock_timeout TO '0'")
+ end
+
def reset_db_settings
execute('RESET idle_in_transaction_session_timeout; RESET lock_timeout')
end
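
Both transaction_open? guards exist because SET LOCAL only takes effect inside a transaction; outside one, PostgreSQL emits the warning "SET LOCAL can only be used in transaction blocks" and the setting is a no-op. A minimal sketch of the behavior:

```ruby
conn = ActiveRecord::Base.connection

# Inside a transaction the setting applies until COMMIT/ROLLBACK:
conn.transaction do
  conn.execute("SET LOCAL lock_timeout TO '100ms'")
  # ... lock-sensitive DDL here ...
end

# Outside a transaction SET LOCAL warns and changes nothing, which is
# why the code above checks transaction_open? first.
```
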