gitlab.com/gitlab-org/gitlab-foss.git

author    GitLab Bot <gitlab-bot@gitlab.com>  2020-10-21 10:08:36 +0300
committer GitLab Bot <gitlab-bot@gitlab.com>  2020-10-21 10:08:36 +0300
commit    48aff82709769b098321c738f3444b9bdaa694c6 (patch)
tree      e00c7c43e2d9b603a5a6af576b1685e400410dee /lib/gitlab/database
parent    879f5329ee916a948223f8f43d77fba4da6cd028 (diff)

Add latest changes from gitlab-org/gitlab@13-5-stable-ee (v13.5.0-rc42)
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--  lib/gitlab/database/batch_count.rb                                               |  48
-rw-r--r--  lib/gitlab/database/bulk_update.rb                                               | 168
-rw-r--r--  lib/gitlab/database/concurrent_reindex.rb                                        | 143
-rw-r--r--  lib/gitlab/database/count/reltuples_count_strategy.rb                            |   3
-rw-r--r--  lib/gitlab/database/migration_helpers.rb                                         | 196
-rw-r--r--  lib/gitlab/database/partitioning/partition_creator.rb                            |   4
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb |   2
-rw-r--r--  lib/gitlab/database/postgres_index.rb                                            |  31
-rw-r--r--  lib/gitlab/database/reindexing.rb                                                |  18
-rw-r--r--  lib/gitlab/database/reindexing/concurrent_reindex.rb                             | 127
-rw-r--r--  lib/gitlab/database/reindexing/coordinator.rb                                    |  41
-rw-r--r--  lib/gitlab/database/reindexing/reindex_action.rb                                 |  35
-rw-r--r--  lib/gitlab/database/schema_helpers.rb                                            |  17
-rw-r--r--  lib/gitlab/database/similarity_score.rb                                          |  11
-rw-r--r--  lib/gitlab/database/with_lock_retries.rb                                         |   8
15 files changed, 669 insertions(+), 183 deletions(-)
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
index 1762b81b7d8..11d9881aac2 100644
--- a/lib/gitlab/database/batch_count.rb
+++ b/lib/gitlab/database/batch_count.rb
@@ -8,15 +8,20 @@
# To avoid a potentially complex, time-consuming query when calculating min and max for batch_distinct_count,
# the start and finish can be passed explicitly
#
+# Grouped relations can be used as well. However, prefer a batch size of around 10,000, because a grouped count is more expensive.
+#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
#
# Examples:
# extend ::Gitlab::Database::BatchCount
# batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
+# batch_count(Namespace.group(:type))
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
+# batch_distinct_count(Project.group(:visibility_level), :creator_id)
# batch_sum(User, :sign_in_count)
+# batch_sum(Issue.group(:state_id), :weight)
module Gitlab
module Database
module BatchCount
@@ -77,34 +82,45 @@ module Gitlab
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
- counter = 0
+ results = nil
batch_start = start
while batch_start <= finish
+ batch_relation = build_relation_batch(batch_start, batch_start + batch_size, mode)
begin
- counter += batch_fetch(batch_start, batch_start + batch_size, mode)
+ results = merge_results(results, batch_relation.send(@operation, *@operation_args)) # rubocop:disable GitlabSecurity/PublicSend
batch_start += batch_size
- rescue ActiveRecord::QueryCanceled
+ rescue ActiveRecord::QueryCanceled => error
# retry with a safe batch size & warmer cache
if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE
batch_size /= 2
else
+ log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error)
return FALLBACK
end
end
sleep(SLEEP_TIME_IN_SECONDS)
end
- counter
+ results
end
- def batch_fetch(start, finish, mode)
- # rubocop:disable GitlabSecurity/PublicSend
- @relation.select(@column).public_send(mode).where(between_condition(start, finish)).send(@operation, *@operation_args)
+ def merge_results(results, object)
+ return object unless results
+
+ if object.is_a?(Hash)
+ results.merge!(object) { |_, a, b| a + b }
+ else
+ results + object
+ end
end
private
+ def build_relation_batch(start, finish, mode)
+ @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend
+ end
+
def batch_size_for_mode_and_operation(mode, operation)
return DEFAULT_SUM_BATCH_SIZE if operation == :sum
@@ -118,11 +134,11 @@ module Gitlab
end
def actual_start(start)
- start || @relation.minimum(@column) || 0
+ start || @relation.unscope(:group, :having).minimum(@column) || 0
end
def actual_finish(finish)
- finish || @relation.maximum(@column) || 0
+ finish || @relation.unscope(:group, :having).maximum(@column) || 0
end
def check_mode!(mode)
@@ -130,6 +146,20 @@ module Gitlab
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
end
+
+ def log_canceled_batch_fetch(batch_start, mode, query, error)
+ Gitlab::AppJsonLogger
+ .error(
+ event: 'batch_count',
+ relation: @relation.table_name,
+ operation: @operation,
+ operation_args: @operation_args,
+ start: batch_start,
+ mode: mode,
+ query: query,
+ message: "Query has been canceled with message: #{error.message}"
+ )
+ end
end
end
end
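
The core change in batch_count.rb: a batch result is no longer always an integer. For grouped relations, each batch yields a Hash keyed by the group value, and per-batch results are accumulated rather than summed. A minimal standalone sketch of the accumulation logic, mirroring merge_results above (runnable outside Rails):

```ruby
# Per-batch results are either plain numbers (ungrouped relations)
# or Hashes keyed by group value (grouped relations).
def merge_results(results, object)
  return object unless results

  if object.is_a?(Hash)
    results.merge!(object) { |_key, a, b| a + b }
  else
    results + object
  end
end

# Ungrouped: numeric totals accumulate across batches.
total = nil
[12, 30, 7].each { |batch| total = merge_results(total, batch) }
p total # => 49

# Grouped: hashes from each batch are merged, summing per group.
grouped = nil
[{ 'public' => 3, 'private' => 1 }, { 'private' => 4 }].each do |batch|
  grouped = merge_results(grouped, batch)
end
p grouped # => {"public"=>3, "private"=>5}
```
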
diff --git a/lib/gitlab/database/bulk_update.rb b/lib/gitlab/database/bulk_update.rb
new file mode 100644
index 00000000000..1403d561890
--- /dev/null
+++ b/lib/gitlab/database/bulk_update.rb
@@ -0,0 +1,168 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ # Constructs queries of the form:
+ #
+ # with cte(a, b, c) as (
+ # select * from (values (:x, :y, :z), (:q, :r, :s)) as t
+ # )
+ # update table set b = cte.b, c = cte.c where a = cte.a
+ #
+ # This is useful if you want to update a set of records in a single query
+ # but cannot express the update as a calculation (i.e. you have arbitrary
+ # updates to perform).
+ #
+ # The only requirement is that the table has an ID column, which is used
+ # to identify the rows to be updated.
+ #
+ # Usage:
+ #
+ # mapping = {
+ # issue_a => { title: 'This title', relative_position: 100 },
+ # issue_b => { title: 'That title', relative_position: 173 }
+ # }
+ #
+ # ::Gitlab::Database::BulkUpdate.execute(%i[title relative_position], mapping)
+ #
+ # Note that this is a very low-level tool that operates on raw column
+ # values. For example, enums/state fields must be translated into their
+ # underlying representations, and no hooks will be called.
+ #
+ module BulkUpdate
+ LIST_SEPARATOR = ', '
+
+ class Setter
+ include Gitlab::Utils::StrongMemoize
+
+ def initialize(model, columns, mapping)
+ @table_name = model.table_name
+ @connection = model.connection
+ @columns = self.class.column_definitions(model, columns)
+ @mapping = self.class.value_mapping(mapping)
+ end
+
+ def update!
+ if without_prepared_statement?
+ # A workaround for https://github.com/rails/rails/issues/24893
+ # When prepared statements are prevented (such as when using the
+ # query counter or in omnibus by default), we cannot call
+ # `exec_update`, since that will discard the bindings.
+ connection.send(:exec_no_cache, sql, log_name, params) # rubocop: disable GitlabSecurity/PublicSend
+ else
+ connection.exec_update(sql, log_name, params)
+ end
+ end
+
+ def self.column_definitions(model, columns)
+ raise ArgumentError, 'invalid columns' if columns.blank? || columns.any? { |c| !c.is_a?(Symbol) }
+ raise ArgumentError, 'cannot set ID' if columns.include?(:id)
+
+ ([:id] | columns).map { |name| column_definition(model, name) }
+ end
+
+ def self.column_definition(model, name)
+ definition = model.column_for_attribute(name)
+ raise ArgumentError, "Unknown column: #{name}" unless definition.type
+
+ definition
+ end
+
+ def self.value_mapping(mapping)
+ raise ArgumentError, 'invalid mapping' if mapping.blank?
+ raise ArgumentError, 'invalid mapping value' if mapping.any? { |_k, v| !v.is_a?(Hash) }
+
+ mapping
+ end
+
+ private
+
+ attr_reader :table_name, :connection, :columns, :mapping
+
+ def log_name
+ strong_memoize(:log_name) do
+ "BulkUpdate #{table_name} #{columns.drop(1).map(&:name)}:#{mapping.size}"
+ end
+ end
+
+ def params
+ mapping.flat_map do |k, v|
+ obj_id = k.try(:id) || k
+ v = v.merge(id: obj_id)
+ columns.map { |c| query_attribute(c, k, v.with_indifferent_access) }
+ end
+ end
+
+ # A workaround for https://github.com/rails/rails/issues/24893
+ # We need to detect if prepared statements have been disabled.
+ def without_prepared_statement?
+ strong_memoize(:without_prepared_statement) do
+ connection.send(:without_prepared_statement?, [1]) # rubocop: disable GitlabSecurity/PublicSend
+ end
+ end
+
+ def query_attribute(column, key, values)
+ value = values[column.name]
+ key[column.name] = value if key.try(:id) # optimistic update
+ ActiveRecord::Relation::QueryAttribute.from_user(nil, value, ActiveModel::Type.lookup(column.type))
+ end
+
+ def values
+ counter = 0
+ typed = false
+
+ mapping.map do |k, v|
+ binds = columns.map do |c|
+ bind = "$#{counter += 1}"
+ # PG is not great at inferring types - help it for the first row.
+ bind += "::#{c.sql_type}" unless typed
+ bind
+ end
+ typed = true
+
+ "(#{list_of(binds)})"
+ end
+ end
+
+ def list_of(list)
+ list.join(LIST_SEPARATOR)
+ end
+
+ def sql
+ <<~SQL
+ WITH cte(#{list_of(cte_columns)}) AS (VALUES #{list_of(values)})
+ UPDATE #{table_name} SET #{list_of(updates)} FROM cte WHERE cte_id = id
+ SQL
+ end
+
+ def column_names
+ strong_memoize(:column_names) { columns.map(&:name) }
+ end
+
+ def cte_columns
+ strong_memoize(:cte_columns) do
+ column_names.map do |c|
+ connection.quote_column_name("cte_#{c}")
+ end
+ end
+ end
+
+ def updates
+ column_names.zip(cte_columns).drop(1).map do |dest, src|
+ "#{connection.quote_column_name(dest)} = cte.#{src}"
+ end
+ end
+ end
+
+ def self.execute(columns, mapping, &to_class)
+ raise ArgumentError if mapping.blank?
+
+ entries_by_class = mapping.group_by { |k, v| block_given? ? to_class.call(k) : k.class }
+
+ entries_by_class.each do |model, entries|
+ Setter.new(model, columns, entries).update!
+ end
+ end
+ end
+ end
+end
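
For reference, a hedged usage sketch following the module's own doc comment (issue_a/issue_b stand for persisted Issue records; the bare-ID form at the end is an assumption based on the to_class block in self.execute):

```ruby
# Update two records in a single statement. Values are raw column values;
# no ActiveRecord callbacks or validations run.
mapping = {
  issue_a => { title: 'This title', relative_position: 100 },
  issue_b => { title: 'That title', relative_position: 173 }
}

::Gitlab::Database::BulkUpdate.execute(%i[title relative_position], mapping)

# With bare IDs as keys, the optional block resolves the model class
# (and therefore the table and connection) for each entry:
::Gitlab::Database::BulkUpdate.execute(%i[title], { 1 => { title: 'A' } }) { Issue }
```
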
diff --git a/lib/gitlab/database/concurrent_reindex.rb b/lib/gitlab/database/concurrent_reindex.rb
deleted file mode 100644
index 485ab35e55d..00000000000
--- a/lib/gitlab/database/concurrent_reindex.rb
+++ /dev/null
@@ -1,143 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- class ConcurrentReindex
- include Gitlab::Utils::StrongMemoize
- include MigrationHelpers
-
- ReindexError = Class.new(StandardError)
-
- PG_IDENTIFIER_LENGTH = 63
- TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
- REPLACED_INDEX_PREFIX = 'old_reindex_'
-
- attr_reader :index_name, :logger
-
- def initialize(index_name, logger:)
- @index_name = index_name
- @logger = logger
- end
-
- def execute
- raise ReindexError, "index #{index_name} does not exist" unless index_exists?
-
- raise ReindexError, 'UNIQUE indexes are currently not supported' if index_unique?
-
- logger.debug("dropping dangling index from previous run: #{replacement_index_name}")
- remove_replacement_index
-
- begin
- create_replacement_index
-
- unless replacement_index_valid?
- message = 'replacement index was created as INVALID'
- logger.error("#{message}, cleaning up")
- raise ReindexError, "failed to reindex #{index_name}: #{message}"
- end
-
- swap_replacement_index
- rescue Gitlab::Database::WithLockRetries::AttemptsExhaustedError => e
- logger.error('failed to obtain the required database locks to swap the indexes, cleaning up')
- raise ReindexError, e.message
- rescue ActiveRecord::ActiveRecordError, PG::Error => e
- logger.error("database error while attempting reindex of #{index_name}: #{e.message}")
- raise ReindexError, e.message
- ensure
- logger.info("dropping unneeded replacement index: #{replacement_index_name}")
- remove_replacement_index
- end
- end
-
- private
-
- def connection
- @connection ||= ActiveRecord::Base.connection
- end
-
- def replacement_index_name
- @replacement_index_name ||= constrained_index_name(TEMPORARY_INDEX_PREFIX)
- end
-
- def index
- strong_memoize(:index) do
- find_index(index_name)
- end
- end
-
- def index_exists?
- !index.nil?
- end
-
- def index_unique?
- index.indisunique
- end
-
- def constrained_index_name(prefix)
- "#{prefix}#{index_name}".slice(0, PG_IDENTIFIER_LENGTH)
- end
-
- def create_replacement_index
- create_replacement_index_statement = index.indexdef
- .sub(/CREATE INDEX/, 'CREATE INDEX CONCURRENTLY')
- .sub(/#{index_name}/, replacement_index_name)
-
- logger.info("creating replacement index #{replacement_index_name}")
- logger.debug("replacement index definition: #{create_replacement_index_statement}")
-
- disable_statement_timeout do
- connection.execute(create_replacement_index_statement)
- end
- end
-
- def replacement_index_valid?
- find_index(replacement_index_name).indisvalid
- end
-
- def find_index(index_name)
- record = connection.select_one(<<~SQL)
- SELECT
- pg_index.indisunique,
- pg_index.indisvalid,
- pg_indexes.indexdef
- FROM pg_index
- INNER JOIN pg_class ON pg_class.oid = pg_index.indexrelid
- INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
- INNER JOIN pg_indexes ON pg_class.relname = pg_indexes.indexname
- WHERE pg_namespace.nspname = 'public'
- AND pg_class.relname = #{connection.quote(index_name)}
- SQL
-
- OpenStruct.new(record) if record
- end
-
- def swap_replacement_index
- replaced_index_name = constrained_index_name(REPLACED_INDEX_PREFIX)
-
- logger.info("swapping replacement index #{replacement_index_name} with #{index_name}")
-
- with_lock_retries do
- rename_index(index_name, replaced_index_name)
- rename_index(replacement_index_name, index_name)
- rename_index(replaced_index_name, replacement_index_name)
- end
- end
-
- def rename_index(old_index_name, new_index_name)
- connection.execute("ALTER INDEX #{old_index_name} RENAME TO #{new_index_name}")
- end
-
- def remove_replacement_index
- disable_statement_timeout do
- connection.execute("DROP INDEX CONCURRENTLY IF EXISTS #{replacement_index_name}")
- end
- end
-
- def with_lock_retries(&block)
- arguments = { klass: self.class, logger: logger }
-
- Gitlab::Database::WithLockRetries.new(arguments).run(raise_on_exhaustion: true, &block)
- end
- end
- end
-end
diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb
index e226ed7613a..89190320cf9 100644
--- a/lib/gitlab/database/count/reltuples_count_strategy.rb
+++ b/lib/gitlab/database/count/reltuples_count_strategy.rb
@@ -74,8 +74,9 @@ module Gitlab
def get_statistics(table_names, check_statistics: true)
time = 6.hours.ago
- query = PgClass.joins("LEFT JOIN pg_stat_user_tables USING (relname)")
+ query = PgClass.joins("LEFT JOIN pg_stat_user_tables ON pg_stat_user_tables.relid = pg_class.oid")
.where(relname: table_names)
+ .where('schemaname = current_schema()')
.select('pg_class.relname AS table_name, reltuples::bigint AS estimate')
if check_statistics
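
The join fix matters because relname is not unique across schemas: the OID join plus the schemaname filter pins the statistics row to the current schema. A hedged snippet showing where the estimate ultimately comes from (table name illustrative; reltuples is maintained by VACUUM/ANALYZE, so it can lag the true count):

```ruby
# Planner row estimate for a table resolved via the current search_path.
estimate = ActiveRecord::Base.connection.select_value(<<~SQL)
  SELECT reltuples::bigint
  FROM pg_class
  WHERE oid = 'projects'::regclass
SQL
```
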
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 723f0f6a308..66b6ce1ec55 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -176,7 +176,7 @@ module Gitlab
name: name.presence || concurrent_foreign_key_name(source, column)
}
- if foreign_key_exists?(source, target, options)
+ if foreign_key_exists?(source, target, **options)
warning_message = "Foreign key not created because it exists already " \
"(this may be due to an aborted migration or similar): " \
"source: #{source}, target: #{target}, column: #{options[:column]}, "\
@@ -330,13 +330,13 @@ module Gitlab
# * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the block, sleep time before the next iteration, defaults to `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION`
# * +logger+ - [Gitlab::JsonLogger]
# * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
- def with_lock_retries(**args, &block)
+ def with_lock_retries(*args, **kwargs, &block)
merged_args = {
klass: self.class,
logger: Gitlab::BackgroundMigration::Logger
- }.merge(args)
+ }.merge(kwargs)
- Gitlab::Database::WithLockRetries.new(merged_args).run(&block)
+ Gitlab::Database::WithLockRetries.new(**merged_args).run(&block)
end
def true_value
@@ -544,6 +544,16 @@ module Gitlab
rename_column_concurrently(table, column, temp_column, type: new_type, type_cast_function: type_cast_function, batch_column_name: batch_column_name)
end
+ # Reverses operations performed by change_column_type_concurrently.
+ #
+ # table - The table containing the column.
+ # column - The name of the column to change.
+ def undo_change_column_type_concurrently(table, column)
+ temp_column = "#{column}_for_type_change"
+
+ undo_rename_column_concurrently(table, column, temp_column)
+ end
+
# Performs cleanup of a concurrent type change.
#
# table - The table containing the column.
@@ -560,6 +570,65 @@ module Gitlab
end
end
+ # Reverses operations performed by cleanup_concurrent_column_type_change.
+ #
+ # table - The table containing the column.
+ # column - The name of the column to change.
+ # old_type - The type of the original column used with change_column_type_concurrently.
+ # type_cast_function - Required if the conversion back to the original type is not automatic
+ # batch_column_name - option for tables without a primary key; in that case
+ # another unique integer column can be used. Example: :user_id
+ def undo_cleanup_concurrent_column_type_change(table, column, old_type, type_cast_function: nil, batch_column_name: :id)
+ temp_column = "#{column}_for_type_change"
+
+ # Using a descriptive name that includes the original column's name risks
+ # exceeding the 63-character identifier limit, so we use a hash
+ identifier = "#{table}_#{column}_for_type_change"
+ hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10)
+ temp_undo_cleanup_column = "tmp_undo_cleanup_column_#{hashed_identifier}"
+
+ unless column_exists?(table, batch_column_name)
+ raise "Column #{batch_column_name} does not exist on #{table}"
+ end
+
+ if transaction_open?
+ raise 'undo_cleanup_concurrent_column_type_change can not be run inside a transaction'
+ end
+
+ check_trigger_permissions!(table)
+
+ begin
+ create_column_from(
+ table,
+ column,
+ temp_undo_cleanup_column,
+ type: old_type,
+ batch_column_name: batch_column_name,
+ type_cast_function: type_cast_function
+ )
+
+ transaction do
+ # This has to be performed in a transaction as otherwise we might
+ # have inconsistent data.
+ rename_column(table, column, temp_column)
+ rename_column(table, temp_undo_cleanup_column, column)
+
+ install_rename_triggers(table, column, temp_column)
+ end
+ rescue
+ # create_column_from cannot run inside a transaction, which means
+ # that there is a risk that if any of the operations that follow it
+ # fail, we'll be left with an inconsistent schema.
+ # For that reason, we make sure to drop temp_undo_cleanup_column
+ # if an error is caught.
+ if column_exists?(table, temp_undo_cleanup_column)
+ remove_column(table, temp_undo_cleanup_column)
+ end
+
+ raise
+ end
+ end
+
# Cleans up a concurrent column name.
#
# This method takes care of removing previously installed triggers as well
@@ -882,7 +951,7 @@ module Gitlab
# column.
opclasses[new] = opclasses.delete(old) if opclasses[old]
- options[:opclasses] = opclasses
+ options[:opclass] = opclasses
end
add_concurrent_index(table, new_columns, options)
@@ -994,10 +1063,10 @@ into similar problems in the future (e.g. when new tables are created).
def postgres_exists_by_name?(table, name)
index_sql = <<~SQL
SELECT COUNT(*)
- FROM pg_index
- JOIN pg_class i ON (indexrelid=i.oid)
- JOIN pg_class t ON (indrelid=t.oid)
- WHERE i.relname = '#{name}' AND t.relname = '#{table}'
+ FROM pg_catalog.pg_indexes
+ WHERE schemaname = #{connection.quote(current_schema)}
+ AND tablename = #{connection.quote(table)}
+ AND indexname = #{connection.quote(name)}
SQL
connection.select_value(index_sql).to_i > 0
@@ -1053,11 +1122,15 @@ into similar problems in the future (e.g. when new tables are created).
# the table name in addition to using the constraint_name
check_sql = <<~SQL
SELECT COUNT(*)
- FROM pg_constraint
- JOIN pg_class ON pg_constraint.conrelid = pg_class.oid
- WHERE pg_constraint.contype = 'c'
- AND pg_constraint.conname = '#{constraint_name}'
- AND pg_class.relname = '#{table}'
+ FROM pg_catalog.pg_constraint con
+ INNER JOIN pg_catalog.pg_class rel
+ ON rel.oid = con.conrelid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = con.connamespace
+ WHERE con.contype = 'c'
+ AND con.conname = #{connection.quote(constraint_name)}
+ AND nsp.nspname = #{connection.quote(current_schema)}
+ AND rel.relname = #{connection.quote(table)}
SQL
connection.select_value(check_sql) > 0
@@ -1147,6 +1220,64 @@ into similar problems in the future (e.g. when new tables are created).
end
end
+ # Copies all check constraints for the old column to the new column.
+ #
+ # table - The table containing the columns.
+ # old - The old column.
+ # new - The new column.
+ # schema - The schema the table is defined for
+ # If it is not provided, then the current_schema is used
+ def copy_check_constraints(table, old, new, schema: nil)
+ if transaction_open?
+ raise 'copy_check_constraints can not be run inside a transaction'
+ end
+
+ unless column_exists?(table, old)
+ raise "Column #{old} does not exist on #{table}"
+ end
+
+ unless column_exists?(table, new)
+ raise "Column #{new} does not exist on #{table}"
+ end
+
+ table_with_schema = schema.present? ? "#{schema}.#{table}" : table
+
+ check_constraints_for(table, old, schema: schema).each do |check_c|
+ validate = !(check_c["constraint_def"].end_with? "NOT VALID")
+
+ # Normalize:
+ # - Old constraint definitions:
+ # '(char_length(entity_path) <= 5500)'
+ # - Definitions from pg_get_constraintdef(oid):
+ # 'CHECK ((char_length(entity_path) <= 5500))'
+ # - Definitions from pg_get_constraintdef(oid, pretty_bool):
+ # 'CHECK (char_length(entity_path) <= 5500)'
+ # - Not valid constraints: 'CHECK (...) NOT VALID'
+ # to a single format that we can use:
+ # '(char_length(entity_path) <= 5500)'
+ check_definition = check_c["constraint_def"]
+ .sub(/^\s*(CHECK)?\s*\({0,2}/, '(')
+ .sub(/\){0,2}\s*(NOT VALID)?\s*$/, ')')
+
+ constraint_name = begin
+ if check_definition == "(#{old} IS NOT NULL)"
+ not_null_constraint_name(table_with_schema, new)
+ elsif check_definition.start_with? "(char_length(#{old}) <="
+ text_limit_name(table_with_schema, new)
+ else
+ check_constraint_name(table_with_schema, new, 'copy_check_constraint')
+ end
+ end
+
+ add_check_constraint(
+ table_with_schema,
+ check_definition.gsub(old.to_s, new.to_s),
+ constraint_name,
+ validate: validate
+ )
+ end
+ end
+
# Migration Helpers for adding limit to text columns
def add_text_limit(table, column, limit, constraint_name: nil, validate: true)
add_check_constraint(
@@ -1274,6 +1405,37 @@ into similar problems in the future (e.g. when new tables are created).
end
end
+ # Returns an ActiveRecord::Result containing the check constraints
+ # defined for the given column.
+ #
+ # If the schema is not provided, then the current_schema is used
+ def check_constraints_for(table, column, schema: nil)
+ check_sql = <<~SQL
+ SELECT
+ ccu.table_schema as schema_name,
+ ccu.table_name as table_name,
+ ccu.column_name as column_name,
+ con.conname as constraint_name,
+ pg_get_constraintdef(con.oid) as constraint_def
+ FROM pg_catalog.pg_constraint con
+ INNER JOIN pg_catalog.pg_class rel
+ ON rel.oid = con.conrelid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = con.connamespace
+ INNER JOIN information_schema.constraint_column_usage ccu
+ ON con.conname = ccu.constraint_name
+ AND nsp.nspname = ccu.constraint_schema
+ AND rel.relname = ccu.table_name
+ WHERE nsp.nspname = #{connection.quote(schema.presence || current_schema)}
+ AND rel.relname = #{connection.quote(table)}
+ AND ccu.column_name = #{connection.quote(column)}
+ AND con.contype = 'c'
+ ORDER BY constraint_name
+ SQL
+
+ connection.exec_query(check_sql)
+ end
+
def statement_timeout_disabled?
# This is a string of the form "100ms" or "0" when disabled
connection.select_value('SHOW statement_timeout') == "0"
@@ -1284,8 +1446,9 @@ into similar problems in the future (e.g. when new tables are created).
check_sql = <<~SQL
SELECT c.is_nullable
FROM information_schema.columns c
- WHERE c.table_name = '#{table}'
- AND c.column_name = '#{column}'
+ WHERE c.table_schema = #{connection.quote(current_schema)}
+ AND c.table_name = #{connection.quote(table)}
+ AND c.column_name = #{connection.quote(column)}
SQL
connection.select_value(check_sql) == 'YES'
@@ -1352,6 +1515,7 @@ into similar problems in the future (e.g. when new tables are created).
copy_indexes(table, old, new)
copy_foreign_keys(table, old, new)
+ copy_check_constraints(table, old, new)
end
def validate_timestamp_column_name!(column_name)
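
Taken together, the new undo_* helpers give concurrent type-change migrations a real down path. A hedged sketch of a post-deployment migration pair that would use them (table/column/type names are illustrative, not from this commit):

```ruby
class CleanupEventsDataTypeChange < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  disable_ddl_transaction!

  def up
    # Drops the old column and renames <column>_for_type_change into place.
    cleanup_concurrent_column_type_change :events, :data
  end

  def down
    # Recreates the old-type column under a hashed temporary name, swaps it
    # back in, and reinstalls the rename triggers (see the helper above).
    undo_cleanup_concurrent_column_type_change :events, :data, :jsonb
  end
end
```
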
diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_creator.rb
index 4c1b13fe3b5..547e0b9b957 100644
--- a/lib/gitlab/database/partitioning/partition_creator.rb
+++ b/lib/gitlab/database/partitioning/partition_creator.rb
@@ -72,10 +72,10 @@ module Gitlab
end
def with_lock_retries(&block)
- Gitlab::Database::WithLockRetries.new({
+ Gitlab::Database::WithLockRetries.new(
klass: self.class,
logger: Gitlab::AppLogger
- }).run(&block)
+ ).run(&block)
end
def connection
diff --git a/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb b/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb
index f9ad1e60776..17a42d997e6 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb
@@ -11,8 +11,6 @@ module Gitlab
PAUSE_SECONDS = 0.25
def perform(start_id, stop_id, source_table, partitioned_table, source_column)
- return unless Feature.enabled?(:backfill_partitioned_audit_events, default_enabled: true)
-
if transaction_open?
raise "Aborting job to backfill partitioned #{source_table} table! Do not run this job in a transaction block!"
end
diff --git a/lib/gitlab/database/postgres_index.rb b/lib/gitlab/database/postgres_index.rb
new file mode 100644
index 00000000000..2a9f23f0098
--- /dev/null
+++ b/lib/gitlab/database/postgres_index.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class PostgresIndex < ActiveRecord::Base
+ self.table_name = 'postgres_indexes'
+ self.primary_key = 'identifier'
+
+ scope :by_identifier, ->(identifier) do
+ raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
+
+ find(identifier)
+ end
+
+ # A 'regular' index is a non-unique index that does not serve
+ # an exclusion constraint and is defined on a table that is not
+ # partitioned.
+ scope :regular, -> { where(unique: false, partitioned: false, exclusion: false) }
+
+ scope :random_few, ->(how_many) do
+ limit(how_many).order(Arel.sql('RANDOM()'))
+ end
+
+ scope :not_match, ->(regex) { where("name !~ ?", regex) }
+
+ def to_s
+ name
+ end
+ end
+ end
+end
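
A short sketch of how these scopes compose (index name illustrative; by_identifier insists on a schema-qualified identifier):

```ruby
# Fetch a single index; raises ArgumentError without the schema prefix.
index = Gitlab::Database::PostgresIndex.by_identifier('public.index_users_on_username')

# Pick two random non-unique, non-partitioned, non-exclusion indexes,
# excluding left-overs from previous reindexing runs by name prefix.
candidates = Gitlab::Database::PostgresIndex
  .regular
  .not_match('^tmp_reindex_')
  .not_match('^old_reindex_')
  .random_few(2)
```
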
diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb
new file mode 100644
index 00000000000..074752fe75b
--- /dev/null
+++ b/lib/gitlab/database/reindexing.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ def self.perform(index_selector)
+ Coordinator.new(index_selector).perform
+ end
+
+ def self.candidate_indexes
+ Gitlab::Database::PostgresIndex
+ .regular
+ .not_match("^#{ConcurrentReindex::TEMPORARY_INDEX_PREFIX}")
+ .not_match("^#{ConcurrentReindex::REPLACED_INDEX_PREFIX}")
+ end
+ end
+ end
+end
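
The module then reads as a one-line entry point. A hedged invocation sketch (in the real application this is driven by a rake task, which is an assumption here):

```ruby
# Reindex two random candidates; Coordinator takes a global lease so only
# one reindexing process runs at a time.
indexes = Gitlab::Database::Reindexing.candidate_indexes.random_few(2)
Gitlab::Database::Reindexing.perform(indexes)
```
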
diff --git a/lib/gitlab/database/reindexing/concurrent_reindex.rb b/lib/gitlab/database/reindexing/concurrent_reindex.rb
new file mode 100644
index 00000000000..fd3dca88567
--- /dev/null
+++ b/lib/gitlab/database/reindexing/concurrent_reindex.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ class ConcurrentReindex
+ include Gitlab::Utils::StrongMemoize
+
+ ReindexError = Class.new(StandardError)
+
+ PG_IDENTIFIER_LENGTH = 63
+ TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
+ REPLACED_INDEX_PREFIX = 'old_reindex_'
+ STATEMENT_TIMEOUT = 6.hours
+
+ attr_reader :index, :logger
+
+ def initialize(index, logger: Gitlab::AppLogger)
+ @index = index
+ @logger = logger
+ end
+
+ def perform
+ raise ReindexError, 'UNIQUE indexes are currently not supported' if index.unique?
+ raise ReindexError, 'partitioned indexes are currently not supported' if index.partitioned?
+ raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
+ raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name.start_with?(TEMPORARY_INDEX_PREFIX, REPLACED_INDEX_PREFIX)
+
+ logger.info "Starting reindex of #{index}"
+
+ with_rebuilt_index do |replacement_index|
+ swap_index(replacement_index)
+ end
+ end
+
+ private
+
+ def with_rebuilt_index
+ if Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
+ logger.debug("dropping dangling index from previous run (if it exists): #{replacement_index_name}")
+ remove_index(index.schema, replacement_index_name)
+ end
+
+ create_replacement_index_statement = index.definition
+ .sub(/CREATE INDEX #{index.name}/, "CREATE INDEX CONCURRENTLY #{replacement_index_name}")
+
+ logger.info("creating replacement index #{replacement_index_name}")
+ logger.debug("replacement index definition: #{create_replacement_index_statement}")
+
+ set_statement_timeout do
+ connection.execute(create_replacement_index_statement)
+ end
+
+ replacement_index = Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
+
+ unless replacement_index.valid_index?
+ message = 'replacement index was created as INVALID'
+ logger.error("#{message}, cleaning up")
+ raise ReindexError, "failed to reindex #{index}: #{message}"
+ end
+
+ yield replacement_index
+ ensure
+ begin
+ remove_index(index.schema, replacement_index_name)
+ rescue => e
+ logger.error(e)
+ end
+ end
+
+ def swap_index(replacement_index)
+ logger.info("swapping replacement index #{replacement_index} with #{index}")
+
+ with_lock_retries do
+ rename_index(index.schema, index.name, replaced_index_name)
+ rename_index(replacement_index.schema, replacement_index.name, index.name)
+ rename_index(index.schema, replaced_index_name, replacement_index.name)
+ end
+ end
+
+ def rename_index(schema, old_index_name, new_index_name)
+ connection.execute(<<~SQL)
+ ALTER INDEX #{quote_table_name(schema)}.#{quote_table_name(old_index_name)}
+ RENAME TO #{quote_table_name(new_index_name)}
+ SQL
+ end
+
+ def remove_index(schema, name)
+ logger.info("Removing index #{schema}.#{name}")
+
+ set_statement_timeout do
+ connection.execute(<<~SQL)
+ DROP INDEX CONCURRENTLY
+ IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
+ SQL
+ end
+ end
+
+ def replacement_index_name
+ @replacement_index_name ||= "#{TEMPORARY_INDEX_PREFIX}#{index.indexrelid}"
+ end
+
+ def replaced_index_name
+ @replaced_index_name ||= "#{REPLACED_INDEX_PREFIX}#{index.indexrelid}"
+ end
+
+ def with_lock_retries(&block)
+ arguments = { klass: self.class, logger: logger }
+
+ Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
+ end
+
+ def set_statement_timeout
+ execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
+ yield
+ ensure
+ execute('RESET statement_timeout')
+ end
+
+ delegate :execute, :quote_table_name, to: :connection
+
+ def connection
+ @connection ||= ActiveRecord::Base.connection
+ end
+ end
+ end
+ end
+end
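
The swap in swap_index is a rename cycle executed under a single with_lock_retries block, so every physical index keeps a unique name at each step and the old index ends up under the temporary name, where the ensure block drops it. Roughly the statements issued (schema, index name, and indexrelid are illustrative):

```ruby
# Given index public.index_users_on_name with indexrelid 12345.
# Each rename is a fast, catalog-only change, but it needs a short
# exclusive lock, hence the with_lock_retries wrapper.
connection.execute(%(ALTER INDEX "public"."index_users_on_name" RENAME TO "old_reindex_12345"))
connection.execute(%(ALTER INDEX "public"."tmp_reindex_12345" RENAME TO "index_users_on_name"))
connection.execute(%(ALTER INDEX "public"."old_reindex_12345" RENAME TO "tmp_reindex_12345"))
```
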
diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb
new file mode 100644
index 00000000000..0957f43e166
--- /dev/null
+++ b/lib/gitlab/database/reindexing/coordinator.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ class Coordinator
+ include ExclusiveLeaseGuard
+
+ # Maximum lease time for the global Redis lease
+ # This should be higher than the maximum time for any
+ # long running step in the reindexing process (compare with
+ # statement timeouts).
+ TIMEOUT_PER_ACTION = 1.day
+
+ attr_reader :indexes
+
+ def initialize(indexes)
+ @indexes = indexes
+ end
+
+ def perform
+ indexes.each do |index|
+ # This obtains a global lease such that there's
+ # only one live reindexing process at a time.
+ try_obtain_lease do
+ ReindexAction.keep_track_of(index) do
+ ConcurrentReindex.new(index).perform
+ end
+ end
+ end
+ end
+
+ private
+
+ def lease_timeout
+ TIMEOUT_PER_ACTION
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/reindexing/reindex_action.rb b/lib/gitlab/database/reindexing/reindex_action.rb
new file mode 100644
index 00000000000..0928ef90e5d
--- /dev/null
+++ b/lib/gitlab/database/reindexing/reindex_action.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Reindexing
+ class ReindexAction < ActiveRecord::Base
+ self.table_name = 'postgres_reindex_actions'
+
+ enum state: { started: 0, finished: 1, failed: 2 }
+
+ def self.keep_track_of(index, &block)
+ action = create!(
+ index_identifier: index.identifier,
+ action_start: Time.zone.now,
+ ondisk_size_bytes_start: index.ondisk_size_bytes
+ )
+
+ yield
+
+ action.state = :finished
+ rescue
+ action.state = :failed
+ raise
+ ensure
+ index.reload # rubocop:disable Cop/ActiveRecordAssociationReload
+
+ action.action_end = Time.zone.now
+ action.ondisk_size_bytes_end = index.ondisk_size_bytes
+
+ action.save!
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb
index dda4d8eecdb..3d929c62933 100644
--- a/lib/gitlab/database/schema_helpers.rb
+++ b/lib/gitlab/database/schema_helpers.rb
@@ -32,11 +32,14 @@ module Gitlab
def trigger_exists?(table_name, name)
connection.select_value(<<~SQL)
SELECT 1
- FROM pg_trigger
- INNER JOIN pg_class
- ON pg_trigger.tgrelid = pg_class.oid
- WHERE pg_class.relname = '#{table_name}'
- AND pg_trigger.tgname = '#{name}'
+ FROM pg_catalog.pg_trigger trgr
+ INNER JOIN pg_catalog.pg_class rel
+ ON trgr.tgrelid = rel.oid
+ INNER JOIN pg_catalog.pg_namespace nsp
+ ON nsp.oid = rel.relnamespace
+ WHERE nsp.nspname = #{connection.quote(current_schema)}
+ AND rel.relname = #{connection.quote(table_name)}
+ AND trgr.tgname = #{connection.quote(name)}
SQL
end
@@ -68,10 +71,10 @@ module Gitlab
end
def with_lock_retries(&block)
- Gitlab::Database::WithLockRetries.new({
+ Gitlab::Database::WithLockRetries.new(
klass: self.class,
logger: Gitlab::BackgroundMigration::Logger
- }).run(&block)
+ ).run(&block)
end
def assert_not_in_transaction_block(scope:)
diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb
index 2633c29438a..ff78fd0218c 100644
--- a/lib/gitlab/database/similarity_score.rb
+++ b/lib/gitlab/database/similarity_score.rb
@@ -6,6 +6,11 @@ module Gitlab
EMPTY_STRING = Arel.sql("''").freeze
EXPRESSION_ON_INVALID_INPUT = Arel::Nodes::NamedFunction.new('CAST', [Arel.sql('0').as('integer')]).freeze
DEFAULT_MULTIPLIER = 1
+ DISPLAY_NAME = self.name.underscore.freeze
+
+ # Adds a "magic" comment to the generated SQL expression so we can tell if we're sorting by similarity.
+ # Example: /* gitlab/database/similarity_score */ SIMILARITY(COALESCE...
+ SIMILARITY_FUNCTION_CALL_WITH_ANNOTATION = "/* #{DISPLAY_NAME} */ SIMILARITY".freeze
# This method returns an Arel expression that can be used in an ActiveRecord query to order the resultset by similarity.
#
@@ -74,6 +79,10 @@ module Gitlab
end
end
+ def self.order_by_similarity?(arel_query)
+ arel_query.to_sql.include?(SIMILARITY_FUNCTION_CALL_WITH_ANNOTATION)
+ end
+
# (SIMILARITY(COALESCE(column, ''), 'search_string') * CAST(multiplier AS numeric))
def self.rule_to_arel(search, rule)
Arel::Nodes::Grouping.new(
@@ -91,7 +100,7 @@ module Gitlab
# SIMILARITY(COALESCE(column, ''), 'search_string')
def self.similarity_function_call(search, column)
- Arel::Nodes::NamedFunction.new('SIMILARITY', [column, Arel.sql(search)])
+ Arel::Nodes::NamedFunction.new(SIMILARITY_FUNCTION_CALL_WITH_ANNOTATION, [column, Arel.sql(search)])
end
# CAST(multiplier AS numeric)
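
A hedged sketch of how the marker enables detection (build_expression is this class's public entry point elsewhere in the file; its exact call shape here is an assumption):

```ruby
order = Gitlab::Database::SimilarityScore.build_expression(
  search: 'gitlab',
  rules: [{ column: Project.arel_table[:name], multiplier: 1 }]
)

query = Project.order(order.desc)

# True because the generated SQL now contains the
# "/* gitlab/database/similarity_score */ SIMILARITY" marker.
Gitlab::Database::SimilarityScore.order_by_similarity?(query.arel)
```
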
diff --git a/lib/gitlab/database/with_lock_retries.rb b/lib/gitlab/database/with_lock_retries.rb
index a9c86e4e267..3fb52d786ad 100644
--- a/lib/gitlab/database/with_lock_retries.rb
+++ b/lib/gitlab/database/with_lock_retries.rb
@@ -95,7 +95,7 @@ module Gitlab
run_block_with_transaction
rescue ActiveRecord::LockWaitTimeout
if retry_with_lock_timeout?
- disable_idle_in_transaction_timeout
+ disable_idle_in_transaction_timeout if ActiveRecord::Base.connection.transaction_open?
wait_until_next_retry
reset_db_settings
@@ -149,7 +149,7 @@ module Gitlab
log(message: "Couldn't acquire lock to perform the migration", current_iteration: current_iteration)
log(message: "Executing the migration without lock timeout", current_iteration: current_iteration)
- execute("SET LOCAL lock_timeout TO '0'")
+ disable_lock_timeout if ActiveRecord::Base.connection.transaction_open?
run_block
@@ -184,6 +184,10 @@ module Gitlab
execute("SET LOCAL idle_in_transaction_session_timeout TO '0'")
end
+ def disable_lock_timeout
+ execute("SET LOCAL lock_timeout TO '0'")
+ end
+
def reset_db_settings
execute('RESET idle_in_transaction_session_timeout; RESET lock_timeout')
end
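
Both transaction_open? guards exist because SET LOCAL only takes effect inside a transaction; outside one, PostgreSQL emits the warning "SET LOCAL can only be used in transaction blocks" and the setting is a no-op. A minimal sketch of the behavior:

```ruby
conn = ActiveRecord::Base.connection

# Inside a transaction the setting applies until COMMIT/ROLLBACK:
conn.transaction do
  conn.execute("SET LOCAL lock_timeout TO '100ms'")
  # ... lock-sensitive DDL here ...
end

# Outside a transaction SET LOCAL warns and changes nothing, which is
# why the code above checks transaction_open? first.
```
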