Add latest changes from gitlab-org/gitlab@13-3-stable-ee

author: GitLab Bot <gitlab-bot@gitlab.com> 2020-08-20 21:42:06 +0300
committer: GitLab Bot <gitlab-bot@gitlab.com> 2020-08-20 21:42:06 +0300
commit: 6e4e1050d9dba2b7b2523fdd1768823ab85feef4 (patch)
tree: 78be5963ec075d80116a932011d695dd33910b4e /lib/gitlab/database
parent: 1ce776de4ae122aba3f349c02c17cebeaa8ecf07 (diff)
14 files changed, 296 insertions, 44 deletions
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
index ab069ce1da1..1762b81b7d8 100644
--- a/lib/gitlab/database/batch_count.rb
+++ b/lib/gitlab/database/batch_count.rb
@@ -16,6 +16,7 @@
 #  batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
 #  batch_distinct_count(::Project, :creator_id)
 #  batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
+#  batch_sum(User, :sign_in_count)
 module Gitlab
   module Database
     module BatchCount
@@ -27,6 +28,10 @@ module Gitlab
         BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size, start: start, finish: finish)
       end
 
+      def batch_sum(relation, column, batch_size: nil, start: nil, finish: nil)
+        BatchCounter.new(relation, column: nil, operation: :sum, operation_args: [column]).count(batch_size: batch_size, start: start, finish: finish)
+      end
+
       class << self
         include BatchCount
       end
@@ -35,6 +40,7 @@ module Gitlab
     class BatchCounter
       FALLBACK = -1
       MIN_REQUIRED_BATCH_SIZE = 1_250
+      DEFAULT_SUM_BATCH_SIZE = 1_000
       MAX_ALLOWED_LOOPS = 10_000
       SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
       ALLOWED_MODES = [:itself, :distinct].freeze
@@ -43,13 +49,16 @@ module Gitlab
       DEFAULT_DISTINCT_BATCH_SIZE = 10_000
       DEFAULT_BATCH_SIZE = 100_000
 
-      def initialize(relation, column: nil)
+      def initialize(relation, column: nil, operation: :count, operation_args: nil)
         @relation = relation
         @column = column || relation.primary_key
+        @operation = operation
+        @operation_args = operation_args
       end
 
       def unwanted_configuration?(finish, batch_size, start)
-        batch_size <= MIN_REQUIRED_BATCH_SIZE ||
+        (@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) ||
+          (@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) ||
           (finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
           start > finish
       end
@@ -60,7 +69,7 @@ module Gitlab
         check_mode!(mode)
 
         # non-distinct have better performance
-        batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
+        batch_size ||= batch_size_for_mode_and_operation(mode, @operation)
 
         start = actual_start(start)
         finish = actual_finish(finish)
@@ -91,11 +100,17 @@ module Gitlab
 
       def batch_fetch(start, finish, mode)
         # rubocop:disable GitlabSecurity/PublicSend
-        @relation.select(@column).public_send(mode).where(between_condition(start, finish)).count
+        @relation.select(@column).public_send(mode).where(between_condition(start, finish)).send(@operation, *@operation_args)
       end
 
       private
 
+      def batch_size_for_mode_and_operation(mode, operation)
+        return DEFAULT_SUM_BATCH_SIZE if operation == :sum
+
+        mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
+      end
+
       def between_condition(start, finish)
         return @column.between(start..(finish - 1)) if @column.is_a?(Arel::Attributes::Attribute)
 
diff --git a/lib/gitlab/database/connection_timer.rb b/lib/gitlab/database/connection_timer.rb
index ef8d52ba71c..f9b893ffd0f 100644
--- a/lib/gitlab/database/connection_timer.rb
+++ b/lib/gitlab/database/connection_timer.rb
@@ -23,7 +23,7 @@ module Gitlab
         end
 
         def interval_with_randomization
-          interval + rand(RANDOMIZATION_INTERVAL) if interval.positive?
+          interval + rand(RANDOMIZATION_INTERVAL) if interval > 0
         end
 
         def current_clock_value
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 006a24da8fe..a618a3017b2 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -815,7 +815,7 @@ module Gitlab
         BEFORE INSERT OR UPDATE
         ON #{table}
         FOR EACH ROW
-        EXECUTE PROCEDURE #{trigger}()
+        EXECUTE FUNCTION #{trigger}()
         EOF
       end
 
@@ -1062,7 +1062,7 @@ into similar problems in the future (e.g. when new tables are created).
           AND pg_class.relname = '#{table}'
         SQL
 
-        connection.select_value(check_sql).positive?
+        connection.select_value(check_sql) > 0
       end
 
       # Adds a check constraint to a table
diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_creator.rb
index 348dd1ba660..4c1b13fe3b5 100644
--- a/lib/gitlab/database/partitioning/partition_creator.rb
+++ b/lib/gitlab/database/partitioning/partition_creator.rb
@@ -24,7 +24,7 @@ module Gitlab
         end
 
         def create_partitions
-          return unless Feature.enabled?(:postgres_dynamic_partition_creation, default_enabled: true)
+          Gitlab::AppLogger.info("Checking state of dynamic postgres partitions")
 
           models.each do |model|
             # Double-checking before getting the lease:
diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
index b676767f41d..e6d8ec55319 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
@@ -16,7 +16,9 @@ module Gitlab
         BATCH_SIZE = 50_000
 
         # Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column.
-        # One partition is created per month between the given `min_date` and `max_date`.
+        # One partition is created per month between the given `min_date` and `max_date`. Also installs a trigger on
+        # the original table to copy writes into the partitioned table. To copy over historic data from before creation
+        # of the partitioned table, use the `enqueue_partitioning_data_migration` helper in a post-deploy migration.
         #
         # A copy of the original table is required as PG currently does not support partitioning existing tables.
         #
@@ -56,10 +58,10 @@ module Gitlab
           create_range_partitioned_copy(table_name, partitioned_table_name, partition_column, primary_key)
           create_daterange_partitions(partitioned_table_name, partition_column.name, min_date, max_date)
           create_trigger_to_sync_tables(table_name, partitioned_table_name, primary_key)
-          enqueue_background_migration(table_name, partitioned_table_name, primary_key)
         end
 
-        # Clean up a partitioned copy of an existing table. This deletes the partitioned table and all partitions.
+        # Clean up a partitioned copy of an existing table. First, deletes the database function and trigger that were
+        # used to copy writes to the partitioned table, then removes the partitioned table (also removing partitions).
         #
         # Example:
         #
@@ -69,8 +71,6 @@ module Gitlab
           assert_table_is_allowed(table_name)
           assert_not_in_transaction_block(scope: ERROR_SCOPE)
 
-          cleanup_migration_jobs(table_name)
-
           with_lock_retries do
             trigger_name = make_sync_trigger_name(table_name)
             drop_trigger(table_name, trigger_name)
@@ -83,6 +83,38 @@ module Gitlab
           drop_table(partitioned_table_name)
         end
 
+        # Enqueue the background jobs that will backfill data in the partitioned table, by batch-copying records from
+        # original table. This helper should be called from a post-deploy migration.
+        #
+        # Example:
+        #
+        #   enqueue_partitioning_data_migration :audit_events
+        #
+        def enqueue_partitioning_data_migration(table_name)
+          assert_table_is_allowed(table_name)
+
+          assert_not_in_transaction_block(scope: ERROR_SCOPE)
+
+          partitioned_table_name = make_partitioned_table_name(table_name)
+          primary_key = connection.primary_key(table_name)
+          enqueue_background_migration(table_name, partitioned_table_name, primary_key)
+        end
+
+        # Cleanup a previously enqueued background migration to copy data into a partitioned table. This will not
+        # prevent the enqueued jobs from executing, but instead cleans up information in the database used to track the
+        # state of the background migration. It should be safe to also remove the partitioned table even if the
+        # background jobs are still in-progress, as the absence of the table will cause them to safely exit.
+        #
+        # Example:
+        #
+        #   cleanup_partitioning_data_migration :audit_events
+        #
+        def cleanup_partitioning_data_migration(table_name)
+          assert_table_is_allowed(table_name)
+
+          cleanup_migration_jobs(table_name)
+        end
+
         def create_hash_partitions(table_name, number_of_partitions)
           transaction do
             (0..number_of_partitions - 1).each do |partition|
diff --git a/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb
new file mode 100644
index 00000000000..59bd24d3c37
--- /dev/null
+++ b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module PostgresqlAdapter
+      module DumpSchemaVersionsMixin
+        extend ActiveSupport::Concern
+
+        def dump_schema_information # :nodoc:
+          versions = schema_migration.all_versions
+          Gitlab::Database::SchemaVersionFiles.touch_all(versions) if versions.any?
+
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/postgresql_adapter/schema_versions_copy_mixin.rb b/lib/gitlab/database/postgresql_adapter/schema_versions_copy_mixin.rb
deleted file mode 100644
index d8f96643dcb..00000000000
--- a/lib/gitlab/database/postgresql_adapter/schema_versions_copy_mixin.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
-  module Database
-    module PostgresqlAdapter
-      module SchemaVersionsCopyMixin
-        extend ActiveSupport::Concern
-
-        def dump_schema_information # :nodoc:
-          versions = schema_migration.all_versions
-          copy_versions_sql(versions) if versions.any?
-        end
-
-        private
-
-        def copy_versions_sql(versions)
-          sm_table = quote_table_name(schema_migration.table_name)
-
-          sql = +"COPY #{sm_table} (version) FROM STDIN;\n"
-          sql << versions.map { |v| Integer(v) }.sort.join("\n")
-          sql << "\n\\.\n"
-
-          sql
-        end
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb
new file mode 100644
index 00000000000..cf8342941c4
--- /dev/null
+++ b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module PostgresqlDatabaseTasks
+      module LoadSchemaVersionsMixin
+        extend ActiveSupport::Concern
+
+        def structure_load(*args)
+          super(*args)
+          Gitlab::Database::SchemaVersionFiles.load_all
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
index 6b9af51a6ab..4fbbfdc4914 100644
--- a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
+++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
@@ -45,7 +45,7 @@ module Gitlab
             reverts_for_type('namespace') do |path_before_rename, current_path|
               matches_path = MigrationClasses::Route.arel_table[:path].matches(current_path)
               namespace = MigrationClasses::Namespace.joins(:route)
-                            .find_by(matches_path)&.becomes(MigrationClasses::Namespace)
+                            .find_by(matches_path)&.becomes(MigrationClasses::Namespace) # rubocop: disable Cop/AvoidBecomes
 
               if namespace
                 perform_rename(namespace, current_path, path_before_rename)
diff --git a/lib/gitlab/database/schema_cleaner.rb b/lib/gitlab/database/schema_cleaner.rb
index ae9d77e635e..7c415287878 100644
--- a/lib/gitlab/database/schema_cleaner.rb
+++ b/lib/gitlab/database/schema_cleaner.rb
@@ -23,6 +23,11 @@ module Gitlab
         structure.gsub!(/\n{3,}/, "\n\n")
 
         io << structure
+        io << <<~MSG
+          -- schema_migrations.version information is no longer stored in this file,
+          -- but instead tracked in the db/schema_migrations directory
+          -- see https://gitlab.com/gitlab-org/gitlab/-/issues/218590 for details
+        MSG
 
         nil
       end
diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb
index 34daafd06de..dda4d8eecdb 100644
--- a/lib/gitlab/database/schema_helpers.rb
+++ b/lib/gitlab/database/schema_helpers.rb
@@ -25,7 +25,7 @@ module Gitlab
           CREATE TRIGGER #{name}
           #{fires} ON #{table_name}
           FOR EACH ROW
-          EXECUTE PROCEDURE #{function_name}()
+          EXECUTE FUNCTION #{function_name}()
         SQL
       end
 
diff --git a/lib/gitlab/database/schema_version_files.rb b/lib/gitlab/database/schema_version_files.rb
new file mode 100644
index 00000000000..27a942404ef
--- /dev/null
+++ b/lib/gitlab/database/schema_version_files.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    class SchemaVersionFiles
+      SCHEMA_DIRECTORY = 'db/schema_migrations'
+      MIGRATION_DIRECTORIES = %w[db/migrate db/post_migrate].freeze
+      MIGRATION_VERSION_GLOB = '20[0-9][0-9]*'
+
+      def self.touch_all(versions_from_database)
+        versions_from_migration_files = find_versions_from_migration_files
+
+        version_filepaths = find_version_filenames.map { |f| schema_directory.join(f) }
+        FileUtils.rm(version_filepaths)
+
+        versions_to_create = versions_from_database & versions_from_migration_files
+        versions_to_create.each do |version|
+          version_filepath = schema_directory.join(version)
+
+          File.open(version_filepath, 'w') do |file|
+            file << Digest::SHA256.hexdigest(version)
+          end
+        end
+      end
+
+      def self.load_all
+        version_filenames = find_version_filenames
+        return if version_filenames.empty?
+
+        values = version_filenames.map { |vf| "('#{connection.quote_string(vf)}')" }
+        connection.execute(<<~SQL)
+          INSERT INTO schema_migrations (version)
+          VALUES #{values.join(',')}
+          ON CONFLICT DO NOTHING
+        SQL
+      end
+
+      def self.schema_directory
+        @schema_directory ||= Rails.root.join(SCHEMA_DIRECTORY)
+      end
+
+      def self.migration_directories
+        @migration_directories ||= MIGRATION_DIRECTORIES.map { |dir| Rails.root.join(dir) }
+      end
+
+      def self.find_version_filenames
+        Dir.glob(MIGRATION_VERSION_GLOB, base: schema_directory)
+      end
+
+      def self.find_versions_from_migration_files
+        migration_directories.each_with_object([]) do |directory, migration_versions|
+          directory_migrations = Dir.glob(MIGRATION_VERSION_GLOB, base: directory)
+          directory_versions = directory_migrations.map! { |m| m.split('_').first }
+
+          migration_versions.concat(directory_versions)
+        end
+      end
+
+      def self.connection
+        ActiveRecord::Base.connection
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb
new file mode 100644
index 00000000000..2633c29438a
--- /dev/null
+++ b/lib/gitlab/database/similarity_score.rb
@@ -0,0 +1,110 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    class SimilarityScore
+      EMPTY_STRING = Arel.sql("''").freeze
+      EXPRESSION_ON_INVALID_INPUT = Arel::Nodes::NamedFunction.new('CAST', [Arel.sql('0').as('integer')]).freeze
+      DEFAULT_MULTIPLIER = 1
+
+      # This method returns an Arel expression that can be used in an ActiveRecord query to order the resultset by similarity.
+      #
+      # Note: Calculating similarity score for large volume of records is inefficient. use SimilarityScore only for smaller
+      # resultset which is already filtered by other conditions (< 10_000 records).
+      #
+      # ==== Parameters
+      # * +search+ - [String] the user provided search string
+      # * +rules+ - [{ column: COLUMN, multiplier: 1 }, { column: COLUMN_2, multiplier: 0.5 }] rules for the scoring.
+      #   * +column+ - Arel column expression, example: Project.arel_table["name"]
+      #   * +multiplier+ - Integer or Float to increase or decrease the score (optional, defaults to 1)
+      #
+      # ==== Use case
+      #
+      # We'd like to search for projects by path, name and description. We want to rank higher the path and name matches, since
+      # it's more likely that the user was remembering the path or the name of the project.
+      #
+      # Rules:
+      #   [
+      #     { column: Project.arel_table['path'], multiplier: 1 },
+      #     { column: Project.arel_table['name'], multiplier: 1 },
+      #     { column: Project.arel_table['description'], multiplier: 0.5 }
+      #   ]
+      #
+      # ==== Examples
+      #
+      #  Similarity calculation based on one column:
+      #
+      #  Gitlab::Database::SimilarityScore.build_expession(search: 'my input', rules: [{ column: Project.arel_table['name'] }])
+      #
+      #  Similarity calculation based on two column, where the second column has lower priority:
+      #
+      #  Gitlab::Database::SimilarityScore.build_expession(search: 'my input', rules: [
+      #    { column: Project.arel_table['name'], multiplier: 1 },
+      #    { column: Project.arel_table['description'], multiplier: 0.5 }
+      #  ])
+      #
+      #  Integration with an ActiveRecord query:
+      #
+      #  table = Project.arel_table
+      #
+      #  order_expression = Gitlab::Database::SimilarityScore.build_expession(search: 'input', rules: [
+      #    { column: table['name'], multiplier: 1 },
+      #    { column: table['description'], multiplier: 0.5 }
+      #  ])
+      #
+      #  Project.where("name LIKE ?", '%' + 'input' + '%').order(order_expression.desc)
+      #
+      #  The expression can be also used in SELECT:
+      #
+      #  results = Project.select(order_expression.as('similarity')).where("name LIKE ?", '%' + 'input' + '%').order(similarity: :desc)
+      #  puts results.map(&:similarity)
+      #
+      def self.build_expression(search:, rules:)
+        return EXPRESSION_ON_INVALID_INPUT if search.blank? || rules.empty?
+
+        quoted_search = ActiveRecord::Base.connection.quote(search.to_s)
+
+        first_expression, *expressions = rules.map do |rule|
+          rule_to_arel(quoted_search, rule)
+        end
+
+        # (SIMILARITY ...) + (SIMILARITY ...)
+        expressions.inject(first_expression) do |expression1, expression2|
+          Arel::Nodes::Addition.new(expression1, expression2)
+        end
+      end
+
+      # (SIMILARITY(COALESCE(column, ''), 'search_string') * CAST(multiplier AS numeric))
+      def self.rule_to_arel(search, rule)
+        Arel::Nodes::Grouping.new(
+          Arel::Nodes::Multiplication.new(
+            similarity_function_call(search, column_expression(rule)),
+            multiplier_expression(rule)
+          )
+        )
+      end
+
+      # COALESCE(column, '')
+      def self.column_expression(rule)
+        Arel::Nodes::NamedFunction.new('COALESCE', [rule.fetch(:column), EMPTY_STRING])
+      end
+
+      # SIMILARITY(COALESCE(column, ''), 'search_string')
+      def self.similarity_function_call(search, column)
+        Arel::Nodes::NamedFunction.new('SIMILARITY', [column, Arel.sql(search)])
+      end
+
+      # CAST(multiplier AS numeric)
+      def self.multiplier_expression(rule)
+        quoted_multiplier = ActiveRecord::Base.connection.quote(rule.fetch(:multiplier, DEFAULT_MULTIPLIER).to_s)
+
+        Arel::Nodes::NamedFunction.new('CAST', [Arel.sql(quoted_multiplier).as('numeric')])
+      end
+
+      private_class_method :rule_to_arel
+      private_class_method :column_expression
+      private_class_method :similarity_function_call
+      private_class_method :multiplier_expression
+    end
+  end
+end
diff --git a/lib/gitlab/database/with_lock_retries.rb b/lib/gitlab/database/with_lock_retries.rb
index bebcba6f42e..a9c86e4e267 100644
--- a/lib/gitlab/database/with_lock_retries.rb
+++ b/lib/gitlab/database/with_lock_retries.rb
@@ -2,7 +2,14 @@
 
 module Gitlab
   module Database
+    # This class provides a way to automatically execute code that relies on acquiring a database lock in a way
+    # designed to minimize impact on a busy production database.
+    #
+    # A default timing configuration is provided that makes repeated attempts to acquire the necessary lock, with
+    # varying lock_timeout settings, and also serves to limit the maximum number of attempts.
     class WithLockRetries
+      AttemptsExhaustedError = Class.new(StandardError)
+
       NULL_LOGGER = Gitlab::JsonLogger.new('/dev/null')
 
       # Each element of the array represents a retry iteration.
@@ -63,7 +70,17 @@ module Gitlab
         @log_params = { method: 'with_lock_retries', class: klass.to_s }
       end
 
-      def run(&block)
+      # Executes a block of code, retrying it whenever a database lock can't be acquired in time
+      #
+      # When a database lock can't be acquired, ActiveRecord throws ActiveRecord::LockWaitTimeout
+      # exception which we intercept to re-execute the block of code, until it finishes or we reach the
+      # max attempt limit. The default behavior when max attempts have been reached is to make a final attempt with the
+      # lock_timeout disabled, but this can be altered with the raise_on_exhaustion parameter.
+      #
+      # @see DEFAULT_TIMING_CONFIGURATION for the timings used when attempting a retry
+      # @param [Boolean] raise_on_exhaustion whether to raise `AttemptsExhaustedError` when exhausting max attempts
+      # @param [Proc] block of code that will be executed
+      def run(raise_on_exhaustion: false, &block)
         raise 'no block given' unless block_given?
 
         @block = block
@@ -85,6 +102,9 @@ module Gitlab
             retry
           else
             reset_db_settings
+
+            raise AttemptsExhaustedError, 'configured attempts to obtain locks are exhausted' if raise_on_exhaustion
+
             run_block_without_lock_timeout
           end
author	GitLab Bot <gitlab-bot@gitlab.com>	2020-08-20 21:42:06 +0300
committer	GitLab Bot <gitlab-bot@gitlab.com>	2020-08-20 21:42:06 +0300
commit	6e4e1050d9dba2b7b2523fdd1768823ab85feef4 (patch)
tree	78be5963ec075d80116a932011d695dd33910b4e /lib/gitlab/database
parent	1ce776de4ae122aba3f349c02c17cebeaa8ecf07 (diff)