Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--lib/gitlab/database/batch_count.rb23
-rw-r--r--lib/gitlab/database/connection_timer.rb2
-rw-r--r--lib/gitlab/database/migration_helpers.rb4
-rw-r--r--lib/gitlab/database/partitioning/partition_creator.rb2
-rw-r--r--lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb42
-rw-r--r--lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb18
-rw-r--r--lib/gitlab/database/postgresql_adapter/schema_versions_copy_mixin.rb28
-rw-r--r--lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb16
-rw-r--r--lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb2
-rw-r--r--lib/gitlab/database/schema_cleaner.rb5
-rw-r--r--lib/gitlab/database/schema_helpers.rb2
-rw-r--r--lib/gitlab/database/schema_version_files.rb64
-rw-r--r--lib/gitlab/database/similarity_score.rb110
-rw-r--r--lib/gitlab/database/with_lock_retries.rb22
14 files changed, 296 insertions, 44 deletions
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
index ab069ce1da1..1762b81b7d8 100644
--- a/lib/gitlab/database/batch_count.rb
+++ b/lib/gitlab/database/batch_count.rb
@@ -16,6 +16,7 @@
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
+# batch_sum(User, :sign_in_count)
module Gitlab
module Database
module BatchCount
@@ -27,6 +28,10 @@ module Gitlab
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size, start: start, finish: finish)
end
+ def batch_sum(relation, column, batch_size: nil, start: nil, finish: nil)
+ BatchCounter.new(relation, column: nil, operation: :sum, operation_args: [column]).count(batch_size: batch_size, start: start, finish: finish)
+ end
+
class << self
include BatchCount
end
@@ -35,6 +40,7 @@ module Gitlab
class BatchCounter
FALLBACK = -1
MIN_REQUIRED_BATCH_SIZE = 1_250
+ DEFAULT_SUM_BATCH_SIZE = 1_000
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
ALLOWED_MODES = [:itself, :distinct].freeze
@@ -43,13 +49,16 @@ module Gitlab
DEFAULT_DISTINCT_BATCH_SIZE = 10_000
DEFAULT_BATCH_SIZE = 100_000
- def initialize(relation, column: nil)
+ def initialize(relation, column: nil, operation: :count, operation_args: nil)
@relation = relation
@column = column || relation.primary_key
+ @operation = operation
+ @operation_args = operation_args
end
def unwanted_configuration?(finish, batch_size, start)
- batch_size <= MIN_REQUIRED_BATCH_SIZE ||
+ (@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) ||
+ (@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) ||
(finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
start > finish
end
@@ -60,7 +69,7 @@ module Gitlab
check_mode!(mode)
# non-distinct have better performance
- batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
+ batch_size ||= batch_size_for_mode_and_operation(mode, @operation)
start = actual_start(start)
finish = actual_finish(finish)
@@ -91,11 +100,17 @@ module Gitlab
def batch_fetch(start, finish, mode)
# rubocop:disable GitlabSecurity/PublicSend
- @relation.select(@column).public_send(mode).where(between_condition(start, finish)).count
+ @relation.select(@column).public_send(mode).where(between_condition(start, finish)).send(@operation, *@operation_args)
end
private
+ def batch_size_for_mode_and_operation(mode, operation)
+ return DEFAULT_SUM_BATCH_SIZE if operation == :sum
+
+ mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
+ end
+
def between_condition(start, finish)
return @column.between(start..(finish - 1)) if @column.is_a?(Arel::Attributes::Attribute)
diff --git a/lib/gitlab/database/connection_timer.rb b/lib/gitlab/database/connection_timer.rb
index ef8d52ba71c..f9b893ffd0f 100644
--- a/lib/gitlab/database/connection_timer.rb
+++ b/lib/gitlab/database/connection_timer.rb
@@ -23,7 +23,7 @@ module Gitlab
end
def interval_with_randomization
- interval + rand(RANDOMIZATION_INTERVAL) if interval.positive?
+ interval + rand(RANDOMIZATION_INTERVAL) if interval > 0
end
def current_clock_value
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 006a24da8fe..a618a3017b2 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -815,7 +815,7 @@ module Gitlab
BEFORE INSERT OR UPDATE
ON #{table}
FOR EACH ROW
- EXECUTE PROCEDURE #{trigger}()
+ EXECUTE FUNCTION #{trigger}()
EOF
end
@@ -1062,7 +1062,7 @@ into similar problems in the future (e.g. when new tables are created).
AND pg_class.relname = '#{table}'
SQL
- connection.select_value(check_sql).positive?
+ connection.select_value(check_sql) > 0
end
# Adds a check constraint to a table
diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_creator.rb
index 348dd1ba660..4c1b13fe3b5 100644
--- a/lib/gitlab/database/partitioning/partition_creator.rb
+++ b/lib/gitlab/database/partitioning/partition_creator.rb
@@ -24,7 +24,7 @@ module Gitlab
end
def create_partitions
- return unless Feature.enabled?(:postgres_dynamic_partition_creation, default_enabled: true)
+ Gitlab::AppLogger.info("Checking state of dynamic postgres partitions")
models.each do |model|
# Double-checking before getting the lease:
diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
index b676767f41d..e6d8ec55319 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
@@ -16,7 +16,9 @@ module Gitlab
BATCH_SIZE = 50_000
# Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column.
- # One partition is created per month between the given `min_date` and `max_date`.
+ # One partition is created per month between the given `min_date` and `max_date`. Also installs a trigger on
+ # the original table to copy writes into the partitioned table. To copy over historic data from before creation
+ # of the partitioned table, use the `enqueue_partitioning_data_migration` helper in a post-deploy migration.
#
# A copy of the original table is required as PG currently does not support partitioning existing tables.
#
@@ -56,10 +58,10 @@ module Gitlab
create_range_partitioned_copy(table_name, partitioned_table_name, partition_column, primary_key)
create_daterange_partitions(partitioned_table_name, partition_column.name, min_date, max_date)
create_trigger_to_sync_tables(table_name, partitioned_table_name, primary_key)
- enqueue_background_migration(table_name, partitioned_table_name, primary_key)
end
- # Clean up a partitioned copy of an existing table. This deletes the partitioned table and all partitions.
+ # Clean up a partitioned copy of an existing table. First, deletes the database function and trigger that were
+ # used to copy writes to the partitioned table, then removes the partitioned table (also removing partitions).
#
# Example:
#
@@ -69,8 +71,6 @@ module Gitlab
assert_table_is_allowed(table_name)
assert_not_in_transaction_block(scope: ERROR_SCOPE)
- cleanup_migration_jobs(table_name)
-
with_lock_retries do
trigger_name = make_sync_trigger_name(table_name)
drop_trigger(table_name, trigger_name)
@@ -83,6 +83,38 @@ module Gitlab
drop_table(partitioned_table_name)
end
+ # Enqueue the background jobs that will backfill data in the partitioned table, by batch-copying records from
+ # original table. This helper should be called from a post-deploy migration.
+ #
+ # Example:
+ #
+ # enqueue_partitioning_data_migration :audit_events
+ #
+ def enqueue_partitioning_data_migration(table_name)
+ assert_table_is_allowed(table_name)
+
+ assert_not_in_transaction_block(scope: ERROR_SCOPE)
+
+ partitioned_table_name = make_partitioned_table_name(table_name)
+ primary_key = connection.primary_key(table_name)
+ enqueue_background_migration(table_name, partitioned_table_name, primary_key)
+ end
+
+ # Cleanup a previously enqueued background migration to copy data into a partitioned table. This will not
+ # prevent the enqueued jobs from executing, but instead cleans up information in the database used to track the
+ # state of the background migration. It should be safe to also remove the partitioned table even if the
+ # background jobs are still in-progress, as the absence of the table will cause them to safely exit.
+ #
+ # Example:
+ #
+ # cleanup_partitioning_data_migration :audit_events
+ #
+ def cleanup_partitioning_data_migration(table_name)
+ assert_table_is_allowed(table_name)
+
+ cleanup_migration_jobs(table_name)
+ end
+
def create_hash_partitions(table_name, number_of_partitions)
transaction do
(0..number_of_partitions - 1).each do |partition|
diff --git a/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb
new file mode 100644
index 00000000000..59bd24d3c37
--- /dev/null
+++ b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module PostgresqlAdapter
+ module DumpSchemaVersionsMixin
+ extend ActiveSupport::Concern
+
+ def dump_schema_information # :nodoc:
+ versions = schema_migration.all_versions
+ Gitlab::Database::SchemaVersionFiles.touch_all(versions) if versions.any?
+
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/postgresql_adapter/schema_versions_copy_mixin.rb b/lib/gitlab/database/postgresql_adapter/schema_versions_copy_mixin.rb
deleted file mode 100644
index d8f96643dcb..00000000000
--- a/lib/gitlab/database/postgresql_adapter/schema_versions_copy_mixin.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module Database
- module PostgresqlAdapter
- module SchemaVersionsCopyMixin
- extend ActiveSupport::Concern
-
- def dump_schema_information # :nodoc:
- versions = schema_migration.all_versions
- copy_versions_sql(versions) if versions.any?
- end
-
- private
-
- def copy_versions_sql(versions)
- sm_table = quote_table_name(schema_migration.table_name)
-
- sql = +"COPY #{sm_table} (version) FROM STDIN;\n"
- sql << versions.map { |v| Integer(v) }.sort.join("\n")
- sql << "\n\\.\n"
-
- sql
- end
- end
- end
- end
-end
diff --git a/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb
new file mode 100644
index 00000000000..cf8342941c4
--- /dev/null
+++ b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module PostgresqlDatabaseTasks
+ module LoadSchemaVersionsMixin
+ extend ActiveSupport::Concern
+
+ def structure_load(*args)
+ super(*args)
+ Gitlab::Database::SchemaVersionFiles.load_all
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
index 6b9af51a6ab..4fbbfdc4914 100644
--- a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
+++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb
@@ -45,7 +45,7 @@ module Gitlab
reverts_for_type('namespace') do |path_before_rename, current_path|
matches_path = MigrationClasses::Route.arel_table[:path].matches(current_path)
namespace = MigrationClasses::Namespace.joins(:route)
- .find_by(matches_path)&.becomes(MigrationClasses::Namespace)
+ .find_by(matches_path)&.becomes(MigrationClasses::Namespace) # rubocop: disable Cop/AvoidBecomes
if namespace
perform_rename(namespace, current_path, path_before_rename)
diff --git a/lib/gitlab/database/schema_cleaner.rb b/lib/gitlab/database/schema_cleaner.rb
index ae9d77e635e..7c415287878 100644
--- a/lib/gitlab/database/schema_cleaner.rb
+++ b/lib/gitlab/database/schema_cleaner.rb
@@ -23,6 +23,11 @@ module Gitlab
structure.gsub!(/\n{3,}/, "\n\n")
io << structure
+ io << <<~MSG
+ -- schema_migrations.version information is no longer stored in this file,
+ -- but instead tracked in the db/schema_migrations directory
+ -- see https://gitlab.com/gitlab-org/gitlab/-/issues/218590 for details
+ MSG
nil
end
diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb
index 34daafd06de..dda4d8eecdb 100644
--- a/lib/gitlab/database/schema_helpers.rb
+++ b/lib/gitlab/database/schema_helpers.rb
@@ -25,7 +25,7 @@ module Gitlab
CREATE TRIGGER #{name}
#{fires} ON #{table_name}
FOR EACH ROW
- EXECUTE PROCEDURE #{function_name}()
+ EXECUTE FUNCTION #{function_name}()
SQL
end
diff --git a/lib/gitlab/database/schema_version_files.rb b/lib/gitlab/database/schema_version_files.rb
new file mode 100644
index 00000000000..27a942404ef
--- /dev/null
+++ b/lib/gitlab/database/schema_version_files.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class SchemaVersionFiles
+ SCHEMA_DIRECTORY = 'db/schema_migrations'
+ MIGRATION_DIRECTORIES = %w[db/migrate db/post_migrate].freeze
+ MIGRATION_VERSION_GLOB = '20[0-9][0-9]*'
+
+ def self.touch_all(versions_from_database)
+ versions_from_migration_files = find_versions_from_migration_files
+
+ version_filepaths = find_version_filenames.map { |f| schema_directory.join(f) }
+ FileUtils.rm(version_filepaths)
+
+ versions_to_create = versions_from_database & versions_from_migration_files
+ versions_to_create.each do |version|
+ version_filepath = schema_directory.join(version)
+
+ File.open(version_filepath, 'w') do |file|
+ file << Digest::SHA256.hexdigest(version)
+ end
+ end
+ end
+
+ def self.load_all
+ version_filenames = find_version_filenames
+ return if version_filenames.empty?
+
+ values = version_filenames.map { |vf| "('#{connection.quote_string(vf)}')" }
+ connection.execute(<<~SQL)
+ INSERT INTO schema_migrations (version)
+ VALUES #{values.join(',')}
+ ON CONFLICT DO NOTHING
+ SQL
+ end
+
+ def self.schema_directory
+ @schema_directory ||= Rails.root.join(SCHEMA_DIRECTORY)
+ end
+
+ def self.migration_directories
+ @migration_directories ||= MIGRATION_DIRECTORIES.map { |dir| Rails.root.join(dir) }
+ end
+
+ def self.find_version_filenames
+ Dir.glob(MIGRATION_VERSION_GLOB, base: schema_directory)
+ end
+
+ def self.find_versions_from_migration_files
+ migration_directories.each_with_object([]) do |directory, migration_versions|
+ directory_migrations = Dir.glob(MIGRATION_VERSION_GLOB, base: directory)
+ directory_versions = directory_migrations.map! { |m| m.split('_').first }
+
+ migration_versions.concat(directory_versions)
+ end
+ end
+
+ def self.connection
+ ActiveRecord::Base.connection
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb
new file mode 100644
index 00000000000..2633c29438a
--- /dev/null
+++ b/lib/gitlab/database/similarity_score.rb
@@ -0,0 +1,110 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ class SimilarityScore
+ EMPTY_STRING = Arel.sql("''").freeze
+ EXPRESSION_ON_INVALID_INPUT = Arel::Nodes::NamedFunction.new('CAST', [Arel.sql('0').as('integer')]).freeze
+ DEFAULT_MULTIPLIER = 1
+
+ # This method returns an Arel expression that can be used in an ActiveRecord query to order the resultset by similarity.
+ #
+ # Note: Calculating similarity score for large volume of records is inefficient. use SimilarityScore only for smaller
+ # resultset which is already filtered by other conditions (< 10_000 records).
+ #
+ # ==== Parameters
+ # * +search+ - [String] the user provided search string
+ # * +rules+ - [{ column: COLUMN, multiplier: 1 }, { column: COLUMN_2, multiplier: 0.5 }] rules for the scoring.
+ # * +column+ - Arel column expression, example: Project.arel_table["name"]
+ # * +multiplier+ - Integer or Float to increase or decrease the score (optional, defaults to 1)
+ #
+ # ==== Use case
+ #
+ # We'd like to search for projects by path, name and description. We want to rank higher the path and name matches, since
+ # it's more likely that the user was remembering the path or the name of the project.
+ #
+ # Rules:
+ # [
+ # { column: Project.arel_table['path'], multiplier: 1 },
+ # { column: Project.arel_table['name'], multiplier: 1 },
+ # { column: Project.arel_table['description'], multiplier: 0.5 }
+ # ]
+ #
+ # ==== Examples
+ #
+ # Similarity calculation based on one column:
+ #
+ # Gitlab::Database::SimilarityScore.build_expession(search: 'my input', rules: [{ column: Project.arel_table['name'] }])
+ #
+ # Similarity calculation based on two column, where the second column has lower priority:
+ #
+ # Gitlab::Database::SimilarityScore.build_expession(search: 'my input', rules: [
+ # { column: Project.arel_table['name'], multiplier: 1 },
+ # { column: Project.arel_table['description'], multiplier: 0.5 }
+ # ])
+ #
+ # Integration with an ActiveRecord query:
+ #
+ # table = Project.arel_table
+ #
+ # order_expression = Gitlab::Database::SimilarityScore.build_expession(search: 'input', rules: [
+ # { column: table['name'], multiplier: 1 },
+ # { column: table['description'], multiplier: 0.5 }
+ # ])
+ #
+ # Project.where("name LIKE ?", '%' + 'input' + '%').order(order_expression.desc)
+ #
+ # The expression can be also used in SELECT:
+ #
+ # results = Project.select(order_expression.as('similarity')).where("name LIKE ?", '%' + 'input' + '%').order(similarity: :desc)
+ # puts results.map(&:similarity)
+ #
+ def self.build_expression(search:, rules:)
+ return EXPRESSION_ON_INVALID_INPUT if search.blank? || rules.empty?
+
+ quoted_search = ActiveRecord::Base.connection.quote(search.to_s)
+
+ first_expression, *expressions = rules.map do |rule|
+ rule_to_arel(quoted_search, rule)
+ end
+
+ # (SIMILARITY ...) + (SIMILARITY ...)
+ expressions.inject(first_expression) do |expression1, expression2|
+ Arel::Nodes::Addition.new(expression1, expression2)
+ end
+ end
+
+ # (SIMILARITY(COALESCE(column, ''), 'search_string') * CAST(multiplier AS numeric))
+ def self.rule_to_arel(search, rule)
+ Arel::Nodes::Grouping.new(
+ Arel::Nodes::Multiplication.new(
+ similarity_function_call(search, column_expression(rule)),
+ multiplier_expression(rule)
+ )
+ )
+ end
+
+ # COALESCE(column, '')
+ def self.column_expression(rule)
+ Arel::Nodes::NamedFunction.new('COALESCE', [rule.fetch(:column), EMPTY_STRING])
+ end
+
+ # SIMILARITY(COALESCE(column, ''), 'search_string')
+ def self.similarity_function_call(search, column)
+ Arel::Nodes::NamedFunction.new('SIMILARITY', [column, Arel.sql(search)])
+ end
+
+ # CAST(multiplier AS numeric)
+ def self.multiplier_expression(rule)
+ quoted_multiplier = ActiveRecord::Base.connection.quote(rule.fetch(:multiplier, DEFAULT_MULTIPLIER).to_s)
+
+ Arel::Nodes::NamedFunction.new('CAST', [Arel.sql(quoted_multiplier).as('numeric')])
+ end
+
+ private_class_method :rule_to_arel
+ private_class_method :column_expression
+ private_class_method :similarity_function_call
+ private_class_method :multiplier_expression
+ end
+ end
+end
diff --git a/lib/gitlab/database/with_lock_retries.rb b/lib/gitlab/database/with_lock_retries.rb
index bebcba6f42e..a9c86e4e267 100644
--- a/lib/gitlab/database/with_lock_retries.rb
+++ b/lib/gitlab/database/with_lock_retries.rb
@@ -2,7 +2,14 @@
module Gitlab
module Database
+ # This class provides a way to automatically execute code that relies on acquiring a database lock in a way
+ # designed to minimize impact on a busy production database.
+ #
+ # A default timing configuration is provided that makes repeated attempts to acquire the necessary lock, with
+ # varying lock_timeout settings, and also serves to limit the maximum number of attempts.
class WithLockRetries
+ AttemptsExhaustedError = Class.new(StandardError)
+
NULL_LOGGER = Gitlab::JsonLogger.new('/dev/null')
# Each element of the array represents a retry iteration.
@@ -63,7 +70,17 @@ module Gitlab
@log_params = { method: 'with_lock_retries', class: klass.to_s }
end
- def run(&block)
+ # Executes a block of code, retrying it whenever a database lock can't be acquired in time
+ #
+ # When a database lock can't be acquired, ActiveRecord throws ActiveRecord::LockWaitTimeout
+ # exception which we intercept to re-execute the block of code, until it finishes or we reach the
+ # max attempt limit. The default behavior when max attempts have been reached is to make a final attempt with the
+ # lock_timeout disabled, but this can be altered with the raise_on_exhaustion parameter.
+ #
+ # @see DEFAULT_TIMING_CONFIGURATION for the timings used when attempting a retry
+ # @param [Boolean] raise_on_exhaustion whether to raise `AttemptsExhaustedError` when exhausting max attempts
+ # @param [Proc] block of code that will be executed
+ def run(raise_on_exhaustion: false, &block)
raise 'no block given' unless block_given?
@block = block
@@ -85,6 +102,9 @@ module Gitlab
retry
else
reset_db_settings
+
+ raise AttemptsExhaustedError, 'configured attempts to obtain locks are exhausted' if raise_on_exhaustion
+
run_block_without_lock_timeout
end