Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author GitLab Bot <gitlab-bot@gitlab.com> 2021-03-16 21:18:33 +0300
committer GitLab Bot <gitlab-bot@gitlab.com> 2021-03-16 21:18:33 +0300
commit f64a639bcfa1fc2bc89ca7db268f594306edfd7c (patch)
tree a2c3c2ebcc3b45e596949db485d6ed18ffaacfa1 /lib/gitlab/database
parent bfbc3e0d6583ea1a91f627528bedc3d65ba4b10f (diff)
Add latest changes from gitlab-org/gitlab@13-10-stable-ee v13.10.0-rc40
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r-- lib/gitlab/database/background_migration/batched_job.rb 23
-rw-r--r-- lib/gitlab/database/background_migration/batched_migration.rb 56
-rw-r--r-- lib/gitlab/database/background_migration/batched_migration_wrapper.rb 46
-rw-r--r-- lib/gitlab/database/background_migration/scheduler.rb 60
-rw-r--r-- lib/gitlab/database/migration_helpers.rb 5
-rw-r--r-- lib/gitlab/database/migrations/background_migration_helpers.rb 87
-rw-r--r-- lib/gitlab/database/migrations/observation.rb 3
-rw-r--r-- lib/gitlab/database/migrations/observers.rb 3
-rw-r--r-- lib/gitlab/database/migrations/observers/query_statistics.rb 38
-rw-r--r-- lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb 2
-rw-r--r-- lib/gitlab/database/similarity_score.rb 7
11 files changed, 319 insertions, 11 deletions
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb
new file mode 100644
index 00000000000..3b624df2bfd
--- /dev/null
+++ b/lib/gitlab/database/background_migration/batched_job.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module BackgroundMigration
+ class BatchedJob < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord
+ self.table_name = :batched_background_migration_jobs
+
+ belongs_to :batched_migration, foreign_key: :batched_background_migration_id
+
+ enum status: {
+ pending: 0,
+ running: 1,
+ failed: 2,
+ succeeded: 3
+ }
+
+ delegate :aborted?, :job_class, :table_name, :column_name, :job_arguments,
+ to: :batched_migration, prefix: :migration
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb
new file mode 100644
index 00000000000..0c9add9b355
--- /dev/null
+++ b/lib/gitlab/database/background_migration/batched_migration.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module BackgroundMigration
+ class BatchedMigration < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord
+ JOB_CLASS_MODULE = 'Gitlab::BackgroundMigration'
+ BATCH_CLASS_MODULE = "#{JOB_CLASS_MODULE}::BatchingStrategies".freeze
+
+ self.table_name = :batched_background_migrations
+
+ has_many :batched_jobs, foreign_key: :batched_background_migration_id
+ has_one :last_job, -> { order(id: :desc) },
+ class_name: 'Gitlab::Database::BackgroundMigration::BatchedJob',
+ foreign_key: :batched_background_migration_id
+
+ scope :queue_order, -> { order(id: :asc) }
+
+ enum status: {
+ paused: 0,
+ active: 1,
+ aborted: 2,
+ finished: 3
+ }
+
+ def interval_elapsed?
+ last_job.nil? || last_job.created_at <= Time.current - interval
+ end
+
+ def create_batched_job!(min, max)
+ batched_jobs.create!(min_value: min, max_value: max, batch_size: batch_size, sub_batch_size: sub_batch_size)
+ end
+
+ def next_min_value
+ last_job&.max_value&.next || min_value
+ end
+
+ def job_class
+ "#{JOB_CLASS_MODULE}::#{job_class_name}".constantize
+ end
+
+ def batch_class
+ "#{BATCH_CLASS_MODULE}::#{batch_class_name}".constantize
+ end
+
+ def job_class_name=(class_name)
+ write_attribute(:job_class_name, class_name.demodulize)
+ end
+
+ def batch_class_name=(class_name)
+ write_attribute(:batch_class_name, class_name.demodulize)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
new file mode 100644
index 00000000000..299bd992197
--- /dev/null
+++ b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module BackgroundMigration
+ class BatchedMigrationWrapper
+ def perform(batch_tracking_record)
+ start_tracking_execution(batch_tracking_record)
+
+ execute_batch(batch_tracking_record)
+
+ batch_tracking_record.status = :succeeded
+ rescue => e
+ batch_tracking_record.status = :failed
+
+ raise e
+ ensure
+ finish_tracking_execution(batch_tracking_record)
+ end
+
+ private
+
+ def start_tracking_execution(tracking_record)
+ tracking_record.update!(attempts: tracking_record.attempts + 1, status: :running, started_at: Time.current)
+ end
+
+ def execute_batch(tracking_record)
+ job_instance = tracking_record.migration_job_class.new
+
+ job_instance.perform(
+ tracking_record.min_value,
+ tracking_record.max_value,
+ tracking_record.migration_table_name,
+ tracking_record.migration_column_name,
+ tracking_record.sub_batch_size,
+ *tracking_record.migration_job_arguments)
+ end
+
+ def finish_tracking_execution(tracking_record)
+ tracking_record.finished_at = Time.current
+ tracking_record.save!
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/background_migration/scheduler.rb b/lib/gitlab/database/background_migration/scheduler.rb
new file mode 100644
index 00000000000..5f8a5ec06a5
--- /dev/null
+++ b/lib/gitlab/database/background_migration/scheduler.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module BackgroundMigration
+ class Scheduler
+ def perform(migration_wrapper: BatchedMigrationWrapper.new)
+ active_migration = BatchedMigration.active.queue_order.first
+
+ return unless active_migration&.interval_elapsed?
+
+ if next_batched_job = create_next_batched_job!(active_migration)
+ migration_wrapper.perform(next_batched_job)
+ else
+ finish_active_migration(active_migration)
+ end
+ end
+
+ private
+
+ def create_next_batched_job!(active_migration)
+ next_batch_range = find_next_batch_range(active_migration)
+
+ return if next_batch_range.nil?
+
+ active_migration.create_batched_job!(next_batch_range.min, next_batch_range.max)
+ end
+
+ def find_next_batch_range(active_migration)
+ batching_strategy = active_migration.batch_class.new
+ batch_min_value = active_migration.next_min_value
+
+ next_batch_bounds = batching_strategy.next_batch(
+ active_migration.table_name,
+ active_migration.column_name,
+ batch_min_value: batch_min_value,
+ batch_size: active_migration.batch_size)
+
+ return if next_batch_bounds.nil?
+
+ clamped_batch_range(active_migration, next_batch_bounds)
+ end
+
+ def clamped_batch_range(active_migration, next_bounds)
+ min_value, max_value = next_bounds
+
+ return if min_value > active_migration.max_value
+
+ max_value = max_value.clamp(min_value, active_migration.max_value)
+
+ (min_value..max_value)
+ end
+
+ def finish_active_migration(active_migration)
+ active_migration.finished!
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 6b169a504f3..31e733050e1 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -87,9 +87,10 @@ module Gitlab
# See Rails' `create_table` for more info on the available arguments.
def create_table_with_constraints(table_name, **options, &block)
helper_context = self
- check_constraints = []
with_lock_retries do
+ check_constraints = []
+
create_table(table_name, **options) do |t|
t.define_singleton_method(:check_constraint) do |name, definition|
helper_context.send(:validate_check_constraint_name!, name) # rubocop:disable GitlabSecurity/PublicSend
@@ -1015,7 +1016,7 @@ module Gitlab
'CopyColumnUsingBackgroundMigrationJob',
interval,
batch_size: batch_size,
- other_job_arguments: [table, primary_key, column, tmp_column, sub_batch_size],
+ other_job_arguments: [table, primary_key, sub_batch_size, column, tmp_column],
track_jobs: true,
primary_column_name: primary_key
)
diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb
index 12dcf68da2f..e8cbea72887 100644
--- a/lib/gitlab/database/migrations/background_migration_helpers.rb
+++ b/lib/gitlab/database/migrations/background_migration_helpers.rb
@@ -4,8 +4,12 @@ module Gitlab
module Database
module Migrations
module BackgroundMigrationHelpers
- BACKGROUND_MIGRATION_BATCH_SIZE = 1_000 # Number of rows to process per job
- BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time
+ BATCH_SIZE = 1_000 # Number of rows to process per job
+ SUB_BATCH_SIZE = 100 # Number of rows to process per sub-batch
+ JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time
+ BATCH_CLASS_NAME = 'PrimaryKeyBatchingStrategy' # Default batch class for batched migrations
+ BATCH_MIN_VALUE = 1 # Default minimum value for batched migrations
+ BATCH_MIN_DELAY = 2.minutes.freeze # Minimum delay between batched migrations
# Bulk queues background migration jobs for an entire table, batched by ID range.
# "Bulk" meaning many jobs will be pushed at a time for efficiency.
@@ -31,7 +35,7 @@ module Gitlab
# # do something
# end
# end
- def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
+ def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BATCH_SIZE)
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
jobs = []
@@ -40,7 +44,7 @@ module Gitlab
model_class.each_batch(of: batch_size) do |relation|
start_id, end_id = relation.pluck("MIN(#{table_name}.id)", "MAX(#{table_name}.id)").first
- if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
+ if jobs.length >= JOB_BUFFER_SIZE
# Note: This code path generally only helps with many millions of rows
# We push multiple jobs at a time to reduce the time spent in
# Sidekiq/Redis operations. We're using this buffer based approach so we
@@ -89,7 +93,7 @@ module Gitlab
# # do something
# end
# end
- def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false, primary_column_name: :id)
+ def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false, primary_column_name: :id)
raise "#{model_class} does not have an ID column of #{primary_column_name} to use for batch ranges" unless model_class.column_names.include?(primary_column_name.to_s)
raise "#{primary_column_name} is not an integer column" unless model_class.columns_hash[primary_column_name.to_s].type == :integer
@@ -127,6 +131,79 @@ module Gitlab
final_delay
end
+ # Creates a batched background migration for the given table. A batched migration runs one job
+ # at a time, computing the bounds of the next batch based on the current migration settings and the previous
+ # batch bounds. Each job's execution status is tracked in the database as the migration runs. The given job
+ # class must be present in the Gitlab::BackgroundMigration module, and the batch class (if specified) must be
+ # present in the Gitlab::BackgroundMigration::BatchingStrategies module.
+ #
+ # job_class_name - The background migration job class as a string
+ # batch_table_name - The name of the table the migration will batch over
+ # batch_column_name - The name of the column the migration will batch over
+ # job_arguments - Extra arguments to pass to the job instance when the migration runs
+ # job_interval - The pause interval between each job's execution, minimum of 2 minutes
+ # batch_min_value - The value in the column the batching will begin at
+ # batch_max_value - The value in the column the batching will end at, defaults to `SELECT MAX(batch_column)`
+ # batch_class_name - The name of the class that will be called to find the range of each next batch
+ # batch_size - The maximum number of rows per job
+ # sub_batch_size - The maximum number of rows processed per "iteration" within the job
+ #
+ #
+ # *Returns the created BatchedMigration record*
+ #
+ # Example:
+ #
+ # queue_batched_background_migration(
+ # 'CopyColumnUsingBackgroundMigrationJob',
+ # :events,
+ # :id,
+ # 'column1', 'column2',
+ # job_interval: 2.minutes)
+ #
+ # Where the background migration exists:
+ #
+ # class Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob
+ # def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, *other_args)
+ # # do something
+ # end
+ # end
+ def queue_batched_background_migration( # rubocop:disable Metrics/ParameterLists
+ job_class_name,
+ batch_table_name,
+ batch_column_name,
+ *job_arguments,
+ job_interval:,
+ batch_min_value: BATCH_MIN_VALUE,
+ batch_max_value: nil,
+ batch_class_name: BATCH_CLASS_NAME,
+ batch_size: BATCH_SIZE,
+ sub_batch_size: SUB_BATCH_SIZE
+ )
+
+ job_interval = BATCH_MIN_DELAY if job_interval < BATCH_MIN_DELAY
+
+ batch_max_value ||= connection.select_value(<<~SQL)
+ SELECT MAX(#{connection.quote_column_name(batch_column_name)})
+ FROM #{connection.quote_table_name(batch_table_name)}
+ SQL
+
+ migration_status = batch_max_value.nil? ? :finished : :active
+ batch_max_value ||= batch_min_value
+
+ Gitlab::Database::BackgroundMigration::BatchedMigration.create!(
+ job_class_name: job_class_name,
+ table_name: batch_table_name,
+ column_name: batch_column_name,
+ interval: job_interval,
+ min_value: batch_min_value,
+ max_value: batch_max_value,
+ batch_class_name: batch_class_name,
+ batch_size: batch_size,
+ sub_batch_size: sub_batch_size,
+ job_arguments: job_arguments,
+ status: migration_status)
+ end
+
def perform_background_migration_inline?
Rails.env.test? || Rails.env.development?
end
diff --git a/lib/gitlab/database/migrations/observation.rb b/lib/gitlab/database/migrations/observation.rb
index 518c2c560d2..046843824a4 100644
--- a/lib/gitlab/database/migrations/observation.rb
+++ b/lib/gitlab/database/migrations/observation.rb
@@ -7,7 +7,8 @@ module Gitlab
:migration,
:walltime,
:success,
- :total_database_size_change
+ :total_database_size_change,
+ :query_statistics
)
end
end
diff --git a/lib/gitlab/database/migrations/observers.rb b/lib/gitlab/database/migrations/observers.rb
index 4b931d3c19c..592993aeac5 100644
--- a/lib/gitlab/database/migrations/observers.rb
+++ b/lib/gitlab/database/migrations/observers.rb
@@ -6,7 +6,8 @@ module Gitlab
module Observers
def self.all_observers
[
- TotalDatabaseSizeChange.new
+ TotalDatabaseSizeChange.new,
+ QueryStatistics.new
]
end
end
diff --git a/lib/gitlab/database/migrations/observers/query_statistics.rb b/lib/gitlab/database/migrations/observers/query_statistics.rb
new file mode 100644
index 00000000000..466f4724256
--- /dev/null
+++ b/lib/gitlab/database/migrations/observers/query_statistics.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ module Observers
+ # This observer gathers statistics from the pg_stat_statements extension.
+ # Notice that this extension is not installed by default. In case it cannot
+ # be found, the observer does nothing and doesn't throw an error.
+ class QueryStatistics < MigrationObserver
+ include Gitlab::Database::SchemaHelpers
+
+ def before
+ return unless enabled?
+
+ connection.execute('select pg_stat_statements_reset()')
+ end
+
+ def record(observation)
+ return unless enabled?
+
+ observation.query_statistics = connection.execute(<<~SQL)
+ SELECT query, calls, total_time, max_time, mean_time, rows
+ FROM pg_stat_statements
+ ORDER BY total_time DESC
+ SQL
+ end
+
+ private
+
+ def enabled?
+ function_exists?(:pg_stat_statements_reset) && connection.view_exists?(:pg_stat_statements)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
index f4cf576dda7..1c289391e21 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
@@ -9,7 +9,7 @@ module Gitlab
include ::Gitlab::Database::MigrationHelpers
include ::Gitlab::Database::Migrations::BackgroundMigrationHelpers
- ALLOWED_TABLES = %w[audit_events].freeze
+ ALLOWED_TABLES = %w[audit_events web_hook_logs].freeze
ERROR_SCOPE = 'table partitioning'
MIGRATION_CLASS_NAME = "::#{module_parent_name}::BackfillPartitionedTable"
diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb
index ff78fd0218c..40845c0d5e0 100644
--- a/lib/gitlab/database/similarity_score.rb
+++ b/lib/gitlab/database/similarity_score.rb
@@ -74,9 +74,14 @@ module Gitlab
end
# (SIMILARITY ...) + (SIMILARITY ...)
- expressions.inject(first_expression) do |expression1, expression2|
+ additions = expressions.inject(first_expression) do |expression1, expression2|
Arel::Nodes::Addition.new(expression1, expression2)
end
+
+ score_as_numeric = Arel::Nodes::NamedFunction.new('CAST', [Arel::Nodes::Grouping.new(additions).as('numeric')])
+
+ # Rounding the score to two decimals
+ Arel::Nodes::NamedFunction.new('ROUND', [score_as_numeric, 2])
end
def self.order_by_similarity?(arel_query)