diff options
Diffstat (limited to 'lib/gitlab/database/background_migration')
6 files changed, 202 insertions, 105 deletions
diff --git a/lib/gitlab/database/background_migration/batch_metrics.rb b/lib/gitlab/database/background_migration/batch_metrics.rb index 3e6d7ac3c9f..14fe0c14c24 100644 --- a/lib/gitlab/database/background_migration/batch_metrics.rb +++ b/lib/gitlab/database/background_migration/batch_metrics.rb @@ -5,17 +5,24 @@ module Gitlab module BackgroundMigration class BatchMetrics attr_reader :timings + attr_reader :affected_rows def initialize @timings = {} + @affected_rows = {} end - def time_operation(label) + def time_operation(label, &blk) + instrument_operation(label, instrument_affected_rows: false, &blk) + end + + def instrument_operation(label, instrument_affected_rows: true) start_time = monotonic_time - yield + count = yield timings_for_label(label) << monotonic_time - start_time + affected_rows_for_label(label) << count if instrument_affected_rows && count.is_a?(Integer) end private @@ -24,6 +31,10 @@ module Gitlab timings[label] ||= [] end + def affected_rows_for_label(label) + affected_rows[label] ||= [] + end + def monotonic_time Gitlab::Metrics::System.monotonic_time end diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb index f3160679d64..ebc3ee240bd 100644 --- a/lib/gitlab/database/background_migration/batched_job.rb +++ b/lib/gitlab/database/background_migration/batched_job.rb @@ -25,6 +25,7 @@ module Gitlab scope :except_succeeded, -> { without_status(:succeeded) } scope :successful_in_execution_order, -> { where.not(finished_at: nil).with_status(:succeeded).order(:finished_at) } scope :with_preloads, -> { preload(:batched_migration) } + scope :created_since, ->(date_time) { where('created_at >= ?', date_time) } state_machine :status, initial: :pending do state :pending, value: 0 @@ -62,7 +63,13 @@ module Gitlab job.split_and_retry! if job.can_split?(exception) rescue SplitAndRetryError => error - Gitlab::AppLogger.error(message: error.message, batched_job_id: job.id) + Gitlab::AppLogger.error( + message: error.message, + batched_job_id: job.id, + batched_migration_id: job.batched_migration.id, + job_class_name: job.migration_job_class_name, + job_arguments: job.migration_job_arguments + ) end after_transition do |job, transition| @@ -72,13 +79,23 @@ module Gitlab job.batched_job_transition_logs.create(previous_status: transition.from, next_status: transition.to, exception_class: exception&.class, exception_message: exception&.message) - Gitlab::ErrorTracking.track_exception(exception, batched_job_id: job.id) if exception - - Gitlab::AppLogger.info(message: 'BatchedJob transition', batched_job_id: job.id, previous_state: transition.from_name, new_state: transition.to_name) + Gitlab::ErrorTracking.track_exception(exception, batched_job_id: job.id, job_class_name: job.migration_job_class_name, job_arguments: job.migration_job_arguments) if exception + + Gitlab::AppLogger.info( + message: 'BatchedJob transition', + batched_job_id: job.id, + previous_state: transition.from_name, + new_state: transition.to_name, + batched_migration_id: job.batched_migration.id, + job_class_name: job.migration_job_class_name, + job_arguments: job.migration_job_arguments, + exception_class: exception&.class, + exception_message: exception&.message + ) end end - delegate :job_class, :table_name, :column_name, :job_arguments, + delegate :job_class, :table_name, :column_name, :job_arguments, :job_class_name, to: :batched_migration, prefix: :migration attribute :pause_ms, :integer, default: 100 diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb index 65c15795de6..d94bf060d05 100644 --- a/lib/gitlab/database/background_migration/batched_migration.rb +++ b/lib/gitlab/database/background_migration/batched_migration.rb @@ -6,6 +6,8 @@ module Gitlab class BatchedMigration < SharedModel JOB_CLASS_MODULE = 'Gitlab::BackgroundMigration' BATCH_CLASS_MODULE = "#{JOB_CLASS_MODULE}::BatchingStrategies" + MAXIMUM_FAILED_RATIO = 0.5 + MINIMUM_JOBS = 50 self.table_name = :batched_background_migrations @@ -21,28 +23,60 @@ module Gitlab validate :validate_batched_jobs_status, if: -> { status_changed? && finished? } scope :queue_order, -> { order(id: :asc) } - scope :queued, -> { where(status: [:active, :paused]) } + scope :queued, -> { with_statuses(:active, :paused) } + + # on_hold_until is a temporary runtime status which puts execution "on hold" + scope :executable, -> { with_status(:active).where('on_hold_until IS NULL OR on_hold_until < NOW()') } + scope :for_configuration, ->(job_class_name, table_name, column_name, job_arguments) do where(job_class_name: job_class_name, table_name: table_name, column_name: column_name) .where("job_arguments = ?", job_arguments.to_json) # rubocop:disable Rails/WhereEquals end - enum status: { - paused: 0, - active: 1, - finished: 3, - failed: 4, - finalizing: 5 - } + state_machine :status, initial: :paused do + state :paused, value: 0 + state :active, value: 1 + state :finished, value: 3 + state :failed, value: 4 + state :finalizing, value: 5 + + event :pause do + transition any => :paused + end + + event :execute do + transition any => :active + end + + event :finish do + transition any => :finished + end + + event :failure do + transition any => :failed + end + + event :finalize do + transition any => :finalizing + end + + before_transition any => :active do |migration| + migration.started_at = Time.current if migration.respond_to?(:started_at) + end + end attribute :pause_ms, :integer, default: 100 + def self.valid_status + state_machine.states.map(&:name) + end + def self.find_for_configuration(job_class_name, table_name, column_name, job_arguments) for_configuration(job_class_name, table_name, column_name, job_arguments).first end def self.active_migration - active.queue_order.first + executable.queue_order.first end def self.successful_rows_counts(migrations) @@ -74,11 +108,23 @@ module Gitlab batched_jobs.with_status(:failed).each_batch(of: 100) do |batch| self.class.transaction do batch.lock.each(&:split_and_retry!) - self.active! + self.execute! end end - self.active! + self.execute! + end + + def should_stop? + return unless started_at + + total_jobs = batched_jobs.created_since(started_at).count + + return if total_jobs < MINIMUM_JOBS + + failed_jobs = batched_jobs.with_status(:failed).created_since(started_at).count + + failed_jobs.fdiv(total_jobs) > MAXIMUM_FAILED_RATIO end def next_min_value @@ -136,6 +182,10 @@ module Gitlab BatchOptimizer.new(self).optimize! end + def hold!(until_time: 10.minutes.from_now) + update!(on_hold_until: until_time) + end + private def validate_batched_jobs_status diff --git a/lib/gitlab/database/background_migration/batched_migration_runner.rb b/lib/gitlab/database/background_migration/batched_migration_runner.rb index 06cd40f1e06..59ff9a9744f 100644 --- a/lib/gitlab/database/background_migration/batched_migration_runner.rb +++ b/lib/gitlab/database/background_migration/batched_migration_runner.rb @@ -6,13 +6,13 @@ module Gitlab class BatchedMigrationRunner FailedToFinalize = Class.new(RuntimeError) - def self.finalize(job_class_name, table_name, column_name, job_arguments, connection: ApplicationRecord.connection) + def self.finalize(job_class_name, table_name, column_name, job_arguments, connection:) new(connection: connection).finalize(job_class_name, table_name, column_name, job_arguments) end - def initialize(migration_wrapper = BatchedMigrationWrapper.new, connection: ApplicationRecord.connection) - @migration_wrapper = migration_wrapper + def initialize(connection:, migration_wrapper: BatchedMigrationWrapper.new(connection: connection)) @connection = connection + @migration_wrapper = migration_wrapper end # Runs the next batched_job for a batched_background_migration. @@ -30,6 +30,7 @@ module Gitlab migration_wrapper.perform(next_batched_job) active_migration.optimize! + active_migration.failure! if next_batched_job.failed? && active_migration.should_stop? else finish_active_migration(active_migration) end @@ -67,7 +68,7 @@ module Gitlab elsif migration.finished? Gitlab::AppLogger.warn "Batched background migration for the given configuration is already finished: #{configuration}" else - migration.finalizing! + migration.finalize! migration.batched_jobs.with_status(:pending).each { |job| migration_wrapper.perform(job) } run_migration_while(migration, :finalizing) @@ -78,7 +79,7 @@ module Gitlab private - attr_reader :migration_wrapper, :connection + attr_reader :connection, :migration_wrapper def find_or_create_next_batched_job(active_migration) if next_batch_range = find_next_batch_range(active_migration) @@ -118,14 +119,14 @@ module Gitlab return if active_migration.batched_jobs.active.exists? if active_migration.batched_jobs.with_status(:failed).exists? - active_migration.failed! + active_migration.failure! else - active_migration.finished! + active_migration.finish! end end def run_migration_while(migration, status) - while migration.status == status.to_s + while migration.status_name == status run_migration_job(migration) migration.reload_last_job diff --git a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb index 057f856d859..ec68f401ca2 100644 --- a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb +++ b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb @@ -4,10 +4,9 @@ module Gitlab module Database module BackgroundMigration class BatchedMigrationWrapper - extend Gitlab::Utils::StrongMemoize - - def initialize(connection: ApplicationRecord.connection) + def initialize(connection:, metrics: PrometheusMetrics.new) @connection = connection + @metrics = metrics end # Wraps the execution of a batched_background_migration. @@ -28,12 +27,12 @@ module Gitlab raise ensure - track_prometheus_metrics(batch_tracking_record) + metrics.track(batch_tracking_record) end private - attr_reader :connection + attr_reader :connection, :metrics def start_tracking_execution(tracking_record) tracking_record.run! @@ -63,80 +62,6 @@ module Gitlab job_class.new end end - - def track_prometheus_metrics(tracking_record) - migration = tracking_record.batched_migration - base_labels = migration.prometheus_labels - - metric_for(:gauge_batch_size).set(base_labels, tracking_record.batch_size) - metric_for(:gauge_sub_batch_size).set(base_labels, tracking_record.sub_batch_size) - metric_for(:gauge_interval).set(base_labels, tracking_record.batched_migration.interval) - metric_for(:gauge_job_duration).set(base_labels, (tracking_record.finished_at - tracking_record.started_at).to_i) - metric_for(:counter_updated_tuples).increment(base_labels, tracking_record.batch_size) - metric_for(:gauge_migrated_tuples).set(base_labels, tracking_record.batched_migration.migrated_tuple_count) - metric_for(:gauge_total_tuple_count).set(base_labels, tracking_record.batched_migration.total_tuple_count) - metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i) - - if metrics = tracking_record.metrics - metrics['timings']&.each do |key, timings| - summary = metric_for(:histogram_timings) - labels = base_labels.merge(operation: key) - - timings.each do |timing| - summary.observe(labels, timing) - end - end - end - end - - def metric_for(name) - self.class.metrics[name] - end - - def self.metrics - strong_memoize(:metrics) do - { - gauge_batch_size: Gitlab::Metrics.gauge( - :batched_migration_job_batch_size, - 'Batch size for a batched migration job' - ), - gauge_sub_batch_size: Gitlab::Metrics.gauge( - :batched_migration_job_sub_batch_size, - 'Sub-batch size for a batched migration job' - ), - gauge_interval: Gitlab::Metrics.gauge( - :batched_migration_job_interval_seconds, - 'Interval for a batched migration job' - ), - gauge_job_duration: Gitlab::Metrics.gauge( - :batched_migration_job_duration_seconds, - 'Duration for a batched migration job' - ), - counter_updated_tuples: Gitlab::Metrics.counter( - :batched_migration_job_updated_tuples_total, - 'Number of tuples updated by batched migration job' - ), - gauge_migrated_tuples: Gitlab::Metrics.gauge( - :batched_migration_migrated_tuples_total, - 'Total number of tuples migrated by a batched migration' - ), - histogram_timings: Gitlab::Metrics.histogram( - :batched_migration_job_query_duration_seconds, - 'Query timings for a batched migration job', - {}, - [0.1, 0.25, 0.5, 1, 5].freeze - ), - gauge_total_tuple_count: Gitlab::Metrics.gauge( - :batched_migration_total_tuple_count, - 'Total tuple count the migration needs to touch' - ), - gauge_last_update_time: Gitlab::Metrics.gauge( - :batched_migration_last_update_time_seconds, - 'Unix epoch time in seconds' - ) - } - end - end end end end diff --git a/lib/gitlab/database/background_migration/prometheus_metrics.rb b/lib/gitlab/database/background_migration/prometheus_metrics.rb new file mode 100644 index 00000000000..ce1da4c59eb --- /dev/null +++ b/lib/gitlab/database/background_migration/prometheus_metrics.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module BackgroundMigration + class PrometheusMetrics + extend Gitlab::Utils::StrongMemoize + + QUERY_TIMING_BUCKETS = [0.1, 0.25, 0.5, 1, 5].freeze + + def track(job_record) + migration_record = job_record.batched_migration + base_labels = migration_record.prometheus_labels + + metric_for(:gauge_batch_size).set(base_labels, job_record.batch_size) + metric_for(:gauge_sub_batch_size).set(base_labels, job_record.sub_batch_size) + metric_for(:gauge_interval).set(base_labels, job_record.batched_migration.interval) + metric_for(:gauge_job_duration).set(base_labels, (job_record.finished_at - job_record.started_at).to_i) + metric_for(:counter_updated_tuples).increment(base_labels, job_record.batch_size) + metric_for(:gauge_migrated_tuples).set(base_labels, migration_record.migrated_tuple_count) + metric_for(:gauge_total_tuple_count).set(base_labels, migration_record.total_tuple_count) + metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i) + + track_timing_metrics(base_labels, job_record.metrics) + end + + def self.metrics + strong_memoize(:metrics) do + { + gauge_batch_size: Gitlab::Metrics.gauge( + :batched_migration_job_batch_size, + 'Batch size for a batched migration job' + ), + gauge_sub_batch_size: Gitlab::Metrics.gauge( + :batched_migration_job_sub_batch_size, + 'Sub-batch size for a batched migration job' + ), + gauge_interval: Gitlab::Metrics.gauge( + :batched_migration_job_interval_seconds, + 'Interval for a batched migration job' + ), + gauge_job_duration: Gitlab::Metrics.gauge( + :batched_migration_job_duration_seconds, + 'Duration for a batched migration job' + ), + counter_updated_tuples: Gitlab::Metrics.counter( + :batched_migration_job_updated_tuples_total, + 'Number of tuples updated by batched migration job' + ), + gauge_migrated_tuples: Gitlab::Metrics.gauge( + :batched_migration_migrated_tuples_total, + 'Total number of tuples migrated by a batched migration' + ), + histogram_timings: Gitlab::Metrics.histogram( + :batched_migration_job_query_duration_seconds, + 'Query timings for a batched migration job', + {}, + QUERY_TIMING_BUCKETS + ), + gauge_total_tuple_count: Gitlab::Metrics.gauge( + :batched_migration_total_tuple_count, + 'Total tuple count the migration needs to touch' + ), + gauge_last_update_time: Gitlab::Metrics.gauge( + :batched_migration_last_update_time_seconds, + 'Unix epoch time in seconds' + ) + } + end + end + + private + + def track_timing_metrics(base_labels, metrics) + return unless metrics && metrics['timings'] + + metrics['timings'].each do |key, timings| + summary = metric_for(:histogram_timings) + labels = base_labels.merge(operation: key) + + timings.each do |timing| + summary.observe(labels, timing) + end + end + end + + def metric_for(name) + self.class.metrics[name] + end + end + end + end +end |