diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-07-20 12:55:51 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-07-20 12:55:51 +0300 |
commit | e8d2c2579383897a1dd7f9debd359abe8ae8373d (patch) | |
tree | c42be41678c2586d49a75cabce89322082698334 /lib/gitlab/database | |
parent | fc845b37ec3a90aaa719975f607740c22ba6a113 (diff) |
Add latest changes from gitlab-org/gitlab@14-1-stable-eev14.1.0-rc42
Diffstat (limited to 'lib/gitlab/database')
39 files changed, 722 insertions, 694 deletions
diff --git a/lib/gitlab/database/as_with_materialized.rb b/lib/gitlab/database/as_with_materialized.rb index e7e3c1766a9..eda991efbd5 100644 --- a/lib/gitlab/database/as_with_materialized.rb +++ b/lib/gitlab/database/as_with_materialized.rb @@ -24,6 +24,8 @@ module Gitlab end # Note: to be deleted after the minimum PG version is set to 12.0 + # Update the documentation together when deleting the method + # https://docs.gitlab.com/ee/development/merge_request_performance_guidelines.html#use-ctes-wisely def self.materialized_if_supported materialized_supported? ? 'MATERIALIZED' : '' end diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb index 9a1dc4ee17d..03bd02d7554 100644 --- a/lib/gitlab/database/background_migration/batched_job.rb +++ b/lib/gitlab/database/background_migration/batched_job.rb @@ -44,6 +44,51 @@ module Gitlab # TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801) duration.to_f / batched_migration.interval end + + def split_and_retry! + with_lock do + raise 'Only failed jobs can be split' unless failed? + + new_batch_size = batch_size / 2 + + raise 'Job cannot be split further' if new_batch_size < 1 + + batching_strategy = batched_migration.batch_class.new + next_batch_bounds = batching_strategy.next_batch( + batched_migration.table_name, + batched_migration.column_name, + batch_min_value: min_value, + batch_size: new_batch_size + ) + midpoint = next_batch_bounds.last + + # We don't want the midpoint to go over the existing max_value because + # those IDs would already be in the next batched migration job. + # This could happen when a lot of records in the current batch are deleted. + # + # In this case, we just lower the batch size so that future calls to this + # method could eventually split the job if it continues to fail. + if midpoint >= max_value + update!(batch_size: new_batch_size, attempts: 0) + else + old_max_value = max_value + + update!( + batch_size: new_batch_size, + max_value: midpoint, + attempts: 0, + started_at: nil, + finished_at: nil, + metrics: {} + ) + + new_record = dup + new_record.min_value = midpoint.next + new_record.max_value = old_max_value + new_record.save! + end + end + end end end end diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb index 36e89023c86..9d66824da51 100644 --- a/lib/gitlab/database/background_migration/batched_migration.rb +++ b/lib/gitlab/database/background_migration/batched_migration.rb @@ -10,7 +10,7 @@ module Gitlab self.table_name = :batched_background_migrations has_many :batched_jobs, foreign_key: :batched_background_migration_id - has_one :last_job, -> { order(id: :desc) }, + has_one :last_job, -> { order(max_value: :desc) }, class_name: 'Gitlab::Database::BackgroundMigration::BatchedJob', foreign_key: :batched_background_migration_id @@ -29,11 +29,16 @@ module Gitlab paused: 0, active: 1, finished: 3, - failed: 4 + failed: 4, + finalizing: 5 } attribute :pause_ms, :integer, default: 100 + def self.find_for_configuration(job_class_name, table_name, column_name, job_arguments) + for_configuration(job_class_name, table_name, column_name, job_arguments).first + end + def self.active_migration active.queue_order.first end diff --git a/lib/gitlab/database/background_migration/batched_migration_runner.rb b/lib/gitlab/database/background_migration/batched_migration_runner.rb index 67fe6c536e6..14e3919986e 100644 --- a/lib/gitlab/database/background_migration/batched_migration_runner.rb +++ b/lib/gitlab/database/background_migration/batched_migration_runner.rb @@ -4,6 +4,12 @@ module Gitlab module Database module BackgroundMigration class BatchedMigrationRunner + FailedToFinalize = Class.new(RuntimeError) + + def self.finalize(job_class_name, table_name, column_name, job_arguments) + new.finalize(job_class_name, table_name, column_name, job_arguments) + end + def initialize(migration_wrapper = BatchedMigrationWrapper.new) @migration_wrapper = migration_wrapper end @@ -37,10 +43,35 @@ module Gitlab raise 'this method is not intended for use in real environments' end - while migration.active? - run_migration_job(migration) + run_migration_while(migration, :active) + end - migration.reload_last_job + # Finalize migration for given configuration. + # + # If the migration is already finished, do nothing. Otherwise change its status to `finalizing` + # in order to prevent it being picked up by the background worker. Perform all pending jobs, + # then keep running until migration is finished. + def finalize(job_class_name, table_name, column_name, job_arguments) + migration = BatchedMigration.find_for_configuration(job_class_name, table_name, column_name, job_arguments) + + configuration = { + job_class_name: job_class_name, + table_name: table_name, + column_name: column_name, + job_arguments: job_arguments + } + + if migration.nil? + Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}" + elsif migration.finished? + Gitlab::AppLogger.warn "Batched background migration for the given configuration is already finished: #{configuration}" + else + migration.finalizing! + migration.batched_jobs.pending.each { |job| migration_wrapper.perform(job) } + + run_migration_while(migration, :finalizing) + + raise FailedToFinalize unless migration.finished? end end @@ -90,6 +121,14 @@ module Gitlab active_migration.finished! end end + + def run_migration_while(migration, status) + while migration.status == status.to_s + run_migration_job(migration) + + migration.reload_last_job + end + end end end end diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb index 9002d39e1ee..49f56b5be97 100644 --- a/lib/gitlab/database/batch_count.rb +++ b/lib/gitlab/database/batch_count.rb @@ -18,7 +18,7 @@ # batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id) # batch_count(Namespace.group(:type)) # batch_distinct_count(::Project, :creator_id) -# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id)) +# batch_distinct_count(::Project.aimed_for_deletion.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id)) # batch_distinct_count(Project.group(:visibility_level), :creator_id) # batch_sum(User, :sign_in_count) # batch_sum(Issue.group(:state_id), :weight)) @@ -41,159 +41,5 @@ module Gitlab include BatchCount end end - - class BatchCounter - FALLBACK = -1 - MIN_REQUIRED_BATCH_SIZE = 1_250 - DEFAULT_SUM_BATCH_SIZE = 1_000 - MAX_ALLOWED_LOOPS = 10_000 - SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep - ALLOWED_MODES = [:itself, :distinct].freeze - FALLBACK_FINISH = 0 - OFFSET_BY_ONE = 1 - - # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 - DEFAULT_DISTINCT_BATCH_SIZE = 10_000 - DEFAULT_BATCH_SIZE = 100_000 - - def initialize(relation, column: nil, operation: :count, operation_args: nil) - @relation = relation - @column = column || relation.primary_key - @operation = operation - @operation_args = operation_args - end - - def unwanted_configuration?(finish, batch_size, start) - (@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) || - (@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) || - (finish - start) / batch_size >= MAX_ALLOWED_LOOPS || - start >= finish - end - - def count(batch_size: nil, mode: :itself, start: nil, finish: nil) - raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? - - check_mode!(mode) - - # non-distinct have better performance - batch_size ||= batch_size_for_mode_and_operation(mode, @operation) - - start = actual_start(start) - finish = actual_finish(finish) - - raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0 - return FALLBACK if unwanted_configuration?(finish, batch_size, start) - - results = nil - batch_start = start - - while batch_start < finish - begin - batch_end = [batch_start + batch_size, finish].min - batch_relation = build_relation_batch(batch_start, batch_end, mode) - - op_args = @operation_args - if @operation == :count && @operation_args.blank? && use_loose_index_scan_for_distinct_values?(mode) - op_args = [Gitlab::Database::LooseIndexScanDistinctCount::COLUMN_ALIAS] - end - - results = merge_results(results, batch_relation.send(@operation, *op_args)) # rubocop:disable GitlabSecurity/PublicSend - batch_start = batch_end - rescue ActiveRecord::QueryCanceled => error - # retry with a safe batch size & warmer cache - if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE - batch_size /= 2 - else - log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error) - return FALLBACK - end - rescue Gitlab::Database::LooseIndexScanDistinctCount::ColumnConfigurationError => error - Gitlab::AppJsonLogger - .error( - event: 'batch_count', - relation: @relation.table_name, - operation: @operation, - operation_args: @operation_args, - mode: mode, - message: "LooseIndexScanDistinctCount column error: #{error.message}" - ) - - return FALLBACK - end - - sleep(SLEEP_TIME_IN_SECONDS) - end - - results - end - - def merge_results(results, object) - return object unless results - - if object.is_a?(Hash) - results.merge!(object) { |_, a, b| a + b } - else - results + object - end - end - - private - - def build_relation_batch(start, finish, mode) - if use_loose_index_scan_for_distinct_values?(mode) - Gitlab::Database::LooseIndexScanDistinctCount.new(@relation, @column).build_query(from: start, to: finish) - else - @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend - end - end - - def batch_size_for_mode_and_operation(mode, operation) - return DEFAULT_SUM_BATCH_SIZE if operation == :sum - - mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE - end - - def between_condition(start, finish) - return @column.between(start...finish) if @column.is_a?(Arel::Attributes::Attribute) - - { @column => start...finish } - end - - def actual_start(start) - start || @relation.unscope(:group, :having).minimum(@column) || 0 - end - - def actual_finish(finish) - (finish || @relation.unscope(:group, :having).maximum(@column) || FALLBACK_FINISH) + OFFSET_BY_ONE - end - - def check_mode!(mode) - raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode) - raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct - raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct - end - - def log_canceled_batch_fetch(batch_start, mode, query, error) - Gitlab::AppJsonLogger - .error( - event: 'batch_count', - relation: @relation.table_name, - operation: @operation, - operation_args: @operation_args, - start: batch_start, - mode: mode, - query: query, - message: "Query has been canceled with message: #{error.message}" - ) - end - - def use_loose_index_scan_for_distinct_values?(mode) - Feature.enabled?(:loose_index_scan_for_distinct_values) && not_group_by_query? && mode == :distinct - end - - def not_group_by_query? - !@relation.is_a?(ActiveRecord::Relation) || @relation.group_values.blank? - end - end end end diff --git a/lib/gitlab/database/batch_counter.rb b/lib/gitlab/database/batch_counter.rb new file mode 100644 index 00000000000..5f2e404c9da --- /dev/null +++ b/lib/gitlab/database/batch_counter.rb @@ -0,0 +1,159 @@ +# frozen_string_literal: true + +module Gitlab + module Database + class BatchCounter + FALLBACK = -1 + MIN_REQUIRED_BATCH_SIZE = 1_250 + DEFAULT_SUM_BATCH_SIZE = 1_000 + MAX_ALLOWED_LOOPS = 10_000 + SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep + ALLOWED_MODES = [:itself, :distinct].freeze + FALLBACK_FINISH = 0 + OFFSET_BY_ONE = 1 + + # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 + DEFAULT_DISTINCT_BATCH_SIZE = 10_000 + DEFAULT_BATCH_SIZE = 100_000 + + def initialize(relation, column: nil, operation: :count, operation_args: nil) + @relation = relation + @column = column || relation.primary_key + @operation = operation + @operation_args = operation_args + end + + def unwanted_configuration?(finish, batch_size, start) + (@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) || + (@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) || + (finish - start) / batch_size >= MAX_ALLOWED_LOOPS || + start >= finish + end + + def count(batch_size: nil, mode: :itself, start: nil, finish: nil) + raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? + + check_mode!(mode) + + # non-distinct have better performance + batch_size ||= batch_size_for_mode_and_operation(mode, @operation) + + start = actual_start(start) + finish = actual_finish(finish) + + raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0 + return FALLBACK if unwanted_configuration?(finish, batch_size, start) + + results = nil + batch_start = start + + while batch_start < finish + begin + batch_end = [batch_start + batch_size, finish].min + batch_relation = build_relation_batch(batch_start, batch_end, mode) + + op_args = @operation_args + if @operation == :count && @operation_args.blank? && use_loose_index_scan_for_distinct_values?(mode) + op_args = [Gitlab::Database::LooseIndexScanDistinctCount::COLUMN_ALIAS] + end + + results = merge_results(results, batch_relation.send(@operation, *op_args)) # rubocop:disable GitlabSecurity/PublicSend + batch_start = batch_end + rescue ActiveRecord::QueryCanceled => error + # retry with a safe batch size & warmer cache + if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE + batch_size /= 2 + else + log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error) + return FALLBACK + end + rescue Gitlab::Database::LooseIndexScanDistinctCount::ColumnConfigurationError => error + Gitlab::AppJsonLogger + .error( + event: 'batch_count', + relation: @relation.table_name, + operation: @operation, + operation_args: @operation_args, + mode: mode, + message: "LooseIndexScanDistinctCount column error: #{error.message}" + ) + + return FALLBACK + end + + sleep(SLEEP_TIME_IN_SECONDS) + end + + results + end + + def merge_results(results, object) + return object unless results + + if object.is_a?(Hash) + results.merge!(object) { |_, a, b| a + b } + else + results + object + end + end + + private + + def build_relation_batch(start, finish, mode) + if use_loose_index_scan_for_distinct_values?(mode) + Gitlab::Database::LooseIndexScanDistinctCount.new(@relation, @column).build_query(from: start, to: finish) + else + @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend + end + end + + def batch_size_for_mode_and_operation(mode, operation) + return DEFAULT_SUM_BATCH_SIZE if operation == :sum + + mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE + end + + def between_condition(start, finish) + return @column.between(start...finish) if @column.is_a?(Arel::Attributes::Attribute) + + { @column => start...finish } + end + + def actual_start(start) + start || @relation.unscope(:group, :having).minimum(@column) || 0 + end + + def actual_finish(finish) + (finish || @relation.unscope(:group, :having).maximum(@column) || FALLBACK_FINISH) + OFFSET_BY_ONE + end + + def check_mode!(mode) + raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode) + raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct + raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct + end + + def log_canceled_batch_fetch(batch_start, mode, query, error) + Gitlab::AppJsonLogger + .error( + event: 'batch_count', + relation: @relation.table_name, + operation: @operation, + operation_args: @operation_args, + start: batch_start, + mode: mode, + query: query, + message: "Query has been canceled with message: #{error.message}" + ) + end + + def use_loose_index_scan_for_distinct_values?(mode) + Feature.enabled?(:loose_index_scan_for_distinct_values) && not_group_by_query? && mode == :distinct + end + + def not_group_by_query? + !@relation.is_a?(ActiveRecord::Relation) || @relation.group_values.blank? + end + end + end +end diff --git a/lib/gitlab/database/custom_structure.rb b/lib/gitlab/database/custom_structure.rb deleted file mode 100644 index e4404e73a63..00000000000 --- a/lib/gitlab/database/custom_structure.rb +++ /dev/null @@ -1,43 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - class CustomStructure - CUSTOM_DUMP_FILE = 'db/gitlab_structure.sql' - - def dump - File.open(self.class.custom_dump_filepath, 'wb') do |io| - io << "-- this file tracks custom GitLab data, such as foreign keys referencing partitioned tables\n" - io << "-- more details can be found in the issue: https://gitlab.com/gitlab-org/gitlab/-/issues/201872\n\n" - - dump_partitioned_foreign_keys(io) if partitioned_foreign_keys_exist? - end - end - - def self.custom_dump_filepath - Rails.root.join(CUSTOM_DUMP_FILE) - end - - private - - def dump_partitioned_foreign_keys(io) - io << "COPY partitioned_foreign_keys (#{partitioned_fk_columns.join(", ")}) FROM STDIN;\n" - - PartitioningMigrationHelpers::PartitionedForeignKey.find_each do |fk| - io << fk.attributes.values_at(*partitioned_fk_columns).join("\t") << "\n" - end - io << "\\.\n" - end - - def partitioned_foreign_keys_exist? - return false unless PartitioningMigrationHelpers::PartitionedForeignKey.table_exists? - - PartitioningMigrationHelpers::PartitionedForeignKey.exists? - end - - def partitioned_fk_columns - @partitioned_fk_columns ||= PartitioningMigrationHelpers::PartitionedForeignKey.column_names - end - end - end -end diff --git a/lib/gitlab/database/dynamic_model_helpers.rb b/lib/gitlab/database/dynamic_model_helpers.rb index 7439591be99..220062f1bc6 100644 --- a/lib/gitlab/database/dynamic_model_helpers.rb +++ b/lib/gitlab/database/dynamic_model_helpers.rb @@ -3,6 +3,8 @@ module Gitlab module Database module DynamicModelHelpers + BATCH_SIZE = 1_000 + def define_batchable_model(table_name) Class.new(ActiveRecord::Base) do include EachBatch @@ -12,7 +14,7 @@ module Gitlab end end - def each_batch(table_name, scope: ->(table) { table.all }, of: 1000) + def each_batch(table_name, scope: ->(table) { table.all }, of: BATCH_SIZE) if transaction_open? raise <<~MSG.squish each_batch should not run inside a transaction, you can disable @@ -25,7 +27,7 @@ module Gitlab .each_batch(of: of) { |batch| yield batch } end - def each_batch_range(table_name, scope: ->(table) { table.all }, of: 1000) + def each_batch_range(table_name, scope: ->(table) { table.all }, of: BATCH_SIZE) each_batch(table_name, scope: scope, of: of) do |batch| yield batch.pluck('MIN(id), MAX(id)').first end diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb index 88743cd2e75..31d41a6d6c0 100644 --- a/lib/gitlab/database/load_balancing.rb +++ b/lib/gitlab/database/load_balancing.rb @@ -85,7 +85,6 @@ module Gitlab # Returns true if load balancing is to be enabled. def self.enable? return false if Gitlab::Runtime.rake? - return false if Gitlab::Runtime.sidekiq? && !Gitlab::Utils.to_boolean(ENV['ENABLE_LOAD_BALANCING_FOR_SIDEKIQ'], default: false) return false unless self.configured? true diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb index a833bb8491f..a5d67ebc050 100644 --- a/lib/gitlab/database/load_balancing/load_balancer.rb +++ b/lib/gitlab/database/load_balancing/load_balancer.rb @@ -147,15 +147,15 @@ module Gitlab raise 'Failed to determine the write location of the primary database' end - # Returns true if all hosts have caught up to the given transaction - # write location. - def all_caught_up?(location) - @host_list.hosts.all? { |host| host.caught_up?(location) } - end - # Returns true if there was at least one host that has caught up with the given transaction. # # In case of a retry, this method also stores the set of hosts that have caught up. + # + # UPD: `select_caught_up_hosts` seems to have redundant logic managing host list (`:gitlab_load_balancer_valid_hosts`), + # while we only need a single host: https://gitlab.com/gitlab-org/gitlab/-/issues/326125#note_615271604 + # Also, shuffling the list afterwards doesn't seem to be necessary. + # This may be improved by merging this method with `select_up_to_date_host`. + # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out def select_caught_up_hosts(location) all_hosts = @host_list.hosts valid_hosts = all_hosts.select { |host| host.caught_up?(location) } @@ -179,6 +179,8 @@ module Gitlab # Returns true if there was at least one host that has caught up with the given transaction. # Similar to `#select_caught_up_hosts`, picks a random host, to rotate replicas we use. # Unlike `#select_caught_up_hosts`, does not iterate over all hosts if finds any. + # + # It is going to be merged with `select_caught_up_hosts`, because they intend to do the same. def select_up_to_date_host(location) all_hosts = @host_list.hosts.shuffle host = all_hosts.find { |host| host.caught_up?(location) } @@ -190,6 +192,7 @@ module Gitlab true end + # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out def set_consistent_hosts_for_request(hosts) RequestStore[VALID_HOSTS_CACHE_KEY] = hosts end diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb index 4734ff99bd3..8e7e6865402 100644 --- a/lib/gitlab/database/load_balancing/rack_middleware.rb +++ b/lib/gitlab/database/load_balancing/rack_middleware.rb @@ -39,6 +39,8 @@ module Gitlab result = @app.call(env) + ActiveSupport::Notifications.instrument('web_transaction_completed.load_balancing') + stick_if_necessary(env) result diff --git a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb index 524d69c00c0..0e36ebbc3ee 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb @@ -5,27 +5,29 @@ module Gitlab module LoadBalancing class SidekiqClientMiddleware def call(worker_class, job, _queue, _redis_pool) + # Mailers can't be constantized worker_class = worker_class.to_s.safe_constantize - mark_data_consistency_location(worker_class, job) + if load_balancing_enabled?(worker_class) + job['worker_data_consistency'] = worker_class.get_data_consistency + set_data_consistency_location!(job) unless location_already_provided?(job) + else + job['worker_data_consistency'] = ::WorkerAttributes::DEFAULT_DATA_CONSISTENCY + end yield end private - def mark_data_consistency_location(worker_class, job) - # Mailers can't be constantized - return unless worker_class - return unless worker_class.include?(::ApplicationWorker) - return unless worker_class.get_data_consistency_feature_flag_enabled? - - return if location_already_provided?(job) - - job['worker_data_consistency'] = worker_class.get_data_consistency - - return unless worker_class.utilizes_load_balancing_capabilities? + def load_balancing_enabled?(worker_class) + worker_class && + worker_class.include?(::ApplicationWorker) && + worker_class.utilizes_load_balancing_capabilities? && + worker_class.get_data_consistency_feature_flag_enabled? + end + def set_data_consistency_location!(job) if Session.current.use_primary? job['database_write_location'] = load_balancer.primary_write_location else diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb index 9bd0adf8dbd..0551750568a 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb @@ -7,8 +7,18 @@ module Gitlab JobReplicaNotUpToDate = Class.new(StandardError) def call(worker, job, _queue) - if requires_primary?(worker.class, job) + worker_class = worker.class + strategy = select_load_balancing_strategy(worker_class, job) + + job['load_balancing_strategy'] = strategy.to_s + + if use_primary?(strategy) Session.current.use_primary! + elsif strategy == :retry + raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\ + " Replica was not up to date." + else + # this means we selected an up-to-date replica, but there is nothing to do in this case. end yield @@ -23,31 +33,42 @@ module Gitlab Session.clear_session end - def requires_primary?(worker_class, job) - return true unless worker_class.include?(::ApplicationWorker) - return true unless worker_class.utilizes_load_balancing_capabilities? - return true unless worker_class.get_data_consistency_feature_flag_enabled? - - location = job['database_write_location'] || job['database_replica_location'] + def use_primary?(strategy) + strategy.start_with?('primary') + end - return true unless location + def select_load_balancing_strategy(worker_class, job) + return :primary unless load_balancing_available?(worker_class) - job_data_consistency = worker_class.get_data_consistency - job[:data_consistency] = job_data_consistency.to_s + location = job['database_write_location'] || job['database_replica_location'] + return :primary_no_wal unless location if replica_caught_up?(location) - job[:database_chosen] = 'replica' - false - elsif job_data_consistency == :delayed && not_yet_retried?(job) - job[:database_chosen] = 'retry' - raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\ - " Replica was not up to date." + # Happy case: we can read from a replica. + retried_before?(worker_class, job) ? :replica_retried : :replica + elsif can_retry?(worker_class, job) + # Optimistic case: The worker allows retries and we have retries left. + :retry else - job[:database_chosen] = 'primary' - true + # Sad case: we need to fall back to the primary. + :primary end end + def load_balancing_available?(worker_class) + worker_class.include?(::ApplicationWorker) && + worker_class.utilizes_load_balancing_capabilities? && + worker_class.get_data_consistency_feature_flag_enabled? + end + + def can_retry?(worker_class, job) + worker_class.get_data_consistency == :delayed && not_yet_retried?(job) + end + + def retried_before?(worker_class, job) + worker_class.get_data_consistency == :delayed && !not_yet_retried?(job) + end + def not_yet_retried?(job) # if `retry_count` is `nil` it indicates that this job was never retried # the `0` indicates that this is a first retry @@ -59,11 +80,7 @@ module Gitlab end def replica_caught_up?(location) - if Feature.enabled?(:sidekiq_load_balancing_rotate_up_to_date_replica) - load_balancer.select_up_to_date_host(location) - else - load_balancer.host.caught_up?(location) - end + load_balancer.select_up_to_date_host(location) end end end diff --git a/lib/gitlab/database/load_balancing/srv_resolver.rb b/lib/gitlab/database/load_balancing/srv_resolver.rb index 20da525f4d2..1f599ef4a27 100644 --- a/lib/gitlab/database/load_balancing/srv_resolver.rb +++ b/lib/gitlab/database/load_balancing/srv_resolver.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'net/dns' + module Gitlab module Database module LoadBalancing diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb index efbd7099300..8e1aa079216 100644 --- a/lib/gitlab/database/load_balancing/sticking.rb +++ b/lib/gitlab/database/load_balancing/sticking.rb @@ -33,8 +33,10 @@ module Gitlab return true unless location - load_balancer.all_caught_up?(location).tap do |caught_up| - unstick(namespace, id) if caught_up + load_balancer.select_up_to_date_host(location).tap do |found| + ActiveSupport::Notifications.instrument('caught_up_replica_pick.load_balancing', { result: found } ) + + unstick(namespace, id) if found end end @@ -51,8 +53,14 @@ module Gitlab # write location. If no such location exists, err on the side of caution. return false unless location - load_balancer.select_caught_up_hosts(location).tap do |selected| - unstick(namespace, id) if selected + if ::Feature.enabled?(:load_balancing_refine_load_balancer_methods) + load_balancer.select_up_to_date_host(location).tap do |selected| + unstick(namespace, id) if selected + end + else + load_balancer.select_caught_up_hosts(location).tap do |selected| + unstick(namespace, id) if selected + end end end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index d155abefdc8..842ab4f7b80 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -217,11 +217,12 @@ module Gitlab # source - The source table containing the foreign key. # target - The target table the key points to. # column - The name of the column to create the foreign key on. + # target_column - The name of the referenced column, defaults to "id". # on_delete - The action to perform when associated data is removed, # defaults to "CASCADE". # name - The name of the foreign key. # - def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, name: nil, validate: true) + def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true) # Transactions would result in ALTER TABLE locks being held for the # duration of the transaction, defeating the purpose of this method. if transaction_open? @@ -231,7 +232,8 @@ module Gitlab options = { column: column, on_delete: on_delete, - name: name.presence || concurrent_foreign_key_name(source, column) + name: name.presence || concurrent_foreign_key_name(source, column), + primary_key: target_column } if foreign_key_exists?(source, target, **options) @@ -252,7 +254,7 @@ module Gitlab ALTER TABLE #{source} ADD CONSTRAINT #{options[:name]} FOREIGN KEY (#{options[:column]}) - REFERENCES #{target} (id) + REFERENCES #{target} (#{target_column}) #{on_delete_statement(options[:on_delete])} NOT VALID; EOF @@ -389,12 +391,14 @@ module Gitlab # * +logger+ - [Gitlab::JsonLogger] # * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES` def with_lock_retries(*args, **kwargs, &block) + raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion) merged_args = { klass: self.class, logger: Gitlab::BackgroundMigration::Logger }.merge(kwargs) - Gitlab::Database::WithLockRetries.new(**merged_args).run(&block) + Gitlab::Database::WithLockRetries.new(**merged_args) + .run(raise_on_exhaustion: raise_on_exhaustion, &block) end def true_value @@ -1106,7 +1110,16 @@ module Gitlab Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}" elsif !migration.finished? raise "Expected batched background migration for the given configuration to be marked as 'finished', " \ - "but it is '#{migration.status}': #{configuration}" + "but it is '#{migration.status}':" \ + "\t#{configuration}" \ + "\n\n" \ + "Finalize it manualy by running" \ + "\n\n" \ + "\tsudo gitlab-rake gitlab:background_migrations:finalize[#{job_class_name},#{table_name},#{column_name},'#{job_arguments.inspect.gsub(',', '\,')}']" \ + "\n\n" \ + "For more information, check the documentation" \ + "\n\n" \ + "\thttps://docs.gitlab.com/ee/user/admin_area/monitoring/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished" end end @@ -1610,6 +1623,13 @@ into similar problems in the future (e.g. when new tables are created). raise end + def rename_constraint(table_name, old_name, new_name) + execute <<~SQL + ALTER TABLE #{quote_table_name(table_name)} + RENAME CONSTRAINT #{quote_column_name(old_name)} TO #{quote_column_name(new_name)} + SQL + end + private def validate_check_constraint_name!(constraint_name) diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb index fa30ffb62f5..28491a934a0 100644 --- a/lib/gitlab/database/migrations/background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/background_migration_helpers.rb @@ -107,7 +107,10 @@ module Gitlab batch_counter = 0 model_class.each_batch(of: batch_size) do |relation, index| - start_id, end_id = relation.pluck(Arel.sql("MIN(#{primary_column_name}), MAX(#{primary_column_name})")).first + max = relation.arel_table[primary_column_name].maximum + min = relation.arel_table[primary_column_name].minimum + + start_id, end_id = relation.pluck(min, max).first # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to diff --git a/lib/gitlab/database/partitioning/monthly_strategy.rb b/lib/gitlab/database/partitioning/monthly_strategy.rb index 82ea1ce26fb..4c68399cb68 100644 --- a/lib/gitlab/database/partitioning/monthly_strategy.rb +++ b/lib/gitlab/database/partitioning/monthly_strategy.rb @@ -4,16 +4,17 @@ module Gitlab module Database module Partitioning class MonthlyStrategy - attr_reader :model, :partitioning_key + attr_reader :model, :partitioning_key, :retain_for # We create this many partitions in the future HEADROOM = 6.months delegate :table_name, to: :model - def initialize(model, partitioning_key) + def initialize(model, partitioning_key, retain_for: nil) @model = model @partitioning_key = partitioning_key + @retain_for = retain_for end def current_partitions @@ -27,13 +28,21 @@ module Gitlab desired_partitions - current_partitions end + def extra_partitions + current_partitions - desired_partitions + end + private def desired_partitions [].tap do |parts| min_date, max_date = relevant_range - parts << partition_for(upper_bound: min_date) + if pruning_old_partitions? && min_date <= oldest_active_date + min_date = oldest_active_date.beginning_of_month + else + parts << partition_for(upper_bound: min_date) + end while min_date < max_date next_date = min_date.next_month @@ -52,13 +61,17 @@ module Gitlab # to start from MINVALUE to a specific date `x`. The range returned # does not include the range of the first, half-unbounded partition. def relevant_range - if first_partition = current_partitions.min + if (first_partition = current_partitions.min) # Case 1: First partition starts with MINVALUE, i.e. from is nil -> start with first real partition # Case 2: Rather unexpectedly, first partition does not start with MINVALUE, i.e. from is not nil # In this case, use first partition beginning as a start min_date = first_partition.from || first_partition.to end + if pruning_old_partitions? + min_date ||= oldest_active_date + end + # In case we don't have a partition yet min_date ||= Date.today min_date = min_date.beginning_of_month @@ -72,6 +85,14 @@ module Gitlab TimePartition.new(table_name, lower_bound, upper_bound) end + def pruning_old_partitions? + Feature.enabled?(:partition_pruning_dry_run) && retain_for.present? + end + + def oldest_active_date + (Date.today - retain_for).beginning_of_month + end + def connection ActiveRecord::Base.connection end diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_manager.rb index d4b2b8d50e2..c2a9422a42a 100644 --- a/lib/gitlab/database/partitioning/partition_creator.rb +++ b/lib/gitlab/database/partitioning/partition_manager.rb @@ -3,7 +3,7 @@ module Gitlab module Database module Partitioning - class PartitionCreator + class PartitionManager def self.register(model) raise ArgumentError, "Only models with a #partitioning_strategy can be registered." unless model.respond_to?(:partitioning_strategy) @@ -15,7 +15,7 @@ module Gitlab end LEASE_TIMEOUT = 1.minute - LEASE_KEY = 'database_partition_creation_%s' + MANAGEMENT_LEASE_KEY = 'database_partition_management_%s' attr_reader :models @@ -23,23 +23,25 @@ module Gitlab @models = models end - def create_partitions + def sync_partitions Gitlab::AppLogger.info("Checking state of dynamic postgres partitions") models.each do |model| # Double-checking before getting the lease: - # The prevailing situation is no missing partitions - next if missing_partitions(model).empty? + # The prevailing situation is no missing partitions and no extra partitions + next if missing_partitions(model).empty? && extra_partitions(model).empty? - only_with_exclusive_lease(model) do + only_with_exclusive_lease(model, lease_key: MANAGEMENT_LEASE_KEY) do partitions_to_create = missing_partitions(model) + create(partitions_to_create) unless partitions_to_create.empty? - next if partitions_to_create.empty? - - create(model, partitions_to_create) + if Feature.enabled?(:partition_pruning_dry_run) + partitions_to_detach = extra_partitions(model) + detach(partitions_to_detach) unless partitions_to_detach.empty? + end end rescue StandardError => e - Gitlab::AppLogger.error("Failed to create partition(s) for #{model.table_name}: #{e.class}: #{e.message}") + Gitlab::AppLogger.error("Failed to create / detach partition(s) for #{model.table_name}: #{e.class}: #{e.message}") end end @@ -51,15 +53,22 @@ module Gitlab model.partitioning_strategy.missing_partitions end - def only_with_exclusive_lease(model) - lease = Gitlab::ExclusiveLease.new(LEASE_KEY % model.table_name, timeout: LEASE_TIMEOUT) + def extra_partitions(model) + return [] unless Feature.enabled?(:partition_pruning_dry_run) + return [] unless connection.table_exists?(model.table_name) + + model.partitioning_strategy.extra_partitions + end + + def only_with_exclusive_lease(model, lease_key:) + lease = Gitlab::ExclusiveLease.new(lease_key % model.table_name, timeout: LEASE_TIMEOUT) yield if lease.try_obtain ensure lease&.cancel end - def create(model, partitions) + def create(partitions) connection.transaction do with_lock_retries do partitions.each do |partition| @@ -71,6 +80,18 @@ module Gitlab end end + def detach(partitions) + connection.transaction do + with_lock_retries do + partitions.each { |p| detach_one_partition(p) } + end + end + end + + def detach_one_partition(partition) + Gitlab::AppLogger.info("Planning to detach #{partition.partition_name} for table #{partition.table}") + end + def with_lock_retries(&block) Gitlab::Database::WithLockRetries.new( klass: self.class, diff --git a/lib/gitlab/database/partitioning/partition_monitoring.rb b/lib/gitlab/database/partitioning/partition_monitoring.rb index 9ec9ae684a5..ad122fd47fe 100644 --- a/lib/gitlab/database/partitioning/partition_monitoring.rb +++ b/lib/gitlab/database/partitioning/partition_monitoring.rb @@ -6,7 +6,7 @@ module Gitlab class PartitionMonitoring attr_reader :models - def initialize(models = PartitionCreator.models) + def initialize(models = PartitionManager.models) @models = models end diff --git a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb index 4402c42b136..f1aa7871245 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb @@ -79,135 +79,6 @@ module Gitlab "#{prefix}#{hashed_identifier}" end - - # Creates a "foreign key" that references a partitioned table. Because foreign keys referencing partitioned - # tables are not supported in PG11, this does not create a true database foreign key, but instead implements the - # same functionality at the database level by using triggers. - # - # Example: - # - # add_partitioned_foreign_key :issues, :projects - # - # Available options: - # - # :column - name of the referencing column (otherwise inferred from the referenced table name) - # :primary_key - name of the primary key in the referenced table (defaults to id) - # :on_delete - supports either :cascade for ON DELETE CASCADE or :nullify for ON DELETE SET NULL - # - def add_partitioned_foreign_key(from_table, to_table, column: nil, primary_key: :id, on_delete: :cascade) - cascade_delete = extract_cascade_option(on_delete) - - update_foreign_keys(from_table, to_table, column, primary_key, cascade_delete) do |current_keys, existing_key, specified_key| - if existing_key.nil? - unless specified_key.save - raise "failed to create foreign key: #{specified_key.errors.full_messages.to_sentence}" - end - - current_keys << specified_key - else - Gitlab::AppLogger.warn "foreign key not added because it already exists: #{specified_key}" - current_keys - end - end - end - - # Drops a "foreign key" that references a partitioned table. This method ONLY applies to foreign keys previously - # created through the `add_partitioned_foreign_key` method. Standard database foreign keys should be managed - # through the familiar Rails helpers. - # - # Example: - # - # remove_partitioned_foreign_key :issues, :projects - # - # Available options: - # - # :column - name of the referencing column (otherwise inferred from the referenced table name) - # :primary_key - name of the primary key in the referenced table (defaults to id) - # - def remove_partitioned_foreign_key(from_table, to_table, column: nil, primary_key: :id) - update_foreign_keys(from_table, to_table, column, primary_key) do |current_keys, existing_key, specified_key| - if existing_key - existing_key.delete - current_keys.delete(existing_key) - else - Gitlab::AppLogger.warn "foreign key not removed because it doesn't exist: #{specified_key}" - end - - current_keys - end - end - - private - - def fk_function_name(table) - object_name(table, 'fk_cascade_function') - end - - def fk_trigger_name(table) - object_name(table, 'fk_cascade_trigger') - end - - def fk_from_spec(from_table, to_table, from_column, to_column, cascade_delete) - PartitionedForeignKey.new(from_table: from_table.to_s, to_table: to_table.to_s, from_column: from_column.to_s, - to_column: to_column.to_s, cascade_delete: cascade_delete) - end - - def update_foreign_keys(from_table, to_table, from_column, to_column, cascade_delete = nil) - assert_not_in_transaction_block(scope: 'partitioned foreign key') - - from_column ||= "#{to_table.to_s.singularize}_id" - specified_key = fk_from_spec(from_table, to_table, from_column, to_column, cascade_delete) - - current_keys = PartitionedForeignKey.by_referenced_table(to_table).to_a - existing_key = find_existing_key(current_keys, specified_key) - - final_keys = yield current_keys, existing_key, specified_key - - fn_name = fk_function_name(to_table) - trigger_name = fk_trigger_name(to_table) - - with_lock_retries do - drop_trigger(to_table, trigger_name, if_exists: true) - - if final_keys.empty? - drop_function(fn_name, if_exists: true) - else - create_or_replace_fk_function(fn_name, final_keys) - create_trigger(to_table, trigger_name, fn_name, fires: 'AFTER DELETE') - end - end - end - - def extract_cascade_option(on_delete) - case on_delete - when :cascade then true - when :nullify then false - else raise ArgumentError, "invalid option #{on_delete} for :on_delete" - end - end - - def find_existing_key(keys, key) - keys.find { |k| k.from_table == key.from_table && k.from_column == key.from_column } - end - - def create_or_replace_fk_function(fn_name, fk_specs) - create_trigger_function(fn_name, replace: true) do - cascade_statements = build_cascade_statements(fk_specs) - cascade_statements << 'RETURN OLD;' - - cascade_statements.join("\n") - end - end - - def build_cascade_statements(foreign_keys) - foreign_keys.map do |fks| - if fks.cascade_delete? - "DELETE FROM #{fks.from_table} WHERE #{fks.from_column} = OLD.#{fks.to_column};" - else - "UPDATE #{fks.from_table} SET #{fks.from_column} = NULL WHERE #{fks.from_column} = OLD.#{fks.to_column};" - end - end - end end end end diff --git a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key.rb b/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key.rb deleted file mode 100644 index f9a90511f9b..00000000000 --- a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - module PartitioningMigrationHelpers - class PartitionedForeignKey < ApplicationRecord - validates_with PartitionedForeignKeyValidator - - scope :by_referenced_table, ->(table) { where(to_table: table) } - end - end - end -end diff --git a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key_validator.rb b/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key_validator.rb deleted file mode 100644 index 089cf2b8931..00000000000 --- a/lib/gitlab/database/partitioning_migration_helpers/partitioned_foreign_key_validator.rb +++ /dev/null @@ -1,28 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - module PartitioningMigrationHelpers - class PartitionedForeignKeyValidator < ActiveModel::Validator - def validate(record) - validate_key_part(record, :from_table, :from_column) - validate_key_part(record, :to_table, :to_column) - end - - private - - def validate_key_part(record, table_field, column_field) - if !connection.table_exists?(record[table_field]) - record.errors.add(table_field, 'must be a valid table') - elsif !connection.column_exists?(record[table_field], record[column_field]) - record.errors.add(column_field, 'must be a valid column') - end - end - - def connection - ActiveRecord::Base.connection - end - end - end - end -end diff --git a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb index aa46b98be5d..580cab5622d 100644 --- a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb +++ b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb @@ -11,17 +11,17 @@ module Gitlab # In order to not use a possible complex time consuming query when calculating min and max values, # the start and finish can be sent specifically, start and finish should contain max and min values for PRIMARY KEY of # relation (most cases `id` column) rather than counted attribute eg: - # estimate_distinct_count(start: ::Project.with_active_services.minimum(:id), finish: ::Project.with_active_services.maximum(:id)) + # estimate_distinct_count(start: ::Project.aimed_for_deletion.minimum(:id), finish: ::Project.aimed_for_deletion.maximum(:id)) # # Grouped relations are NOT supported yet. # # @example Usage # ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project, :creator_id).execute - # ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project.with_active_services.service_desk_enabled.where(time_period)) + # ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project.aimed_for_deletion.service_desk_enabled.where(time_period)) # .execute( # batch_size: 1_000, - # start: ::Project.with_active_services.service_desk_enabled.where(time_period).minimum(:id), - # finish: ::Project.with_active_services.service_desk_enabled.where(time_period).maximum(:id) + # start: ::Project.aimed_for_deletion.service_desk_enabled.where(time_period).minimum(:id), + # finish: ::Project.aimed_for_deletion.service_desk_enabled.where(time_period).maximum(:id) # ) # # @note HyperLogLog is an PROBABILISTIC algorithm that ESTIMATES distinct count of given attribute value for supplied relation diff --git a/lib/gitlab/database/postgres_index.rb b/lib/gitlab/database/postgres_index.rb index 6e734834841..58e4e7e7924 100644 --- a/lib/gitlab/database/postgres_index.rb +++ b/lib/gitlab/database/postgres_index.rb @@ -7,6 +7,7 @@ module Gitlab self.table_name = 'postgres_indexes' self.primary_key = 'identifier' + self.inheritance_column = :_type_disabled has_one :bloat_estimate, class_name: 'Gitlab::Database::PostgresIndexBloatEstimate', foreign_key: :identifier has_many :reindexing_actions, class_name: 'Gitlab::Database::Reindexing::ReindexAction', foreign_key: :index_identifier @@ -17,12 +18,12 @@ module Gitlab find(identifier) end - # A 'regular' index is a non-unique index, - # that does not serve an exclusion constraint and - # is defined on a table that is not partitioned. - scope :regular, -> { where(unique: false, partitioned: false, exclusion: false)} + # Indexes with reindexing support + scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) } - scope :not_match, ->(regex) { where("name !~ ?", regex)} + scope :not_match, ->(regex) { where("name !~ ?", regex) } + + scope :match, ->(regex) { where("name ~* ?", regex) } scope :not_recently_reindexed, -> do recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier') @@ -30,10 +31,19 @@ module Gitlab where('NOT EXISTS (?)', recent_actions) end + def reset + reload # rubocop:disable Cop/ActiveRecordAssociationReload + clear_memoization(:bloat_size) + end + def bloat_size strong_memoize(:bloat_size) { bloat_estimate&.bloat_size || 0 } end + def relative_bloat_level + bloat_size / ondisk_size_bytes.to_f + end + def to_s name end diff --git a/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb index 59bd24d3c37..a2e7f4befab 100644 --- a/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb +++ b/lib/gitlab/database/postgresql_adapter/dump_schema_versions_mixin.rb @@ -7,8 +7,7 @@ module Gitlab extend ActiveSupport::Concern def dump_schema_information # :nodoc: - versions = schema_migration.all_versions - Gitlab::Database::SchemaVersionFiles.touch_all(versions) if versions.any? + Gitlab::Database::SchemaMigrations.touch_all(self) nil end diff --git a/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb b/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb index 9f664fa2137..71d2554844e 100644 --- a/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb +++ b/lib/gitlab/database/postgresql_adapter/force_disconnectable_mixin.rb @@ -22,7 +22,7 @@ module Gitlab end def force_disconnect_timer - @force_disconnect_timer ||= ConnectionTimer.starting_now + @force_disconnect_timer ||= ::Gitlab::Database::ConnectionTimer.starting_now end end end diff --git a/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb index cf8342941c4..30060ecb34f 100644 --- a/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb +++ b/lib/gitlab/database/postgresql_database_tasks/load_schema_versions_mixin.rb @@ -6,9 +6,10 @@ module Gitlab module LoadSchemaVersionsMixin extend ActiveSupport::Concern - def structure_load(*args) - super(*args) - Gitlab::Database::SchemaVersionFiles.load_all + def structure_load(...) + super(...) + + Gitlab::Database::SchemaMigrations.load_all(connection) end end end diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb index 0cfad690283..841e04ccbd1 100644 --- a/lib/gitlab/database/reindexing.rb +++ b/lib/gitlab/database/reindexing.rb @@ -6,6 +6,8 @@ module Gitlab # Number of indexes to reindex per invocation DEFAULT_INDEXES_PER_INVOCATION = 2 + SUPPORTED_TYPES = %w(btree gist).freeze + # candidate_indexes: Array of Gitlab::Database::PostgresIndex def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION) IndexSelection.new(candidate_indexes).take(how_many).each do |index| @@ -15,10 +17,8 @@ module Gitlab def self.candidate_indexes Gitlab::Database::PostgresIndex - .regular - .where('NOT expression') - .not_match("^#{ConcurrentReindex::TEMPORARY_INDEX_PREFIX}") - .not_match("^#{ConcurrentReindex::REPLACED_INDEX_PREFIX}") + .not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$") + .reindexing_support end end end diff --git a/lib/gitlab/database/reindexing/concurrent_reindex.rb b/lib/gitlab/database/reindexing/concurrent_reindex.rb deleted file mode 100644 index 7e2dd55d21b..00000000000 --- a/lib/gitlab/database/reindexing/concurrent_reindex.rb +++ /dev/null @@ -1,154 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - module Reindexing - class ConcurrentReindex - include Gitlab::Utils::StrongMemoize - - ReindexError = Class.new(StandardError) - - PG_IDENTIFIER_LENGTH = 63 - TEMPORARY_INDEX_PREFIX = 'tmp_reindex_' - REPLACED_INDEX_PREFIX = 'old_reindex_' - STATEMENT_TIMEOUT = 9.hours - - # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock, - # which only conflicts with DDL and vacuum. We therefore execute this with a rather - # high lock timeout and a long pause in between retries. This is an alternative to - # setting a high statement timeout, which would lead to a long running query with effects - # on e.g. vacuum. - REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30 - - attr_reader :index, :logger - - def initialize(index, logger: Gitlab::AppLogger) - @index = index - @logger = logger - end - - def perform - raise ReindexError, 'UNIQUE indexes are currently not supported' if index.unique? - raise ReindexError, 'partitioned indexes are currently not supported' if index.partitioned? - raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion? - raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name.start_with?(TEMPORARY_INDEX_PREFIX, REPLACED_INDEX_PREFIX) - - logger.info "Starting reindex of #{index}" - - with_rebuilt_index do |replacement_index| - swap_index(replacement_index) - end - end - - private - - def with_rebuilt_index - if Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name) - logger.debug("dropping dangling index from previous run (if it exists): #{replacement_index_name}") - remove_index(index.schema, replacement_index_name) - end - - create_replacement_index_statement = index.definition - .sub(/CREATE INDEX #{index.name}/, "CREATE INDEX CONCURRENTLY #{replacement_index_name}") - - logger.info("creating replacement index #{replacement_index_name}") - logger.debug("replacement index definition: #{create_replacement_index_statement}") - - set_statement_timeout do - connection.execute(create_replacement_index_statement) - end - - replacement_index = Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name) - - unless replacement_index.valid_index? - message = 'replacement index was created as INVALID' - logger.error("#{message}, cleaning up") - raise ReindexError, "failed to reindex #{index}: #{message}" - end - - # Some expression indexes (aka functional indexes) - # require additional statistics. The existing statistics - # are tightly bound to the original index. We have to - # rebuild statistics for the new index before dropping - # the original one. - rebuild_statistics if index.expression? - - yield replacement_index - ensure - begin - remove_index(index.schema, replacement_index_name) - rescue StandardError => e - logger.error(e) - end - end - - def swap_index(replacement_index) - logger.info("swapping replacement index #{replacement_index} with #{index}") - - with_lock_retries do - rename_index(index.schema, index.name, replaced_index_name) - rename_index(replacement_index.schema, replacement_index.name, index.name) - rename_index(index.schema, replaced_index_name, replacement_index.name) - end - end - - def rename_index(schema, old_index_name, new_index_name) - connection.execute(<<~SQL) - ALTER INDEX #{quote_table_name(schema)}.#{quote_table_name(old_index_name)} - RENAME TO #{quote_table_name(new_index_name)} - SQL - end - - def remove_index(schema, name) - logger.info("Removing index #{schema}.#{name}") - - retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( - timing_configuration: REMOVE_INDEX_RETRY_CONFIG, - klass: self.class, - logger: logger - ) - - retries.run(raise_on_exhaustion: false) do - connection.execute(<<~SQL) - DROP INDEX CONCURRENTLY - IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)} - SQL - end - end - - def rebuild_statistics - logger.info("rebuilding table statistics for #{index.schema}.#{index.tablename}") - - connection.execute(<<~SQL) - ANALYZE #{quote_table_name(index.schema)}.#{quote_table_name(index.tablename)} - SQL - end - - def replacement_index_name - @replacement_index_name ||= "#{TEMPORARY_INDEX_PREFIX}#{index.indexrelid}" - end - - def replaced_index_name - @replaced_index_name ||= "#{REPLACED_INDEX_PREFIX}#{index.indexrelid}" - end - - def with_lock_retries(&block) - arguments = { klass: self.class, logger: logger } - Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block) - end - - def set_statement_timeout - execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT) - yield - ensure - execute('RESET statement_timeout') - end - - delegate :execute, :quote_table_name, to: :connection - def connection - @connection ||= ActiveRecord::Base.connection - end - end - end - end -end diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb index d68f47b5b6c..13298f67ca9 100644 --- a/lib/gitlab/database/reindexing/coordinator.rb +++ b/lib/gitlab/database/reindexing/coordinator.rb @@ -41,7 +41,7 @@ module Gitlab end def perform_for(index, action) - ConcurrentReindex.new(index).perform + ReindexConcurrently.new(index).perform rescue StandardError action.state = :failed diff --git a/lib/gitlab/database/reindexing/index_selection.rb b/lib/gitlab/database/reindexing/index_selection.rb index 406e70791df..2186384e7d7 100644 --- a/lib/gitlab/database/reindexing/index_selection.rb +++ b/lib/gitlab/database/reindexing/index_selection.rb @@ -6,6 +6,12 @@ module Gitlab class IndexSelection include Enumerable + # Only reindex indexes with a relative bloat level (bloat estimate / size) higher than this + MINIMUM_RELATIVE_BLOAT = 0.2 + + # Only consider indexes with a total ondisk size in this range (before reindexing) + INDEX_SIZE_RANGE = (1.gigabyte..100.gigabyte).freeze + delegate :each, to: :indexes def initialize(candidates) @@ -24,11 +30,12 @@ module Gitlab # we force a N+1 pattern here and estimate bloat on a per-index # basis. - @indexes ||= filter_candidates.sort_by(&:bloat_size).reverse - end - - def filter_candidates - candidates.not_recently_reindexed + @indexes ||= candidates + .not_recently_reindexed + .where(ondisk_size_bytes: INDEX_SIZE_RANGE) + .sort_by(&:relative_bloat_level) # forced N+1 + .reverse + .select { |candidate| candidate.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT } end end end diff --git a/lib/gitlab/database/reindexing/reindex_action.rb b/lib/gitlab/database/reindexing/reindex_action.rb index 7e58201889f..ff465fffb74 100644 --- a/lib/gitlab/database/reindexing/reindex_action.rb +++ b/lib/gitlab/database/reindexing/reindex_action.rb @@ -10,7 +10,7 @@ module Gitlab enum state: { started: 0, finished: 1, failed: 2 } # Amount of time to consider a previous reindexing *recent* - RECENT_THRESHOLD = 7.days + RECENT_THRESHOLD = 10.days scope :recent, -> { where(state: :finished).where('action_end > ?', Time.zone.now - RECENT_THRESHOLD) } diff --git a/lib/gitlab/database/reindexing/reindex_concurrently.rb b/lib/gitlab/database/reindexing/reindex_concurrently.rb new file mode 100644 index 00000000000..8d9f9f5abdd --- /dev/null +++ b/lib/gitlab/database/reindexing/reindex_concurrently.rb @@ -0,0 +1,138 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Reindexing + # This is a >= PG12 reindexing strategy based on `REINDEX CONCURRENTLY` + class ReindexConcurrently + ReindexError = Class.new(StandardError) + + TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*' + STATEMENT_TIMEOUT = 9.hours + PG_MAX_INDEX_NAME_LENGTH = 63 + + # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock, + # which only conflicts with DDL and vacuum. We therefore execute this with a rather + # high lock timeout and a long pause in between retries. This is an alternative to + # setting a high statement timeout, which would lead to a long running query with effects + # on e.g. vacuum. + REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30 + + attr_reader :index, :logger + + def initialize(index, logger: Gitlab::AppLogger) + @index = index + @logger = logger + end + + def perform + raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion? + raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name =~ /#{TEMPORARY_INDEX_PATTERN}/ + + # Expression indexes require additional statistics in `pg_statistic`: + # select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index'); + # + # In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96. + # Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com + # following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885). + # + # While this has been backpatched, we continue to disable expression indexes until further review. + raise ReindexError, 'expression indexes are currently not supported' if index.expression? + + begin + with_logging do + set_statement_timeout do + execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}") + end + end + ensure + cleanup_dangling_indexes + end + end + + private + + def with_logging + bloat_size = index.bloat_size + ondisk_size_before = index.ondisk_size_bytes + + logger.info( + message: "Starting reindex of #{index}", + index: index.identifier, + table: index.tablename, + estimated_bloat_bytes: bloat_size, + index_size_before_bytes: ondisk_size_before, + relative_bloat_level: index.relative_bloat_level + ) + + duration = Benchmark.realtime do + yield + end + + index.reset + + logger.info( + message: "Finished reindex of #{index}", + index: index.identifier, + table: index.tablename, + estimated_bloat_bytes: bloat_size, + index_size_before_bytes: ondisk_size_before, + index_size_after_bytes: index.ondisk_size_bytes, + relative_bloat_level: index.relative_bloat_level, + duration_s: duration.round(2) + ) + end + + def cleanup_dangling_indexes + Gitlab::Database::PostgresIndex.match("#{TEMPORARY_INDEX_PATTERN}$").each do |lingering_index| + # Example lingering index name: some_index_ccnew1 + + # Example prefix: 'some_index' + prefix = lingering_index.name.gsub(/#{TEMPORARY_INDEX_PATTERN}/, '') + + # Example suffix: '_ccnew1' + suffix = lingering_index.name.match(/#{TEMPORARY_INDEX_PATTERN}/)[0] + + # Only remove if the lingering index name could have been chosen + # as a result of a REINDEX operation (considering that PostgreSQL + # truncates index names to 63 chars and adds a suffix). + if index.name[0...PG_MAX_INDEX_NAME_LENGTH - suffix.length] == prefix + remove_index(lingering_index) + end + end + end + + def remove_index(index) + logger.info("Removing dangling index #{index.identifier}") + + retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( + timing_configuration: REMOVE_INDEX_RETRY_CONFIG, + klass: self.class, + logger: logger + ) + + retries.run(raise_on_exhaustion: false) do + execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}") + end + end + + def with_lock_retries(&block) + arguments = { klass: self.class, logger: logger } + Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block) + end + + def set_statement_timeout + execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT) + yield + ensure + execute('RESET statement_timeout') + end + + delegate :execute, :quote_table_name, to: :connection + def connection + @connection ||= ActiveRecord::Base.connection + end + end + end + end +end diff --git a/lib/gitlab/database/schema_cleaner.rb b/lib/gitlab/database/schema_cleaner.rb index 8f93da2b66c..c3cdcf1450d 100644 --- a/lib/gitlab/database/schema_cleaner.rb +++ b/lib/gitlab/database/schema_cleaner.rb @@ -30,11 +30,7 @@ module Gitlab structure.gsub!(/\n{3,}/, "\n\n") io << structure.strip - io << <<~MSG - -- schema_migrations.version information is no longer stored in this file, - -- but instead tracked in the db/schema_migrations directory - -- see https://gitlab.com/gitlab-org/gitlab/-/issues/218590 for details - MSG + io << "\n" nil end diff --git a/lib/gitlab/database/schema_migrations.rb b/lib/gitlab/database/schema_migrations.rb new file mode 100644 index 00000000000..1c49ed8d946 --- /dev/null +++ b/lib/gitlab/database/schema_migrations.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module SchemaMigrations + def self.touch_all(connection) + context = Gitlab::Database::SchemaMigrations::Context.new(connection) + + Gitlab::Database::SchemaMigrations::Migrations.new(context).touch_all + end + + def self.load_all(connection) + context = Gitlab::Database::SchemaMigrations::Context.new(connection) + + Gitlab::Database::SchemaMigrations::Migrations.new(context).load_all + end + end + end +end diff --git a/lib/gitlab/database/schema_migrations/context.rb b/lib/gitlab/database/schema_migrations/context.rb new file mode 100644 index 00000000000..bd8b9bed2c1 --- /dev/null +++ b/lib/gitlab/database/schema_migrations/context.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module SchemaMigrations + class Context + attr_reader :connection + + def initialize(connection) + @connection = connection + end + + def schema_directory + @schema_directory ||= + if ActiveRecord::Base.configurations.primary?(database_name) + File.join(db_dir, 'schema_migrations') + else + File.join(db_dir, "#{database_name}_schema_migrations") + end + end + + def versions_to_create + versions_from_database = @connection.schema_migration.all_versions + versions_from_migration_files = @connection.migration_context.migrations.map { |m| m.version.to_s } + + versions_from_database & versions_from_migration_files + end + + private + + def database_name + @database_name ||= @connection.pool.db_config.name + end + + def db_dir + @db_dir ||= Rails.application.config.paths["db"].first + end + end + end + end +end diff --git a/lib/gitlab/database/schema_migrations/migrations.rb b/lib/gitlab/database/schema_migrations/migrations.rb new file mode 100644 index 00000000000..3b16b7f1b81 --- /dev/null +++ b/lib/gitlab/database/schema_migrations/migrations.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module SchemaMigrations + class Migrations + MIGRATION_VERSION_GLOB = '20[0-9][0-9]*' + + def initialize(context) + @context = context + end + + def touch_all + return unless @context.versions_to_create.any? + + version_filepaths = version_filenames.map { |f| File.join(schema_directory, f) } + FileUtils.rm(version_filepaths) + + @context.versions_to_create.each do |version| + version_filepath = File.join(schema_directory, version) + + File.open(version_filepath, 'w') do |file| + file << Digest::SHA256.hexdigest(version) + end + end + end + + def load_all + return if version_filenames.empty? + + values = version_filenames.map { |vf| "('#{@context.connection.quote_string(vf)}')" } + + @context.connection.execute(<<~SQL) + INSERT INTO schema_migrations (version) + VALUES #{values.join(',')} + ON CONFLICT DO NOTHING + SQL + end + + private + + def schema_directory + @context.schema_directory + end + + def version_filenames + @version_filenames ||= Dir.glob(MIGRATION_VERSION_GLOB, base: schema_directory) + end + end + end + end +end diff --git a/lib/gitlab/database/schema_version_files.rb b/lib/gitlab/database/schema_version_files.rb deleted file mode 100644 index 27a942404ef..00000000000 --- a/lib/gitlab/database/schema_version_files.rb +++ /dev/null @@ -1,64 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - class SchemaVersionFiles - SCHEMA_DIRECTORY = 'db/schema_migrations' - MIGRATION_DIRECTORIES = %w[db/migrate db/post_migrate].freeze - MIGRATION_VERSION_GLOB = '20[0-9][0-9]*' - - def self.touch_all(versions_from_database) - versions_from_migration_files = find_versions_from_migration_files - - version_filepaths = find_version_filenames.map { |f| schema_directory.join(f) } - FileUtils.rm(version_filepaths) - - versions_to_create = versions_from_database & versions_from_migration_files - versions_to_create.each do |version| - version_filepath = schema_directory.join(version) - - File.open(version_filepath, 'w') do |file| - file << Digest::SHA256.hexdigest(version) - end - end - end - - def self.load_all - version_filenames = find_version_filenames - return if version_filenames.empty? - - values = version_filenames.map { |vf| "('#{connection.quote_string(vf)}')" } - connection.execute(<<~SQL) - INSERT INTO schema_migrations (version) - VALUES #{values.join(',')} - ON CONFLICT DO NOTHING - SQL - end - - def self.schema_directory - @schema_directory ||= Rails.root.join(SCHEMA_DIRECTORY) - end - - def self.migration_directories - @migration_directories ||= MIGRATION_DIRECTORIES.map { |dir| Rails.root.join(dir) } - end - - def self.find_version_filenames - Dir.glob(MIGRATION_VERSION_GLOB, base: schema_directory) - end - - def self.find_versions_from_migration_files - migration_directories.each_with_object([]) do |directory, migration_versions| - directory_migrations = Dir.glob(MIGRATION_VERSION_GLOB, base: directory) - directory_versions = directory_migrations.map! { |m| m.split('_').first } - - migration_versions.concat(directory_versions) - end - end - - def self.connection - ActiveRecord::Base.connection - end - end - end -end |