diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2022-01-20 12:16:11 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2022-01-20 12:16:11 +0300 |
commit | edaa33dee2ff2f7ea3fac488d41558eb5f86d68c (patch) | |
tree | 11f143effbfeba52329fb7afbd05e6e2a3790241 /lib/gitlab/database | |
parent | d8a5691316400a0f7ec4f83832698f1988eb27c1 (diff) |
Add latest changes from gitlab-org/gitlab@14-7-stable-ee (tag: v14.7.0-rc42)
Diffstat (limited to 'lib/gitlab/database')
18 files changed, 229 insertions, 403 deletions
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb index 503172dd750..290fa51692a 100644 --- a/lib/gitlab/database/background_migration/batched_job.rb +++ b/lib/gitlab/database/background_migration/batched_job.rb @@ -12,17 +12,6 @@ module Gitlab MAX_ATTEMPTS = 3 STUCK_JOBS_TIMEOUT = 1.hour.freeze - belongs_to :batched_migration, foreign_key: :batched_background_migration_id - - scope :active, -> { where(status: [:pending, :running]) } - scope :stuck, -> { active.where('updated_at <= ?', STUCK_JOBS_TIMEOUT.ago) } - scope :retriable, -> { - failed_jobs = where(status: :failed).where('attempts < ?', MAX_ATTEMPTS) - - from_union([failed_jobs, self.stuck]) - } - scope :except_succeeded, -> { where(status: self.statuses.except(:succeeded).values) } - enum status: { pending: 0, running: 1, @@ -30,7 +19,14 @@ module Gitlab succeeded: 3 } + belongs_to :batched_migration, foreign_key: :batched_background_migration_id + + scope :active, -> { where(status: [:pending, :running]) } + scope :stuck, -> { active.where('updated_at <= ?', STUCK_JOBS_TIMEOUT.ago) } + scope :retriable, -> { from_union([failed.where('attempts < ?', MAX_ATTEMPTS), self.stuck]) } + scope :except_succeeded, -> { where(status: self.statuses.except(:succeeded).values) } scope :successful_in_execution_order, -> { where.not(finished_at: nil).succeeded.order(:finished_at) } + scope :with_preloads, -> { preload(:batched_migration) } delegate :job_class, :table_name, :column_name, :job_arguments, to: :batched_migration, prefix: :migration diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb index 2844cbe4a74..2f066039874 100644 --- a/lib/gitlab/database/background_migration/batched_migration.rb +++ b/lib/gitlab/database/background_migration/batched_migration.rb @@ -113,7 +113,7 @@ module Gitlab end def smoothed_time_efficiency(number_of_jobs: 10, alpha: 
0.2) - jobs = batched_jobs.successful_in_execution_order.reverse_order.limit(number_of_jobs) + jobs = batched_jobs.successful_in_execution_order.reverse_order.limit(number_of_jobs).with_preloads return if jobs.size < number_of_jobs diff --git a/lib/gitlab/database/background_migration_job.rb b/lib/gitlab/database/background_migration_job.rb index c046571a111..c0e3016fd3d 100644 --- a/lib/gitlab/database/background_migration_job.rb +++ b/lib/gitlab/database/background_migration_job.rb @@ -2,7 +2,7 @@ module Gitlab module Database - class BackgroundMigrationJob < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord + class BackgroundMigrationJob < SharedModel include EachBatch include BulkInsertSafe diff --git a/lib/gitlab/database/batch_counter.rb b/lib/gitlab/database/batch_counter.rb index 6c0ce9e481a..417511618e4 100644 --- a/lib/gitlab/database/batch_counter.rb +++ b/lib/gitlab/database/batch_counter.rb @@ -52,12 +52,7 @@ module Gitlab batch_end = [batch_start + batch_size, finish].min batch_relation = build_relation_batch(batch_start, batch_end, mode) - op_args = @operation_args - if @operation == :count && @operation_args.blank? 
&& use_loose_index_scan_for_distinct_values?(mode) - op_args = [Gitlab::Database::LooseIndexScanDistinctCount::COLUMN_ALIAS] - end - - results = merge_results(results, batch_relation.send(@operation, *op_args)) # rubocop:disable GitlabSecurity/PublicSend + results = merge_results(results, batch_relation.send(@operation, *@operation_args)) # rubocop:disable GitlabSecurity/PublicSend batch_start = batch_end rescue ActiveRecord::QueryCanceled => error # retry with a safe batch size & warmer cache @@ -67,18 +62,6 @@ module Gitlab log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error) return FALLBACK end - rescue Gitlab::Database::LooseIndexScanDistinctCount::ColumnConfigurationError => error - Gitlab::AppJsonLogger - .error( - event: 'batch_count', - relation: @relation.table_name, - operation: @operation, - operation_args: @operation_args, - mode: mode, - message: "LooseIndexScanDistinctCount column error: #{error.message}" - ) - - return FALLBACK end sleep(SLEEP_TIME_IN_SECONDS) @@ -104,11 +87,7 @@ module Gitlab private def build_relation_batch(start, finish, mode) - if use_loose_index_scan_for_distinct_values?(mode) - Gitlab::Database::LooseIndexScanDistinctCount.new(@relation, @column).build_query(from: start, to: finish) - else - @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend - end + @relation.select(@column).public_send(mode).where(between_condition(start, finish)) # rubocop:disable GitlabSecurity/PublicSend end def batch_size_for_mode_and_operation(mode, operation) @@ -151,10 +130,6 @@ module Gitlab ) end - def use_loose_index_scan_for_distinct_values?(mode) - Feature.enabled?(:loose_index_scan_for_distinct_values) && not_group_by_query? && mode == :distinct - end - def not_group_by_query? !@relation.is_a?(ActiveRecord::Relation) || @relation.group_values.blank? 
end diff --git a/lib/gitlab/database/gitlab_loose_foreign_keys.yml b/lib/gitlab/database/gitlab_loose_foreign_keys.yml index 0343c054f23..d694165574d 100644 --- a/lib/gitlab/database/gitlab_loose_foreign_keys.yml +++ b/lib/gitlab/database/gitlab_loose_foreign_keys.yml @@ -1,3 +1,12 @@ +--- +dast_site_profiles_pipelines: + - table: ci_pipelines + column: ci_pipeline_id + on_delete: async_delete +vulnerability_feedback: + - table: ci_pipelines + column: pipeline_id + on_delete: async_nullify ci_pipeline_chat_data: - table: chat_names column: chat_name_id @@ -6,7 +15,7 @@ dast_scanner_profiles_builds: - table: ci_builds column: ci_build_id on_delete: async_delete -dast_scanner_profiles_builds: +dast_site_profiles_builds: - table: ci_builds column: ci_build_id on_delete: async_delete @@ -18,10 +27,48 @@ clusters_applications_runners: - table: ci_runners column: runner_id on_delete: async_nullify +ci_job_token_project_scope_links: + - table: users + column: added_by_id + on_delete: async_nullify +ci_daily_build_group_report_results: + - table: namespaces + column: group_id + on_delete: async_delete + - table: projects + column: project_id + on_delete: async_delete +ci_freeze_periods: + - table: projects + column: project_id + on_delete: async_delete +ci_pending_builds: + - table: namespaces + column: namespace_id + on_delete: async_delete + - table: projects + column: project_id + on_delete: async_delete +ci_resource_groups: + - table: projects + column: project_id + on_delete: async_delete +ci_runner_namespaces: + - table: namespaces + column: namespace_id + on_delete: async_delete +ci_running_builds: + - table: projects + column: project_id + on_delete: async_delete ci_namespace_mirrors: - table: namespaces column: namespace_id on_delete: async_delete +ci_build_report_results: + - table: projects + column: project_id + on_delete: async_delete ci_builds: - table: users column: user_id @@ -43,6 +90,22 @@ ci_project_mirrors: - table: namespaces column: namespace_id 
on_delete: async_delete +ci_unit_tests: + - table: projects + column: project_id + on_delete: async_delete +merge_requests: + - table: ci_pipelines + column: head_pipeline_id + on_delete: async_nullify +vulnerability_statistics: + - table: ci_pipelines + column: latest_pipeline_id + on_delete: async_nullify +vulnerability_occurrence_pipelines: + - table: ci_pipelines + column: pipeline_id + on_delete: async_delete packages_build_infos: - table: ci_pipelines column: pipeline_id @@ -67,3 +130,31 @@ project_pages_metadata: - table: ci_job_artifacts column: artifacts_archive_id on_delete: async_nullify +ci_pipeline_schedules: + - table: users + column: owner_id + on_delete: async_nullify +ci_group_variables: + - table: namespaces + column: group_id + on_delete: async_delete +ci_minutes_additional_packs: + - table: namespaces + column: namespace_id + on_delete: async_delete +requirements_management_test_reports: + - table: ci_builds + column: build_id + on_delete: async_nullify +security_scans: + - table: ci_builds + column: build_id + on_delete: async_delete +ci_secure_files: + - table: projects + column: project_id + on_delete: async_delete +ci_pipeline_artifacts: + - table: projects + column: project_id + on_delete: async_delete diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml index 24c2d634780..fb5d8cfa32f 100644 --- a/lib/gitlab/database/gitlab_schemas.yml +++ b/lib/gitlab/database/gitlab_schemas.yml @@ -107,6 +107,7 @@ ci_runner_projects: :gitlab_ci ci_runners: :gitlab_ci ci_running_builds: :gitlab_ci ci_sources_pipelines: :gitlab_ci +ci_secure_files: :gitlab_ci ci_sources_projects: :gitlab_ci ci_stages: :gitlab_ci ci_subscriptions_projects: :gitlab_ci @@ -200,7 +201,7 @@ experiment_subjects: :gitlab_main experiment_users: :gitlab_main external_approval_rules: :gitlab_main external_approval_rules_protected_branches: :gitlab_main -external_pull_requests: :gitlab_main +external_pull_requests: :gitlab_ci 
external_status_checks: :gitlab_main external_status_checks_protected_branches: :gitlab_main feature_gates: :gitlab_main @@ -231,6 +232,7 @@ gpg_key_subkeys: :gitlab_main gpg_signatures: :gitlab_main grafana_integrations: :gitlab_main group_custom_attributes: :gitlab_main +group_crm_settings: :gitlab_main group_deletion_schedules: :gitlab_main group_deploy_keys: :gitlab_main group_deploy_keys_groups: :gitlab_main @@ -460,6 +462,8 @@ security_findings: :gitlab_main security_orchestration_policy_configurations: :gitlab_main security_orchestration_policy_rule_schedules: :gitlab_main security_scans: :gitlab_main +security_training_providers: :gitlab_main +security_trainings: :gitlab_main self_managed_prometheus_alert_events: :gitlab_main sent_notifications: :gitlab_main sentry_issues: :gitlab_main @@ -521,13 +525,7 @@ vulnerabilities: :gitlab_main vulnerability_exports: :gitlab_main vulnerability_external_issue_links: :gitlab_main vulnerability_feedback: :gitlab_main -vulnerability_finding_evidence_assets: :gitlab_main -vulnerability_finding_evidence_headers: :gitlab_main -vulnerability_finding_evidence_requests: :gitlab_main -vulnerability_finding_evidence_responses: :gitlab_main vulnerability_finding_evidences: :gitlab_main -vulnerability_finding_evidence_sources: :gitlab_main -vulnerability_finding_evidence_supporting_messages: :gitlab_main vulnerability_finding_links: :gitlab_main vulnerability_finding_signatures: :gitlab_main vulnerability_findings_remediations: :gitlab_main diff --git a/lib/gitlab/database/grant.rb b/lib/gitlab/database/grant.rb index c8a30c68bc6..0093848ee6f 100644 --- a/lib/gitlab/database/grant.rb +++ b/lib/gitlab/database/grant.rb @@ -10,7 +10,7 @@ module Gitlab # We _must not_ use quote_table_name as this will produce double # quotes on PostgreSQL and for "has_table_privilege" we need single # quotes. 
- connection = ActiveRecord::Base.connection # rubocop: disable Database/MultipleDatabases + connection = ApplicationRecord.connection quoted_table = connection.quote(table) begin diff --git a/lib/gitlab/database/load_balancing/setup.rb b/lib/gitlab/database/load_balancing/setup.rb index ef38f42f50b..126c8bb2aa6 100644 --- a/lib/gitlab/database/load_balancing/setup.rb +++ b/lib/gitlab/database/load_balancing/setup.rb @@ -104,11 +104,9 @@ module Gitlab end end - # rubocop:disable Database/MultipleDatabases def connection - use_model_load_balancing? ? super : ActiveRecord::Base.connection + use_model_load_balancing? ? super : ApplicationRecord.connection end - # rubocop:enable Database/MultipleDatabases end end end diff --git a/lib/gitlab/database/loose_index_scan_distinct_count.rb b/lib/gitlab/database/loose_index_scan_distinct_count.rb deleted file mode 100644 index 26be07f91c4..00000000000 --- a/lib/gitlab/database/loose_index_scan_distinct_count.rb +++ /dev/null @@ -1,102 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - # This class builds efficient batched distinct query by using loose index scan. - # Consider the following example: - # > Issue.distinct(:project_id).where(project_id: (1...100)).count - # - # Note: there is an index on project_id - # - # This query will read each element in the index matching the project_id filter. - # If for a project_id has 100_000 issues, all 100_000 elements will be read. - # - # A loose index scan will only read one entry from the index for each project_id to reduce the number of disk reads. 
- # - # Usage: - # - # Gitlab::Database::LooseIndexScanDisctinctCount.new(Issue, :project_id).count(from: 1, to: 100) - # - # The query will return the number of distinct projects_ids between 1 and 100 - # - # Getting the Arel query: - # - # Gitlab::Database::LooseIndexScanDisctinctCount.new(Issue, :project_id).build_query(from: 1, to: 100) - class LooseIndexScanDistinctCount - COLUMN_ALIAS = 'distinct_count_column' - - ColumnConfigurationError = Class.new(StandardError) - - def initialize(scope, column) - if scope.is_a?(ActiveRecord::Relation) - @scope = scope - @model = scope.model - else - @scope = scope.where({}) - @model = scope - end - - @column = transform_column(column) - end - - def count(from:, to:) - build_query(from: from, to: to).count(COLUMN_ALIAS) - end - - def build_query(from:, to:) # rubocop:disable Metrics/AbcSize - cte = Gitlab::SQL::RecursiveCTE.new(:counter_cte, union_args: { remove_order: false }) - table = model.arel_table - - cte << @scope - .dup - .select(column.as(COLUMN_ALIAS)) - .where(column.gteq(from)) - .where(column.lt(to)) - .order(column) - .limit(1) - - inner_query = @scope - .dup - .where(column.gt(cte.table[COLUMN_ALIAS])) - .where(column.lt(to)) - .select(column.as(COLUMN_ALIAS)) - .order(column) - .limit(1) - - cte << cte.table - .project(Arel::Nodes::Grouping.new(Arel.sql(inner_query.to_sql)).as(COLUMN_ALIAS)) - .where(cte.table[COLUMN_ALIAS].lt(to)) - - model - .with - .recursive(cte.to_arel) - .from(cte.alias_to(table)) - .unscope(where: :source_type) - .unscope(where: model.inheritance_column) # Remove STI query, not needed here - end - - private - - attr_reader :column, :model - - # Transforms the column so it can be used in Arel expressions - # - # 'table.column' => 'table.column' - # 'column' => 'table_name.column' - # :column => 'table_name.column' - # Arel::Attributes::Attribute => name of the column - def transform_column(column) - if column.is_a?(String) || column.is_a?(Symbol) - column_as_string = column.to_s - 
column_as_string = "#{model.table_name}.#{column_as_string}" unless column_as_string.include?('.') - - Arel.sql(column_as_string) - elsif column.is_a?(Arel::Attributes::Attribute) - column - else - raise ColumnConfigurationError, "Cannot transform the column: #{column.inspect}, please provide the column name as string" - end - end - end - end -end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 4245dd80714..aa5ac1e3486 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -778,186 +778,6 @@ module Gitlab install_rename_triggers(table, old, new) end - # Changes the column type of a table using a background migration. - # - # Because this method uses a background migration it's more suitable for - # large tables. For small tables it's better to use - # `change_column_type_concurrently` since it can complete its work in a - # much shorter amount of time and doesn't rely on Sidekiq. - # - # Example usage: - # - # class Issue < ActiveRecord::Base - # self.table_name = 'issues' - # - # include EachBatch - # - # def self.to_migrate - # where('closed_at IS NOT NULL') - # end - # end - # - # change_column_type_using_background_migration( - # Issue.to_migrate, - # :closed_at, - # :datetime_with_timezone - # ) - # - # Reverting a migration like this is done exactly the same way, just with - # a different type to migrate to (e.g. `:datetime` in the above example). - # - # relation - An ActiveRecord relation to use for scheduling jobs and - # figuring out what table we're modifying. This relation _must_ - # have the EachBatch module included. - # - # column - The name of the column for which the type will be changed. - # - # new_type - The new type of the column. - # - # batch_size - The number of rows to schedule in a single background - # migration. - # - # interval - The time interval between every background migration. 
- def change_column_type_using_background_migration( - relation, - column, - new_type, - batch_size: 10_000, - interval: 10.minutes - ) - - unless relation.model < EachBatch - raise TypeError, 'The relation must include the EachBatch module' - end - - temp_column = "#{column}_for_type_change" - table = relation.table_name - max_index = 0 - - add_column(table, temp_column, new_type) - install_rename_triggers(table, column, temp_column) - - # Schedule the jobs that will copy the data from the old column to the - # new one. Rows with NULL values in our source column are skipped since - # the target column is already NULL at this point. - relation.where.not(column => nil).each_batch(of: batch_size) do |batch, index| - start_id, end_id = batch.pluck('MIN(id), MAX(id)').first - max_index = index - - migrate_in( - index * interval, - 'CopyColumn', - [table, column, temp_column, start_id, end_id] - ) - end - - # Schedule the renaming of the column to happen (initially) 1 hour after - # the last batch finished. - migrate_in( - (max_index * interval) + 1.hour, - 'CleanupConcurrentTypeChange', - [table, column, temp_column] - ) - - if perform_background_migration_inline? - # To ensure the schema is up to date immediately we perform the - # migration inline in dev / test environments. - Gitlab::BackgroundMigration.steal('CopyColumn') - Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange') - end - end - - # Renames a column using a background migration. - # - # Because this method uses a background migration it's more suitable for - # large tables. For small tables it's better to use - # `rename_column_concurrently` since it can complete its work in a much - # shorter amount of time and doesn't rely on Sidekiq. - # - # Example usage: - # - # rename_column_using_background_migration( - # :users, - # :feed_token, - # :rss_token - # ) - # - # table - The name of the database table containing the column. - # - # old - The old column name. 
- # - # new - The new column name. - # - # type - The type of the new column. If no type is given the old column's - # type is used. - # - # batch_size - The number of rows to schedule in a single background - # migration. - # - # interval - The time interval between every background migration. - def rename_column_using_background_migration( - table, - old_column, - new_column, - type: nil, - batch_size: 10_000, - interval: 10.minutes - ) - - check_trigger_permissions!(table) - - old_col = column_for(table, old_column) - new_type = type || old_col.type - max_index = 0 - - add_column(table, new_column, new_type, - limit: old_col.limit, - precision: old_col.precision, - scale: old_col.scale) - - # We set the default value _after_ adding the column so we don't end up - # updating any existing data with the default value. This isn't - # necessary since we copy over old values further down. - change_column_default(table, new_column, old_col.default) if old_col.default - - install_rename_triggers(table, old_column, new_column) - - model = Class.new(ActiveRecord::Base) do - self.table_name = table - - include ::EachBatch - end - - # Schedule the jobs that will copy the data from the old column to the - # new one. Rows with NULL values in our source column are skipped since - # the target column is already NULL at this point. - model.where.not(old_column => nil).each_batch(of: batch_size) do |batch, index| - start_id, end_id = batch.pluck('MIN(id), MAX(id)').first - max_index = index - - migrate_in( - index * interval, - 'CopyColumn', - [table, old_column, new_column, start_id, end_id] - ) - end - - # Schedule the renaming of the column to happen (initially) 1 hour after - # the last batch finished. - migrate_in( - (max_index * interval) + 1.hour, - 'CleanupConcurrentRename', - [table, old_column, new_column] - ) - - if perform_background_migration_inline? - # To ensure the schema is up to date immediately we perform the - # migration inline in dev / test environments. 
- Gitlab::BackgroundMigration.steal('CopyColumn') - Gitlab::BackgroundMigration.steal('CleanupConcurrentRename') - end - end - def convert_to_bigint_column(column) "#{column}_convert_to_bigint" end diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb index 8c33c41ce77..4f1b490cc8f 100644 --- a/lib/gitlab/database/migrations/background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/background_migration_helpers.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true - module Gitlab module Database module Migrations @@ -45,11 +44,11 @@ module Gitlab raise "#{model_class} does not have an ID column of #{primary_column_name} to use for batch ranges" unless model_class.column_names.include?(primary_column_name.to_s) raise "#{primary_column_name} is not an integer column" unless model_class.columns_hash[primary_column_name.to_s].type == :integer + job_coordinator = coordinator_for_tracking_database + # To not overload the worker too much we enforce a minimum interval both # when scheduling and performing jobs. - if delay_interval < BackgroundMigrationWorker.minimum_interval - delay_interval = BackgroundMigrationWorker.minimum_interval - end + delay_interval = [delay_interval, job_coordinator.minimum_interval].max final_delay = 0 batch_counter = 0 @@ -60,14 +59,14 @@ module Gitlab start_id, end_id = relation.pluck(min, max).first - # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for + # `SingleDatabaseWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to # spread the work over time. 
final_delay = initial_delay + delay_interval * index full_job_arguments = [start_id, end_id] + other_job_arguments track_in_database(job_class_name, full_job_arguments) if track_jobs - migrate_in(final_delay, job_class_name, full_job_arguments) + migrate_in(final_delay, job_class_name, full_job_arguments, coordinator: job_coordinator) batch_counter += 1 end @@ -91,9 +90,11 @@ module Gitlab # delay_interval - The duration between each job's scheduled time # batch_size - The maximum number of jobs to fetch to memory from the database. def requeue_background_migration_jobs_by_range_at_intervals(job_class_name, delay_interval, batch_size: BATCH_SIZE, initial_delay: 0) + job_coordinator = coordinator_for_tracking_database + # To not overload the worker too much we enforce a minimum interval both # when scheduling and performing jobs. - delay_interval = [delay_interval, BackgroundMigrationWorker.minimum_interval].max + delay_interval = [delay_interval, job_coordinator.minimum_interval].max final_delay = 0 job_counter = 0 @@ -103,7 +104,7 @@ module Gitlab job_batch.each do |job| final_delay = initial_delay + delay_interval * job_counter - migrate_in(final_delay, job_class_name, job.arguments) + migrate_in(final_delay, job_class_name, job.arguments, coordinator: job_coordinator) job_counter += 1 end @@ -132,56 +133,33 @@ module Gitlab # This method does not garauntee that all jobs completed successfully. # It can only be used if the previous background migration used the queue_background_migration_jobs_by_range_at_intervals helper. def finalize_background_migration(class_name, delete_tracking_jobs: ['succeeded']) + job_coordinator = coordinator_for_tracking_database + # Empty the sidekiq queue. - Gitlab::BackgroundMigration.steal(class_name) + job_coordinator.steal(class_name) # Process pending tracked jobs. 
jobs = Gitlab::Database::BackgroundMigrationJob.pending.for_migration_class(class_name) + jobs.find_each do |job| - BackgroundMigrationWorker.new.perform(job.class_name, job.arguments) + job_coordinator.perform(job.class_name, job.arguments) end # Empty the sidekiq queue. - Gitlab::BackgroundMigration.steal(class_name) + job_coordinator.steal(class_name) # Delete job tracking rows. delete_job_tracking(class_name, status: delete_tracking_jobs) if delete_tracking_jobs end - def perform_background_migration_inline? - Rails.env.test? || Rails.env.development? - end - - def migrate_async(*args) - with_migration_context do - BackgroundMigrationWorker.perform_async(*args) - end - end - - def migrate_in(*args) - with_migration_context do - BackgroundMigrationWorker.perform_in(*args) - end - end - - def bulk_migrate_in(*args) + def migrate_in(*args, coordinator: coordinator_for_tracking_database) with_migration_context do - BackgroundMigrationWorker.bulk_perform_in(*args) + coordinator.perform_in(*args) end end - def bulk_migrate_async(*args) - with_migration_context do - BackgroundMigrationWorker.bulk_perform_async(*args) - end - end - - def with_migration_context(&block) - Gitlab::ApplicationContext.with_context(caller_id: self.class.to_s, &block) - end - def delete_queued_jobs(class_name) - Gitlab::BackgroundMigration.steal(class_name) do |job| + coordinator_for_tracking_database.steal(class_name) do |job| job.delete false @@ -196,9 +174,21 @@ module Gitlab private + def with_migration_context(&block) + Gitlab::ApplicationContext.with_context(caller_id: self.class.to_s, &block) + end + def track_in_database(class_name, arguments) Gitlab::Database::BackgroundMigrationJob.create!(class_name: class_name, arguments: arguments) end + + def coordinator_for_tracking_database + Gitlab::BackgroundMigration.coordinator_for_database(tracking_database) + end + + def tracking_database + Gitlab::BackgroundMigration::DEFAULT_TRACKING_DATABASE + end end end end diff --git 
a/lib/gitlab/database/partitioning/partition_manager.rb b/lib/gitlab/database/partitioning/partition_manager.rb index aa824dfbd2f..ba6fa0cf278 100644 --- a/lib/gitlab/database/partitioning/partition_manager.rb +++ b/lib/gitlab/database/partitioning/partition_manager.rb @@ -64,6 +64,10 @@ module Gitlab # with_lock_retries starts a requires_new transaction most of the time, but not on the last iteration with_lock_retries do connection.transaction(requires_new: false) do # so we open a transaction here if not already in progress + # Partitions might not get created (IF NOT EXISTS) so explicit locking will not happen. + # This LOCK TABLE ensures to have exclusive lock as the first step. + connection.execute "LOCK TABLE #{connection.quote_table_name(model.table_name)} IN ACCESS EXCLUSIVE MODE" + partitions.each do |partition| connection.execute partition.to_sql diff --git a/lib/gitlab/database/partitioning/sliding_list_strategy.rb b/lib/gitlab/database/partitioning/sliding_list_strategy.rb index 21b86b43ae7..e9865fb91d6 100644 --- a/lib/gitlab/database/partitioning/sliding_list_strategy.rb +++ b/lib/gitlab/database/partitioning/sliding_list_strategy.rb @@ -44,7 +44,18 @@ module Gitlab def extra_partitions possibly_extra = current_partitions[0...-1] # Never consider the most recent partition - possibly_extra.take_while { |p| detach_partition_if.call(p.value) } + extra = possibly_extra.take_while { |p| detach_partition_if.call(p.value) } + + default_value = current_default_value + if extra.any? 
{ |p| p.value == default_value } + Gitlab::AppLogger.error(message: "Inconsistent partition detected: partition with value #{current_default_value} should not be deleted because it's used as the default value.", + partition_number: current_default_value, + table_name: model.table_name) + + extra = extra.reject { |p| p.value == default_value } + end + + extra end def after_adding_partitions @@ -64,6 +75,21 @@ module Gitlab private + def current_default_value + column_name = model.connection.quote(partitioning_key) + table_name = model.connection.quote(model.table_name) + + value = model.connection.select_value <<~SQL + SELECT columns.column_default AS default_value + FROM information_schema.columns columns + WHERE columns.column_name = #{column_name} AND columns.table_name = #{table_name} + SQL + + raise "No default value found for the #{partitioning_key} column within #{model.name}" if value.nil? + + Integer(value) + end + def ensure_partitioning_column_ignored! unless model.ignored_columns.include?(partitioning_key.to_s) raise "Add #{partitioning_key} to #{model.name}.ignored_columns to use it with SlidingListStrategy" diff --git a/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb b/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb index 17a42d997e6..f551fa06cad 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table.rb @@ -4,7 +4,7 @@ module Gitlab module Database module PartitioningMigrationHelpers # Class that will generically copy data from a given table into its corresponding partitioned table - class BackfillPartitionedTable + class BackfillPartitionedTable < ::Gitlab::BackgroundMigration::BaseJob include ::Gitlab::Database::DynamicModelHelpers SUB_BATCH_SIZE = 2_500 @@ -21,7 +21,7 @@ module Gitlab return end - bulk_copy = BulkCopy.new(source_table, partitioned_table, 
source_column) + bulk_copy = BulkCopy.new(source_table, partitioned_table, source_column, connection: connection) parent_batch_relation = relation_scoped_to_range(source_table, source_column, start_id, stop_id) parent_batch_relation.each_batch(of: SUB_BATCH_SIZE) do |sub_batch| @@ -36,10 +36,6 @@ module Gitlab private - def connection - ActiveRecord::Base.connection - end - def transaction_open? connection.transaction_open? end @@ -53,7 +49,8 @@ module Gitlab end def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id) - define_batchable_model(source_table).where(source_key_column => start_id..stop_id) + define_batchable_model(source_table) + .where(source_key_column => start_id..stop_id) end def mark_jobs_as_succeeded(*arguments) @@ -64,12 +61,13 @@ module Gitlab class BulkCopy DELIMITER = ', ' - attr_reader :source_table, :destination_table, :source_column + attr_reader :source_table, :destination_table, :source_column, :connection - def initialize(source_table, destination_table, source_column) + def initialize(source_table, destination_table, source_column, connection:) @source_table = source_table @destination_table = destination_table @source_column = source_column + @connection = connection end def copy_between(start_id, stop_id) @@ -85,10 +83,6 @@ module Gitlab private - def connection - @connection ||= ActiveRecord::Base.connection - end - def column_listing @column_listing ||= connection.columns(source_table).map(&:name).join(DELIMITER) end diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb index c382d2f0715..984c708aa48 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb @@ -406,7 +406,8 @@ module Gitlab end def copy_missed_records(source_table_name, partitioned_table_name, source_column) 
- backfill_table = BackfillPartitionedTable.new + backfill_table = BackfillPartitionedTable.new(connection: connection) + relation = ::Gitlab::Database::BackgroundMigrationJob.pending .for_partitioning_migration(MIGRATION_CLASS_NAME, source_table_name) diff --git a/lib/gitlab/database/reflection.rb b/lib/gitlab/database/reflection.rb index 48a4de28541..3ea7277571f 100644 --- a/lib/gitlab/database/reflection.rb +++ b/lib/gitlab/database/reflection.rb @@ -105,6 +105,35 @@ module Gitlab row['system_identifier'] end + def flavor + { + # Based on https://aws.amazon.com/premiumsupport/knowledge-center/aurora-version-number/ + 'Amazon Aurora PostgreSQL' => { statement: 'SELECT AURORA_VERSION()', error: /PG::UndefinedFunction/ }, + # Based on https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_PostgreSQL.html#PostgreSQL.Concepts.General.FeatureSupport.Extensions, + # this is also available for both Aurora and RDS, so we need to check for the former first. + 'PostgreSQL on Amazon RDS' => { statement: 'SHOW rds.extensions', error: /PG::UndefinedObject/ }, + # Based on https://cloud.google.com/sql/docs/postgres/flags#postgres-c this should be specific + # to Cloud SQL for PostgreSQL + 'Cloud SQL for PostgreSQL' => { statement: 'SHOW cloudsql.iam_authentication', error: /PG::UndefinedObject/ }, + # Based on + # - https://docs.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-extensions + # - https://docs.microsoft.com/en-us/azure/postgresql/concepts-extensions + # this should be available only for Azure Database for PostgreSQL - Flexible Server. 
+ 'Azure Database for PostgreSQL - Flexible Server' => { statement: 'SHOW azure.extensions', error: /PG::UndefinedObject/ }, + # Based on + # - https://docs.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-servers + # - https://docs.microsoft.com/en-us/azure/postgresql/concepts-servers#managing-your-server + # this database is present on both Flexible and Single server, so we should check the former first. + 'Azure Database for PostgreSQL - Single Server' => { statement: "SELECT datname FROM pg_database WHERE datname = 'azure_maintenance'" } + }.each do |flavor, conditions| + return flavor if connection.execute(conditions[:statement]).to_a.present? + rescue ActiveRecord::StatementInvalid => e + raise if conditions[:error] && !e.message.match?(conditions[:error]) + end + + nil + end + private def connection diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb index 6ffe14249f0..91c3fcc7d72 100644 --- a/lib/gitlab/database/reindexing.rb +++ b/lib/gitlab/database/reindexing.rb @@ -76,20 +76,7 @@ module Gitlab def self.cleanup_leftovers! 
PostgresIndex.reindexing_leftovers.each do |index| - Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity") - - retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( - connection: index.connection, - timing_configuration: REMOVE_INDEX_RETRY_CONFIG, - klass: self.class, - logger: Gitlab::AppLogger - ) - - retries.run(raise_on_exhaustion: false) do - index.connection.tap do |conn| - conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}") - end - end + Coordinator.new(index).drop end end end diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb index 3e4a83aa2e7..b4f7da999df 100644 --- a/lib/gitlab/database/reindexing/coordinator.rb +++ b/lib/gitlab/database/reindexing/coordinator.rb @@ -31,6 +31,25 @@ module Gitlab end end + def drop + try_obtain_lease do + Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity") + + retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new( + connection: index.connection, + timing_configuration: REMOVE_INDEX_RETRY_CONFIG, + klass: self.class, + logger: Gitlab::AppLogger + ) + + retries.run(raise_on_exhaustion: false) do + index.connection.tap do |conn| + conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}") + end + end + end + end + private def with_notifications(action) |