diff options
Diffstat (limited to 'lib/gitlab/database')
34 files changed, 603 insertions, 211 deletions
diff --git a/lib/gitlab/database/async_indexes/index_creator.rb b/lib/gitlab/database/async_indexes/index_creator.rb index 994a1deba57..2fb4cc8f675 100644 --- a/lib/gitlab/database/async_indexes/index_creator.rb +++ b/lib/gitlab/database/async_indexes/index_creator.rb @@ -47,6 +47,10 @@ module Gitlab TIMEOUT_PER_ACTION end + def lease_key + [super, async_index.connection_db_config.name].join('/') + end + def set_statement_timeout connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT) yield diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb index 32765cb6a56..503172dd750 100644 --- a/lib/gitlab/database/background_migration/batched_job.rb +++ b/lib/gitlab/database/background_migration/batched_job.rb @@ -21,6 +21,7 @@ module Gitlab from_union([failed_jobs, self.stuck]) } + scope :except_succeeded, -> { where(status: self.statuses.except(:succeeded).values) } enum status: { pending: 0, diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb index d9fc2ea48f6..2844cbe4a74 100644 --- a/lib/gitlab/database/background_migration/batched_migration.rb +++ b/lib/gitlab/database/background_migration/batched_migration.rb @@ -18,6 +18,8 @@ module Gitlab scope: [:job_class_name, :table_name, :column_name] } + validate :validate_batched_jobs_status, if: -> { status_changed? && finished? 
} + scope :queue_order, -> { order(id: :asc) } scope :queued, -> { where(status: [:active, :paused]) } scope :for_configuration, ->(job_class_name, table_name, column_name, job_arguments) do @@ -92,11 +94,11 @@ module Gitlab end def job_class_name=(class_name) - write_attribute(:job_class_name, class_name.demodulize) + write_attribute(:job_class_name, class_name.delete_prefix("::")) end def batch_class_name=(class_name) - write_attribute(:batch_class_name, class_name.demodulize) + write_attribute(:batch_class_name, class_name.delete_prefix("::")) end def migrated_tuple_count @@ -133,6 +135,12 @@ module Gitlab def optimize! BatchOptimizer.new(self).optimize! end + + private + + def validate_batched_jobs_status + errors.add(:batched_jobs, 'jobs need to be succeeded') if batched_jobs.except_succeeded.exists? + end end end end diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb index 870cf25984b..68a0c15480a 100644 --- a/lib/gitlab/database/count/reltuples_count_strategy.rb +++ b/lib/gitlab/database/count/reltuples_count_strategy.rb @@ -32,12 +32,12 @@ module Gitlab # Models using single-type inheritance (STI) don't work with # reltuple count estimates. We just have to ignore them and # use another strategy to compute them. 
- def non_sti_models + def non_sti_models(models) models.reject { |model| sti_model?(model) } end - def non_sti_table_names - non_sti_models.map(&:table_name) + def non_sti_table_names(models) + non_sti_models(models).map(&:table_name) end def sti_model?(model) @@ -45,21 +45,34 @@ module Gitlab model.base_class != model end - def table_names - models.map(&:table_name) + def table_to_model_mapping + @table_to_model_mapping ||= models.each_with_object({}) { |model, h| h[model.table_name] = model } + end + + def table_to_model(table_name) + table_to_model_mapping[table_name] end def size_estimates(check_statistics: true) - table_to_model = models.each_with_object({}) { |model, h| h[model.table_name] = model } - - # Querying tuple stats only works on the primary. Due to load balancing, the - # easiest way to do this is to start a transaction. - ActiveRecord::Base.transaction do # rubocop: disable Database/MultipleDatabases - get_statistics(non_sti_table_names, check_statistics: check_statistics).each_with_object({}) do |row, data| - model = table_to_model[row.table_name] - data[model] = row.estimate + results = {} + + models.group_by { |model| model.connection_db_config.name }.map do |db_name, models_for_db| + base_model = Gitlab::Database.database_base_models[db_name] + tables = non_sti_table_names(models_for_db) + + # Querying tuple stats only works on the primary. Due to load balancing, the + # easiest way to do this is to start a transaction. 
+ base_model.transaction do + Gitlab::Database::SharedModel.using_connection(base_model.connection) do + get_statistics(tables, check_statistics: check_statistics).each do |row| + model = table_to_model(row.table_name) + results[model] = row.estimate + end + end end end + + results end # Generates the PostgreSQL query to return the tuples for tables diff --git a/lib/gitlab/database/count/tablesample_count_strategy.rb b/lib/gitlab/database/count/tablesample_count_strategy.rb index 489bc0aacea..92c8de9aeac 100644 --- a/lib/gitlab/database/count/tablesample_count_strategy.rb +++ b/lib/gitlab/database/count/tablesample_count_strategy.rb @@ -61,7 +61,7 @@ module Gitlab #{where_clause(model)} SQL - rows = ActiveRecord::Base.connection.select_all(query) # rubocop: disable Database/MultipleDatabases + rows = model.connection.select_all(query) Integer(rows.first['count']) end diff --git a/lib/gitlab/database/gitlab_loose_foreign_keys.yml b/lib/gitlab/database/gitlab_loose_foreign_keys.yml new file mode 100644 index 00000000000..0343c054f23 --- /dev/null +++ b/lib/gitlab/database/gitlab_loose_foreign_keys.yml @@ -0,0 +1,69 @@ +ci_pipeline_chat_data: + - table: chat_names + column: chat_name_id + on_delete: async_delete +dast_scanner_profiles_builds: + - table: ci_builds + column: ci_build_id + on_delete: async_delete +dast_scanner_profiles_builds: + - table: ci_builds + column: ci_build_id + on_delete: async_delete +dast_profiles_pipelines: + - table: ci_pipelines + column: ci_pipeline_id + on_delete: async_delete +clusters_applications_runners: + - table: ci_runners + column: runner_id + on_delete: async_nullify +ci_namespace_mirrors: + - table: namespaces + column: namespace_id + on_delete: async_delete +ci_builds: + - table: users + column: user_id + on_delete: async_nullify +ci_pipelines: + - table: merge_requests + column: merge_request_id + on_delete: async_delete + - table: external_pull_requests + column: external_pull_request_id + on_delete: async_nullify + - table: 
users + column: user_id + on_delete: async_nullify +ci_project_mirrors: + - table: projects + column: project_id + on_delete: async_delete + - table: namespaces + column: namespace_id + on_delete: async_delete +packages_build_infos: + - table: ci_pipelines + column: pipeline_id + on_delete: async_nullify +packages_package_file_build_infos: + - table: ci_pipelines + column: pipeline_id + on_delete: async_nullify +pages_deployments: + - table: ci_builds + column: ci_build_id + on_delete: async_nullify +terraform_state_versions: + - table: ci_builds + column: ci_build_id + on_delete: async_nullify +merge_request_metrics: + - table: ci_pipelines + column: pipeline_id + on_delete: async_delete +project_pages_metadata: + - table: ci_job_artifacts + column: artifacts_archive_id + on_delete: async_nullify diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml index 66157e998a0..24c2d634780 100644 --- a/lib/gitlab/database/gitlab_schemas.yml +++ b/lib/gitlab/database/gitlab_schemas.yml @@ -1,4 +1,5 @@ abuse_reports: :gitlab_main +agent_activity_events: :gitlab_main agent_group_authorizations: :gitlab_main agent_project_authorizations: :gitlab_main alert_management_alert_assignees: :gitlab_main @@ -85,6 +86,7 @@ ci_job_token_project_scope_links: :gitlab_ci ci_job_variables: :gitlab_ci ci_minutes_additional_packs: :gitlab_ci ci_namespace_monthly_usages: :gitlab_ci +ci_namespace_mirrors: :gitlab_ci ci_pending_builds: :gitlab_ci ci_pipeline_artifacts: :gitlab_ci ci_pipeline_chat_data: :gitlab_ci @@ -96,6 +98,7 @@ ci_pipelines: :gitlab_ci ci_pipeline_variables: :gitlab_ci ci_platform_metrics: :gitlab_ci ci_project_monthly_usages: :gitlab_ci +ci_project_mirrors: :gitlab_ci ci_refs: :gitlab_ci ci_resource_groups: :gitlab_ci ci_resources: :gitlab_ci @@ -161,6 +164,7 @@ dependency_proxy_group_settings: :gitlab_main dependency_proxy_image_ttl_group_policies: :gitlab_main dependency_proxy_manifests: :gitlab_main deploy_keys_projects: :gitlab_main 
+deployment_approvals: :gitlab_main deployment_clusters: :gitlab_main deployment_merge_requests: :gitlab_main deployments: :gitlab_main @@ -249,6 +253,7 @@ incident_management_oncall_schedules: :gitlab_main incident_management_oncall_shifts: :gitlab_main incident_management_pending_alert_escalations: :gitlab_main incident_management_pending_issue_escalations: :gitlab_main +incident_management_timeline_events: :gitlab_main index_statuses: :gitlab_main in_product_marketing_emails: :gitlab_main insights: :gitlab_main @@ -260,6 +265,7 @@ issuable_severities: :gitlab_main issuable_slas: :gitlab_main issue_assignees: :gitlab_main issue_customer_relations_contacts: :gitlab_main +issue_emails: :gitlab_main issue_email_participants: :gitlab_main issue_links: :gitlab_main issue_metrics: :gitlab_main @@ -281,6 +287,7 @@ ldap_group_links: :gitlab_main lfs_file_locks: :gitlab_main lfs_objects: :gitlab_main lfs_objects_projects: :gitlab_main +lfs_object_states: :gitlab_main licenses: :gitlab_main lists: :gitlab_main list_user_preferences: :gitlab_main @@ -290,6 +297,7 @@ members: :gitlab_main merge_request_assignees: :gitlab_main merge_request_blocks: :gitlab_main merge_request_cleanup_schedules: :gitlab_main +merge_requests_compliance_violations: :gitlab_main merge_request_context_commit_diff_files: :gitlab_main merge_request_context_commits: :gitlab_main merge_request_diff_commits: :gitlab_main @@ -314,6 +322,7 @@ namespace_package_settings: :gitlab_main namespace_root_storage_statistics: :gitlab_main namespace_settings: :gitlab_main namespaces: :gitlab_main +namespaces_sync_events: :gitlab_main namespace_statistics: :gitlab_main note_diff_files: :gitlab_main notes: :gitlab_main @@ -363,6 +372,7 @@ packages_pypi_metadata: :gitlab_main packages_rubygems_metadata: :gitlab_main packages_tags: :gitlab_main pages_deployments: :gitlab_main +pages_deployment_states: :gitlab_main pages_domain_acme_orders: :gitlab_main pages_domains: :gitlab_main partitioned_foreign_keys: :gitlab_main 
@@ -408,6 +418,7 @@ project_repository_storage_moves: :gitlab_main project_security_settings: :gitlab_main project_settings: :gitlab_main projects: :gitlab_main +projects_sync_events: :gitlab_main project_statistics: :gitlab_main project_topics: :gitlab_main project_tracing_settings: :gitlab_main @@ -485,6 +496,7 @@ trending_projects: :gitlab_main u2f_registrations: :gitlab_main upcoming_reconciliations: :gitlab_main uploads: :gitlab_main +upload_states: :gitlab_main user_agent_details: :gitlab_main user_callouts: :gitlab_main user_canonical_emails: :gitlab_main @@ -526,6 +538,7 @@ vulnerability_issue_links: :gitlab_main vulnerability_occurrence_identifiers: :gitlab_main vulnerability_occurrence_pipelines: :gitlab_main vulnerability_occurrences: :gitlab_main +vulnerability_reads: :gitlab_main vulnerability_remediations: :gitlab_main vulnerability_scanners: :gitlab_main vulnerability_statistics: :gitlab_main diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb index 52eb0764ae3..e16db5af8ce 100644 --- a/lib/gitlab/database/load_balancing.rb +++ b/lib/gitlab/database/load_balancing.rb @@ -30,6 +30,10 @@ module Gitlab end end + def self.primary_only? + each_load_balancer.all?(&:primary_only?) + end + def self.release_hosts each_load_balancer(&:release_host) end diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb index da313361073..e769cb5c35c 100644 --- a/lib/gitlab/database/load_balancing/configuration.rb +++ b/lib/gitlab/database/load_balancing/configuration.rb @@ -107,7 +107,11 @@ module Gitlab hosts.any? || service_discovery_enabled? end + # This is disabled for Rake tasks to ensure e.g. database migrations + # always produce consistent results. def service_discovery_enabled? + return false if Gitlab::Runtime.rake? + service_discovery[:record].present? 
end diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb index b9acc36b4cc..5d91292b8de 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb @@ -6,6 +6,8 @@ module Gitlab class SidekiqServerMiddleware JobReplicaNotUpToDate = Class.new(StandardError) + MINIMUM_DELAY_INTERVAL_SECONDS = 0.8 + def call(worker, job, _queue) worker_class = worker.class strategy = select_load_balancing_strategy(worker_class, job) @@ -42,11 +44,15 @@ module Gitlab wal_locations = get_wal_locations(job) - return :primary_no_wal unless wal_locations + return :primary_no_wal if wal_locations.blank? + + # Happy case: we can read from a replica. + return replica_strategy(worker_class, job) if databases_in_sync?(wal_locations) + + sleep_if_needed(job) if databases_in_sync?(wal_locations) - # Happy case: we can read from a replica. - retried_before?(worker_class, job) ? :replica_retried : :replica + replica_strategy(worker_class, job) elsif can_retry?(worker_class, job) # Optimistic case: The worker allows retries and we have retries left. :retry @@ -56,17 +62,14 @@ module Gitlab end end - def get_wal_locations(job) - job['dedup_wal_locations'] || job['wal_locations'] || legacy_wal_location(job) - end + def sleep_if_needed(job) + remaining_delay = MINIMUM_DELAY_INTERVAL_SECONDS - (Time.current.to_f - job['created_at'].to_f) - # Already scheduled jobs could still contain legacy database write location. 
- # TODO: remove this in the next iteration - # https://gitlab.com/gitlab-org/gitlab/-/issues/338213 - def legacy_wal_location(job) - wal_location = job['database_write_location'] || job['database_replica_location'] + sleep remaining_delay if remaining_delay > 0 && remaining_delay < MINIMUM_DELAY_INTERVAL_SECONDS + end - { ::Gitlab::Database::MAIN_DATABASE_NAME.to_sym => wal_location } if wal_location + def get_wal_locations(job) + job['dedup_wal_locations'] || job['wal_locations'] end def load_balancing_available?(worker_class) @@ -79,6 +82,10 @@ module Gitlab worker_class.get_data_consistency == :delayed && not_yet_retried?(job) end + def replica_strategy(worker_class, job) + retried_before?(worker_class, job) ? :replica_retried : :replica + end + def retried_before?(worker_class, job) worker_class.get_data_consistency == :delayed && !not_yet_retried?(job) end diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb index 834e9c6d3c6..8e5dc98e96e 100644 --- a/lib/gitlab/database/load_balancing/sticking.rb +++ b/lib/gitlab/database/load_balancing/sticking.rb @@ -123,21 +123,18 @@ module Gitlab def unstick(namespace, id) Gitlab::Redis::SharedState.with do |redis| redis.del(redis_key_for(namespace, id)) - redis.del(old_redis_key_for(namespace, id)) end end def set_write_location_for(namespace, id, location) Gitlab::Redis::SharedState.with do |redis| redis.set(redis_key_for(namespace, id), location, ex: EXPIRATION) - redis.set(old_redis_key_for(namespace, id), location, ex: EXPIRATION) end end def last_write_location_for(namespace, id) Gitlab::Redis::SharedState.with do |redis| - redis.get(redis_key_for(namespace, id)) || - redis.get(old_redis_key_for(namespace, id)) + redis.get(redis_key_for(namespace, id)) end end @@ -146,10 +143,6 @@ module Gitlab "database-load-balancing/write-location/#{name}/#{namespace}/#{id}" end - - def old_redis_key_for(namespace, id) - 
"database-load-balancing/write-location/#{namespace}/#{id}" - end end end end diff --git a/lib/gitlab/database/loose_foreign_keys.rb b/lib/gitlab/database/loose_foreign_keys.rb new file mode 100644 index 00000000000..1ecfb5ce47f --- /dev/null +++ b/lib/gitlab/database/loose_foreign_keys.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LooseForeignKeys + def self.definitions_by_table + @definitions_by_table ||= definitions.group_by(&:to_table).with_indifferent_access.freeze + end + + def self.definitions + @definitions ||= loose_foreign_keys_yaml.flat_map do |child_table_name, configs| + configs.map { |config| build_definition(child_table_name, config) } + end.freeze + end + + def self.build_definition(child_table_name, config) + parent_table_name = config.fetch('table') + + ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( + child_table_name, + parent_table_name, + { + column: config.fetch('column'), + on_delete: config.fetch('on_delete').to_sym, + gitlab_schema: GitlabSchema.table_schema(child_table_name) + } + ) + end + + def self.loose_foreign_keys_yaml + @loose_foreign_keys_yaml ||= YAML.load_file(Rails.root.join('lib/gitlab/database/gitlab_loose_foreign_keys.yml')) + end + + private_class_method :build_definition + private_class_method :loose_foreign_keys_yaml + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 7dce4fa0ce2..4245dd80714 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -4,6 +4,7 @@ module Gitlab module Database module MigrationHelpers include Migrations::BackgroundMigrationHelpers + include Migrations::BatchedBackgroundMigrationHelpers include DynamicModelHelpers include RenameTableHelpers include AsyncIndexes::MigrationHelpers diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb 
index bdaf0d35a83..8c33c41ce77 100644 --- a/lib/gitlab/database/migrations/background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/background_migration_helpers.rb @@ -5,59 +5,7 @@ module Gitlab module Migrations module BackgroundMigrationHelpers BATCH_SIZE = 1_000 # Number of rows to process per job - SUB_BATCH_SIZE = 100 # Number of rows to process per sub-batch JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time - BATCH_CLASS_NAME = 'PrimaryKeyBatchingStrategy' # Default batch class for batched migrations - BATCH_MIN_VALUE = 1 # Default minimum value for batched migrations - BATCH_MIN_DELAY = 2.minutes.freeze # Minimum delay between batched migrations - - # Bulk queues background migration jobs for an entire table, batched by ID range. - # "Bulk" meaning many jobs will be pushed at a time for efficiency. - # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`. - # - # model_class - The table being iterated over - # job_class_name - The background migration job class as a string - # batch_size - The maximum number of rows per job - # - # Example: - # - # class Route < ActiveRecord::Base - # include EachBatch - # self.table_name = 'routes' - # end - # - # bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes') - # - # Where the model_class includes EachBatch, and the background migration exists: - # - # class Gitlab::BackgroundMigration::ProcessRoutes - # def perform(start_id, end_id) - # # do something - # end - # end - def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BATCH_SIZE) - raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') - - jobs = [] - table_name = model_class.quoted_table_name - - model_class.each_batch(of: batch_size) do |relation| - start_id, end_id = relation.pluck("MIN(#{table_name}.id)", "MAX(#{table_name}.id)").first - - if jobs.length >= 
JOB_BUFFER_SIZE - # Note: This code path generally only helps with many millions of rows - # We push multiple jobs at a time to reduce the time spent in - # Sidekiq/Redis operations. We're using this buffer based approach so we - # don't need to run additional queries for every range. - bulk_migrate_async(jobs) - jobs.clear - end - - jobs << [job_class_name, [start_id, end_id]] - end - - bulk_migrate_async(jobs) unless jobs.empty? - end # Queues background migration jobs for an entire table in batches. # The default batching column used is the standard primary key `id`. @@ -137,6 +85,7 @@ module Gitlab # Requeue pending jobs previously queued with #queue_background_migration_jobs_by_range_at_intervals # # This method is useful to schedule jobs that had previously failed. + # It can only be used if the previous background migration used job tracking like the queue_background_migration_jobs_by_range_at_intervals helper. # # job_class_name - The background migration job class as a string # delay_interval - The duration between each job's scheduled time @@ -170,100 +119,6 @@ module Gitlab duration end - # Creates a batched background migration for the given table. A batched migration runs one job - # at a time, computing the bounds of the next batch based on the current migration settings and the previous - # batch bounds. Each job's execution status is tracked in the database as the migration runs. The given job - # class must be present in the Gitlab::BackgroundMigration module, and the batch class (if specified) must be - # present in the Gitlab::BackgroundMigration::BatchingStrategies module. - # - # If migration with same job_class_name, table_name, column_name, and job_aruments already exists, this helper - # will log an warning and not create a new one. 
- # - # job_class_name - The background migration job class as a string - # batch_table_name - The name of the table the migration will batch over - # batch_column_name - The name of the column the migration will batch over - # job_arguments - Extra arguments to pass to the job instance when the migration runs - # job_interval - The pause interval between each job's execution, minimum of 2 minutes - # batch_min_value - The value in the column the batching will begin at - # batch_max_value - The value in the column the batching will end at, defaults to `SELECT MAX(batch_column)` - # batch_class_name - The name of the class that will be called to find the range of each next batch - # batch_size - The maximum number of rows per job - # sub_batch_size - The maximum number of rows processed per "iteration" within the job - # - # - # *Returns the created BatchedMigration record* - # - # Example: - # - # queue_batched_background_migration( - # 'CopyColumnUsingBackgroundMigrationJob', - # :events, - # :id, - # job_interval: 2.minutes, - # other_job_arguments: ['column1', 'column2']) - # - # Where the the background migration exists: - # - # class Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob - # def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, *other_args) - # # do something - # end - # end - def queue_batched_background_migration( # rubocop:disable Metrics/ParameterLists - job_class_name, - batch_table_name, - batch_column_name, - *job_arguments, - job_interval:, - batch_min_value: BATCH_MIN_VALUE, - batch_max_value: nil, - batch_class_name: BATCH_CLASS_NAME, - batch_size: BATCH_SIZE, - sub_batch_size: SUB_BATCH_SIZE - ) - - if Gitlab::Database::BackgroundMigration::BatchedMigration.for_configuration(job_class_name, batch_table_name, batch_column_name, job_arguments).exists? 
- Gitlab::AppLogger.warn "Batched background migration not enqueued because it already exists: " \ - "job_class_name: #{job_class_name}, table_name: #{batch_table_name}, column_name: #{batch_column_name}, " \ - "job_arguments: #{job_arguments.inspect}" - return - end - - job_interval = BATCH_MIN_DELAY if job_interval < BATCH_MIN_DELAY - - batch_max_value ||= connection.select_value(<<~SQL) - SELECT MAX(#{connection.quote_column_name(batch_column_name)}) - FROM #{connection.quote_table_name(batch_table_name)} - SQL - - migration_status = batch_max_value.nil? ? :finished : :active - batch_max_value ||= batch_min_value - - migration = Gitlab::Database::BackgroundMigration::BatchedMigration.create!( - job_class_name: job_class_name, - table_name: batch_table_name, - column_name: batch_column_name, - job_arguments: job_arguments, - interval: job_interval, - min_value: batch_min_value, - max_value: batch_max_value, - batch_class_name: batch_class_name, - batch_size: batch_size, - sub_batch_size: sub_batch_size, - status: migration_status) - - # This guard is necessary since #total_tuple_count was only introduced schema-wise, - # after this migration helper had been used for the first time. - return migration unless migration.respond_to?(:total_tuple_count) - - # We keep track of the estimated number of tuples to reason later - # about the overall progress of a migration. - migration.total_tuple_count = Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate - migration.save! - - migration - end - # Force a background migration to complete. # # WARNING: This method will block the caller and move the background migration from an @@ -275,6 +130,7 @@ module Gitlab # 4. Optionally remove job tracking information. # # This method does not garauntee that all jobs completed successfully. + # It can only be used if the previous background migration used the queue_background_migration_jobs_by_range_at_intervals helper. 
def finalize_background_migration(class_name, delete_tracking_jobs: ['succeeded']) # Empty the sidekiq queue. Gitlab::BackgroundMigration.steal(class_name) diff --git a/lib/gitlab/database/migrations/batched_background_migration_helpers.rb b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb new file mode 100644 index 00000000000..dcaf7fad05f --- /dev/null +++ b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb @@ -0,0 +1,118 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + # BatchedBackgroundMigrations are a new approach to scheduling and executing background migrations, which uses + # persistent state in the database to track each migration. This avoids having to batch over an entire table and + # schedule a large number of sidekiq jobs upfront. It also provides for more flexibility as the migration runs, + # as it can be paused and restarted, and have configuration values like the batch size updated dynamically as the + # migration runs. + # + # For now, these migrations are not considered ready for general use, for more information see the tracking epic: + # https://gitlab.com/groups/gitlab-org/-/epics/6751 + module BatchedBackgroundMigrationHelpers + BATCH_SIZE = 1_000 # Number of rows to process per job + SUB_BATCH_SIZE = 100 # Number of rows to process per sub-batch + BATCH_CLASS_NAME = 'PrimaryKeyBatchingStrategy' # Default batch class for batched migrations + BATCH_MIN_VALUE = 1 # Default minimum value for batched migrations + BATCH_MIN_DELAY = 2.minutes.freeze # Minimum delay between batched migrations + + # Creates a batched background migration for the given table. A batched migration runs one job + # at a time, computing the bounds of the next batch based on the current migration settings and the previous + # batch bounds. Each job's execution status is tracked in the database as the migration runs. 
The given job + # class must be present in the Gitlab::BackgroundMigration module, and the batch class (if specified) must be + # present in the Gitlab::BackgroundMigration::BatchingStrategies module. + # + # If migration with same job_class_name, table_name, column_name, and job_arguments already exists, this helper + # will log a warning and not create a new one. + # + # job_class_name - The background migration job class as a string + # batch_table_name - The name of the table the migration will batch over + # batch_column_name - The name of the column the migration will batch over + # job_arguments - Extra arguments to pass to the job instance when the migration runs + # job_interval - The pause interval between each job's execution, minimum of 2 minutes + # batch_min_value - The value in the column the batching will begin at + # batch_max_value - The value in the column the batching will end at, defaults to `SELECT MAX(batch_column)` + # batch_class_name - The name of the class that will be called to find the range of each next batch + # batch_size - The maximum number of rows per job + # sub_batch_size - The maximum number of rows processed per "iteration" within the job + # + # *Returns the created BatchedMigration record* + # + # Example: + # + # queue_batched_background_migration( + # 'CopyColumnUsingBackgroundMigrationJob', + # :events, + # :id, + # job_interval: 2.minutes, + # other_job_arguments: ['column1', 'column2']) + # + # Where the background migration exists: + # + # class Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob + # def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, *other_args) + # # do something + # end + # end + def queue_batched_background_migration( # rubocop:disable Metrics/ParameterLists + job_class_name, + batch_table_name, + batch_column_name, + *job_arguments, + job_interval:, + batch_min_value: BATCH_MIN_VALUE, + batch_max_value: nil, + batch_class_name: BATCH_CLASS_NAME, + 
batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + + if Gitlab::Database::BackgroundMigration::BatchedMigration.for_configuration(job_class_name, batch_table_name, batch_column_name, job_arguments).exists? + Gitlab::AppLogger.warn "Batched background migration not enqueued because it already exists: " \ + "job_class_name: #{job_class_name}, table_name: #{batch_table_name}, column_name: #{batch_column_name}, " \ + "job_arguments: #{job_arguments.inspect}" + return + end + + job_interval = BATCH_MIN_DELAY if job_interval < BATCH_MIN_DELAY + + batch_max_value ||= connection.select_value(<<~SQL) + SELECT MAX(#{connection.quote_column_name(batch_column_name)}) + FROM #{connection.quote_table_name(batch_table_name)} + SQL + + migration_status = batch_max_value.nil? ? :finished : :active + batch_max_value ||= batch_min_value + + migration = Gitlab::Database::BackgroundMigration::BatchedMigration.create!( + job_class_name: job_class_name, + table_name: batch_table_name, + column_name: batch_column_name, + job_arguments: job_arguments, + interval: job_interval, + min_value: batch_min_value, + max_value: batch_max_value, + batch_class_name: batch_class_name, + batch_size: batch_size, + sub_batch_size: sub_batch_size, + status: migration_status) + + # This guard is necessary since #total_tuple_count was only introduced schema-wise, + # after this migration helper had been used for the first time. + return migration unless migration.respond_to?(:total_tuple_count) + + # We keep track of the estimated number of tuples to reason later + # about the overall progress of a migration. + migration.total_tuple_count = Gitlab::Database::SharedModel.using_connection(connection) do + Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate + end + migration.save! 
+ + migration + end + end + end + end +end diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb index 6e5ffb74411..1f7e81cae84 100644 --- a/lib/gitlab/database/migrations/instrumentation.rb +++ b/lib/gitlab/database/migrations/instrumentation.rb @@ -14,11 +14,11 @@ module Gitlab @result_dir = result_dir end - def observe(version:, name:, &block) + def observe(version:, name:, connection:, &block) observation = Observation.new(version, name) observation.success = true - observers = observer_classes.map { |c| c.new(observation, @result_dir) } + observers = observer_classes.map { |c| c.new(observation, @result_dir, connection) } exception = nil diff --git a/lib/gitlab/database/migrations/observers/migration_observer.rb b/lib/gitlab/database/migrations/observers/migration_observer.rb index 106f8f1f829..0006af73f6c 100644 --- a/lib/gitlab/database/migrations/observers/migration_observer.rb +++ b/lib/gitlab/database/migrations/observers/migration_observer.rb @@ -7,8 +7,8 @@ module Gitlab class MigrationObserver attr_reader :connection, :observation, :output_dir - def initialize(observation, output_dir) - @connection = ActiveRecord::Base.connection + def initialize(observation, output_dir, connection) + @connection = connection @observation = observation @output_dir = output_dir end diff --git a/lib/gitlab/database/migrations/runner.rb b/lib/gitlab/database/migrations/runner.rb index b267a64256b..f0bac594119 100644 --- a/lib/gitlab/database/migrations/runner.rb +++ b/lib/gitlab/database/migrations/runner.rb @@ -69,7 +69,7 @@ module Gitlab instrumentation = Instrumentation.new(result_dir: result_dir) sorted_migrations.each do |migration| - instrumentation.observe(version: migration.version, name: migration.name) do + instrumentation.observe(version: migration.version, name: migration.name, connection: ActiveRecord::Migration.connection) do ActiveRecord::Migrator.new(direction, migration_context.migrations, 
migration_context.schema_migration, migration.version).run end end diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb index 593824384b5..5e32ecad4ca 100644 --- a/lib/gitlab/database/partitioning/detached_partition_dropper.rb +++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb @@ -4,8 +4,6 @@ module Gitlab module Partitioning class DetachedPartitionDropper def perform - return unless Feature.enabled?(:drop_detached_partitions, default_enabled: :yaml) - Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop") Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition| diff --git a/lib/gitlab/database/partitioning/monthly_strategy.rb b/lib/gitlab/database/partitioning/monthly_strategy.rb index c93e775d7ed..9c8cccb3dc6 100644 --- a/lib/gitlab/database/partitioning/monthly_strategy.rb +++ b/lib/gitlab/database/partitioning/monthly_strategy.rb @@ -36,6 +36,10 @@ module Gitlab partitions end + def after_adding_partitions + # No-op, required by the partition manager + end + private def desired_partitions diff --git a/lib/gitlab/database/partitioning/partition_manager.rb b/lib/gitlab/database/partitioning/partition_manager.rb index 8742c0ff166..aa824dfbd2f 100644 --- a/lib/gitlab/database/partitioning/partition_manager.rb +++ b/lib/gitlab/database/partitioning/partition_manager.rb @@ -25,10 +25,8 @@ module Gitlab partitions_to_create = missing_partitions create(partitions_to_create) unless partitions_to_create.empty? - if Feature.enabled?(:partition_pruning, default_enabled: :yaml) - partitions_to_detach = extra_partitions - detach(partitions_to_detach) unless partitions_to_detach.empty? - end + partitions_to_detach = extra_partitions + detach(partitions_to_detach) unless partitions_to_detach.empty? 
end rescue StandardError => e Gitlab::AppLogger.error(message: "Failed to create / detach partition(s)", @@ -73,6 +71,8 @@ module Gitlab partition_name: partition.partition_name, table_name: partition.table) end + + model.partitioning_strategy.after_adding_partitions end end end diff --git a/lib/gitlab/database/partitioning/single_numeric_list_partition.rb b/lib/gitlab/database/partitioning/single_numeric_list_partition.rb new file mode 100644 index 00000000000..23ac73a0e53 --- /dev/null +++ b/lib/gitlab/database/partitioning/single_numeric_list_partition.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Partitioning + class SingleNumericListPartition + include Comparable + + def self.from_sql(table, partition_name, definition) + # A list partition can support multiple values, but we only support a single number + matches = definition.match(/FOR VALUES IN \('(?<value>\d+)'\)/) + + raise ArgumentError, 'Unknown partition definition' unless matches + + value = Integer(matches[:value]) + + new(table, value, partition_name: partition_name) + end + + attr_reader :table, :value + + def initialize(table, value, partition_name: nil ) + @table = table + @value = value + @partition_name = partition_name + end + + def partition_name + @partition_name || "#{table}_#{value}" + end + + def to_sql + <<~SQL + CREATE TABLE IF NOT EXISTS #{fully_qualified_partition} + PARTITION OF #{conn.quote_table_name(table)} + FOR VALUES IN (#{conn.quote(value)}) + SQL + end + + def to_detach_sql + <<~SQL + ALTER TABLE #{conn.quote_table_name(table)} + DETACH PARTITION #{fully_qualified_partition} + SQL + end + + def ==(other) + table == other.table && + partition_name == other.partition_name && + value == other.value + end + alias_method :eql?, :== + + def hash + [table, partition_name, value].hash + end + + def <=>(other) + return if table != other.table + + value <=> other.value + end + + private + + def fully_qualified_partition + "%s.%s" % 
[conn.quote_table_name(Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA), conn.quote_table_name(partition_name)] + end + + def conn + @conn ||= Gitlab::Database::SharedModel.connection + end + end + end + end +end diff --git a/lib/gitlab/database/partitioning/sliding_list_strategy.rb b/lib/gitlab/database/partitioning/sliding_list_strategy.rb new file mode 100644 index 00000000000..21b86b43ae7 --- /dev/null +++ b/lib/gitlab/database/partitioning/sliding_list_strategy.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Partitioning + class SlidingListStrategy + attr_reader :model, :partitioning_key, :next_partition_if, :detach_partition_if + + delegate :table_name, to: :model + + def initialize(model, partitioning_key, next_partition_if:, detach_partition_if:) + @model = model + @partitioning_key = partitioning_key + @next_partition_if = next_partition_if + @detach_partition_if = detach_partition_if + + ensure_partitioning_column_ignored! + end + + def current_partitions + Gitlab::Database::PostgresPartition.for_parent_table(table_name).map do |partition| + SingleNumericListPartition.from_sql(table_name, partition.name, partition.condition) + end.sort + end + + def missing_partitions + if no_partitions_exist? 
+ [initial_partition] + elsif next_partition_if.call(active_partition.value) + [next_partition] + else + [] + end + end + + def initial_partition + SingleNumericListPartition.new(table_name, 1) + end + + def next_partition + SingleNumericListPartition.new(table_name, active_partition.value + 1) + end + + def extra_partitions + possibly_extra = current_partitions[0...-1] # Never consider the most recent partition + + possibly_extra.take_while { |p| detach_partition_if.call(p.value) } + end + + def after_adding_partitions + active_value = active_partition.value + model.connection.change_column_default(model.table_name, partitioning_key, active_value) + end + + def active_partition + # The current partitions list is sorted, so the last partition has the highest value + # This is the only partition that receives inserts. + current_partitions.last + end + + def no_partitions_exist? + current_partitions.empty? + end + + private + + def ensure_partitioning_column_ignored! + unless model.ignored_columns.include?(partitioning_key.to_s) + raise "Add #{partitioning_key} to #{model.name}.ignored_columns to use it with SlidingListStrategy" + end + end + end + end + end +end diff --git a/lib/gitlab/database/pg_class.rb b/lib/gitlab/database/pg_class.rb index 0ce9eebc14c..bd582d903c6 100644 --- a/lib/gitlab/database/pg_class.rb +++ b/lib/gitlab/database/pg_class.rb @@ -2,7 +2,7 @@ module Gitlab module Database - class PgClass < ActiveRecord::Base + class PgClass < SharedModel self.table_name = 'pg_class' def self.for_table(relname) diff --git a/lib/gitlab/database/postgres_hll/buckets.rb b/lib/gitlab/database/postgres_hll/buckets.rb index 429e823379f..76818bbf340 100644 --- a/lib/gitlab/database/postgres_hll/buckets.rb +++ b/lib/gitlab/database/postgres_hll/buckets.rb @@ -65,8 +65,7 @@ module Gitlab ).to_i if num_zero_buckets > 0 && num_uniques < 2.5 * TOTAL_BUCKETS - ((0.7213 / (1 + 1.079 / TOTAL_BUCKETS)) * (TOTAL_BUCKETS * - Math.log2(TOTAL_BUCKETS.to_f / num_zero_buckets))) + 
TOTAL_BUCKETS * Math.log(TOTAL_BUCKETS.to_f / num_zero_buckets) else num_uniques end diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb index 0f285688876..2736f9d18dc 100644 --- a/lib/gitlab/database/query_analyzer.rb +++ b/lib/gitlab/database/query_analyzer.rb @@ -58,17 +58,15 @@ module Gitlab return unless parsed analyzers.each do |analyzer| - next if analyzer.suppressed? + next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed) analyzer.analyze(parsed) - rescue StandardError => e + rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e # We catch all standard errors to prevent validation errors to introduce fatal errors in production Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) end end - private - # Enable query analyzers def begin! analyzers = all_analyzers.select do |analyzer| @@ -77,7 +75,7 @@ module Gitlab true end - rescue StandardError => e + rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) false @@ -90,13 +88,15 @@ module Gitlab def end! enabled_analyzers.select do |analyzer| analyzer.end! - rescue StandardError => e + rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) end Thread.current[:query_analyzer_enabled_analyzers] = nil end + private + def enabled_analyzers Thread.current[:query_analyzer_enabled_analyzers] end diff --git a/lib/gitlab/database/query_analyzers/base.rb b/lib/gitlab/database/query_analyzers/base.rb index e8066f7a706..0802d3c8013 100644 --- a/lib/gitlab/database/query_analyzers/base.rb +++ b/lib/gitlab/database/query_analyzers/base.rb @@ -4,10 +4,17 @@ module Gitlab module Database module QueryAnalyzers class Base + # `Exception` to ensure that is not easily rescued when running in test env + QueryAnalyzerError = Class.new(Exception) # rubocop:disable Lint/InheritException + def self.suppressed? 
Thread.current[self.suppress_key] end + def self.requires_tracking?(parsed) + false + end + def self.suppress=(value) Thread.current[self.suppress_key] = value end diff --git a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb index 2233f3c4646..2e3db2a5c6e 100644 --- a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb +++ b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb @@ -4,7 +4,7 @@ module Gitlab module Database module QueryAnalyzers class PreventCrossDatabaseModification < Database::QueryAnalyzers::Base - CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) + CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(QueryAnalyzerError) # This method will allow cross database modifications within the block # Example: @@ -36,29 +36,36 @@ module Gitlab Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml) end + def self.requires_tracking?(parsed) + # The transaction boundaries always needs to be tracked regardless of suppress behavior + self.transaction_begin?(parsed) || self.transaction_end?(parsed) + end + # rubocop:disable Metrics/AbcSize def self.analyze(parsed) - return if in_factory_bot_create? - database = ::Gitlab::Database.db_config_name(parsed.connection) sql = parsed.sql # We ignore BEGIN in tests as this is the outer transaction for # DatabaseCleaner - if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN')) + if self.transaction_begin?(parsed) context[:transaction_depth_by_db][database] += 1 return - elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? 
&& sql.start_with?('ROLLBACK', 'COMMIT')) + elsif self.transaction_end?(parsed) context[:transaction_depth_by_db][database] -= 1 - if context[:transaction_depth_by_db][database] <= 0 + if context[:transaction_depth_by_db][database] == 0 context[:modified_tables_by_db][database].clear + elsif context[:transaction_depth_by_db][database] < 0 + context[:transaction_depth_by_db][database] = 0 + raise CrossDatabaseModificationAcrossUnsupportedTablesError, "Misaligned cross-DB transactions discovered at query #{sql}. This could be a bug in #{self.class} or a valid issue to investigate. Read more at https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions ." end return end - return if context[:transaction_depth_by_db].values.all?(&:zero?) + return unless self.in_transaction? + return if in_factory_bot_create? # PgQuery might fail in some cases due to limited nesting: # https://github.com/pganalyze/pg_query/issues/209 @@ -97,6 +104,42 @@ module Gitlab end # rubocop:enable Metrics/AbcSize + def self.transaction_begin?(parsed) + # We ignore BEGIN or START in tests + unless Rails.env.test? + return true if transaction_stmt?(parsed, :TRANS_STMT_BEGIN) + return true if transaction_stmt?(parsed, :TRANS_STMT_START) + end + + # SAVEPOINT + return true if transaction_stmt?(parsed, :TRANS_STMT_SAVEPOINT) + + false + end + + def self.transaction_end?(parsed) + # We ignore ROLLBACK or COMMIT in tests + unless Rails.env.test? 
+ return true if transaction_stmt?(parsed, :TRANS_STMT_COMMIT) + return true if transaction_stmt?(parsed, :TRANS_STMT_COMMIT_PREPARED) + return true if transaction_stmt?(parsed, :TRANS_STMT_ROLLBACK) + return true if transaction_stmt?(parsed, :TRANS_STMT_ROLLBACK_PREPARED) + end + + # RELEASE (SAVEPOINT) or ROLLBACK TO (SAVEPOINT) + return true if transaction_stmt?(parsed, :TRANS_STMT_RELEASE) + return true if transaction_stmt?(parsed, :TRANS_STMT_ROLLBACK_TO) + + false + end + + # Known kinds: https://github.com/pganalyze/pg_query/blob/f6588703deb9d7a94b87b34b7c3bab240087fbc4/ext/pg_query/include/nodes/parsenodes.h#L3050 + def self.transaction_stmt?(parsed, kind) + parsed.pg.tree.stmts.map(&:stmt).any? do |stmt| + stmt.node == :transaction_stmt && stmt.transaction_stmt.kind == kind + end + end + # We only raise in tests for now otherwise some features will be broken # in development. For now we've mostly only added allowlist based on # spec names. Until we have allowed all the violations inline we don't @@ -105,13 +148,21 @@ module Gitlab Rails.env.test? end + def self.in_transaction? + context[:transaction_depth_by_db].values.any?(&:positive?) + end + # We ignore execution in the #create method from FactoryBot # because it is not representative of real code we run in # production. There are far too many false positives caused # by instantiating objects in different `gitlab_schema` in a # FactoryBot `create`. def self.in_factory_bot_create? - Rails.env.test? && caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } + Rails.env.test? && caller_locations.any? 
do |l| + l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' || + l.path.end_with?('lib/factory_bot/strategy/create.rb') || + l.path.end_with?('shoulda/matchers/active_record/validate_uniqueness_of_matcher.rb') && l.label == 'create_existing_record' + end end end end diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb index 7a22e324bdb..6ffe14249f0 100644 --- a/lib/gitlab/database/reindexing.rb +++ b/lib/gitlab/database/reindexing.rb @@ -15,6 +15,26 @@ module Gitlab # on e.g. vacuum. REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30 + def self.enabled? + Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml) + end + + def self.invoke(database = nil) + Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name| + next if database && database.to_s != connection_name.to_s + + Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false) + + # Hack: Before we do actual reindexing work, create async indexes + Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops) + + automatic_reindexing + end + rescue StandardError => e + Gitlab::AppLogger.error(e) + raise + end + # Performs automatic reindexing for a limited number of indexes per call # 1. Consume from the explicit reindexing queue # 2. 
Apply bloat heuristic to find most bloated indexes and reindex those diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb index 13298f67ca9..3e4a83aa2e7 100644 --- a/lib/gitlab/database/reindexing/coordinator.rb +++ b/lib/gitlab/database/reindexing/coordinator.rb @@ -53,6 +53,10 @@ module Gitlab def lease_timeout TIMEOUT_PER_ACTION end + + def lease_key + [super, index.connection_db_config.name].join('/') + end end end end diff --git a/lib/gitlab/database/schema_cache_with_renamed_table.rb b/lib/gitlab/database/schema_cache_with_renamed_table.rb index 28123edd708..74900dc0d26 100644 --- a/lib/gitlab/database/schema_cache_with_renamed_table.rb +++ b/lib/gitlab/database/schema_cache_with_renamed_table.rb @@ -42,7 +42,7 @@ module Gitlab def renamed_tables_cache @renamed_tables ||= begin Gitlab::Database::TABLES_TO_BE_RENAMED.select do |old_name, new_name| - ActiveRecord::Base.connection.view_exists?(old_name) + connection.view_exists?(old_name) end end end diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb index 3d929c62933..9ddc5391689 100644 --- a/lib/gitlab/database/schema_helpers.rb +++ b/lib/gitlab/database/schema_helpers.rb @@ -25,6 +25,7 @@ module Gitlab CREATE TRIGGER #{name} #{fires} ON #{table_name} FOR EACH ROW + #{yield if block_given?} EXECUTE FUNCTION #{function_name}() SQL end diff --git a/lib/gitlab/database/shared_model.rb b/lib/gitlab/database/shared_model.rb index f31dbc01907..17d7886e8c8 100644 --- a/lib/gitlab/database/shared_model.rb +++ b/lib/gitlab/database/shared_model.rb @@ -39,6 +39,10 @@ module Gitlab Thread.current[:overriding_connection] = connection end end + + def connection_db_config + self.class.connection_db_config + end end end end diff --git a/lib/gitlab/database/type/json_pg_safe.rb b/lib/gitlab/database/type/json_pg_safe.rb new file mode 100644 index 00000000000..bbc207bd0d9 --- /dev/null +++ b/lib/gitlab/database/type/json_pg_safe.rb @@ 
-0,0 +1,24 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Type + # Extends Rails' ActiveRecord::Type::Json data type to remove JSON + # encoded nullbytes `\u0000` to prevent PostgreSQL errors like + # `PG::UntranslatableCharacter: ERROR: unsupported Unicode escape + # sequence`. + # + # Example: + # + # class SomeModel < ApplicationRecord + # # some_model.a_field is of type `jsonb` + # attribute :a_field, Gitlab::Database::Type::JsonPgSafe.new + # end + class JsonPgSafe < ActiveRecord::Type::Json + def serialize(value) + super&.gsub('\u0000', '') + end + end + end + end +end |