Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--lib/gitlab/database/async_indexes/index_creator.rb4
-rw-r--r--lib/gitlab/database/background_migration/batched_job.rb1
-rw-r--r--lib/gitlab/database/background_migration/batched_migration.rb12
-rw-r--r--lib/gitlab/database/count/reltuples_count_strategy.rb39
-rw-r--r--lib/gitlab/database/count/tablesample_count_strategy.rb2
-rw-r--r--lib/gitlab/database/gitlab_loose_foreign_keys.yml69
-rw-r--r--lib/gitlab/database/gitlab_schemas.yml13
-rw-r--r--lib/gitlab/database/load_balancing.rb4
-rw-r--r--lib/gitlab/database/load_balancing/configuration.rb4
-rw-r--r--lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb31
-rw-r--r--lib/gitlab/database/load_balancing/sticking.rb9
-rw-r--r--lib/gitlab/database/loose_foreign_keys.rb38
-rw-r--r--lib/gitlab/database/migration_helpers.rb1
-rw-r--r--lib/gitlab/database/migrations/background_migration_helpers.rb148
-rw-r--r--lib/gitlab/database/migrations/batched_background_migration_helpers.rb118
-rw-r--r--lib/gitlab/database/migrations/instrumentation.rb4
-rw-r--r--lib/gitlab/database/migrations/observers/migration_observer.rb4
-rw-r--r--lib/gitlab/database/migrations/runner.rb2
-rw-r--r--lib/gitlab/database/partitioning/detached_partition_dropper.rb2
-rw-r--r--lib/gitlab/database/partitioning/monthly_strategy.rb4
-rw-r--r--lib/gitlab/database/partitioning/partition_manager.rb8
-rw-r--r--lib/gitlab/database/partitioning/single_numeric_list_partition.rb76
-rw-r--r--lib/gitlab/database/partitioning/sliding_list_strategy.rb75
-rw-r--r--lib/gitlab/database/pg_class.rb2
-rw-r--r--lib/gitlab/database/postgres_hll/buckets.rb3
-rw-r--r--lib/gitlab/database/query_analyzer.rb12
-rw-r--r--lib/gitlab/database/query_analyzers/base.rb7
-rw-r--r--lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb67
-rw-r--r--lib/gitlab/database/reindexing.rb20
-rw-r--r--lib/gitlab/database/reindexing/coordinator.rb4
-rw-r--r--lib/gitlab/database/schema_cache_with_renamed_table.rb2
-rw-r--r--lib/gitlab/database/schema_helpers.rb1
-rw-r--r--lib/gitlab/database/shared_model.rb4
-rw-r--r--lib/gitlab/database/type/json_pg_safe.rb24
34 files changed, 603 insertions, 211 deletions
diff --git a/lib/gitlab/database/async_indexes/index_creator.rb b/lib/gitlab/database/async_indexes/index_creator.rb
index 994a1deba57..2fb4cc8f675 100644
--- a/lib/gitlab/database/async_indexes/index_creator.rb
+++ b/lib/gitlab/database/async_indexes/index_creator.rb
@@ -47,6 +47,10 @@ module Gitlab
TIMEOUT_PER_ACTION
end
+ def lease_key
+ [super, async_index.connection_db_config.name].join('/')
+ end
+
def set_statement_timeout
connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb
index 32765cb6a56..503172dd750 100644
--- a/lib/gitlab/database/background_migration/batched_job.rb
+++ b/lib/gitlab/database/background_migration/batched_job.rb
@@ -21,6 +21,7 @@ module Gitlab
from_union([failed_jobs, self.stuck])
}
+ scope :except_succeeded, -> { where(status: self.statuses.except(:succeeded).values) }
enum status: {
pending: 0,
diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb
index d9fc2ea48f6..2844cbe4a74 100644
--- a/lib/gitlab/database/background_migration/batched_migration.rb
+++ b/lib/gitlab/database/background_migration/batched_migration.rb
@@ -18,6 +18,8 @@ module Gitlab
scope: [:job_class_name, :table_name, :column_name]
}
+ validate :validate_batched_jobs_status, if: -> { status_changed? && finished? }
+
scope :queue_order, -> { order(id: :asc) }
scope :queued, -> { where(status: [:active, :paused]) }
scope :for_configuration, ->(job_class_name, table_name, column_name, job_arguments) do
@@ -92,11 +94,11 @@ module Gitlab
end
def job_class_name=(class_name)
- write_attribute(:job_class_name, class_name.demodulize)
+ write_attribute(:job_class_name, class_name.delete_prefix("::"))
end
def batch_class_name=(class_name)
- write_attribute(:batch_class_name, class_name.demodulize)
+ write_attribute(:batch_class_name, class_name.delete_prefix("::"))
end
def migrated_tuple_count
@@ -133,6 +135,12 @@ module Gitlab
def optimize!
BatchOptimizer.new(self).optimize!
end
+
+ private
+
+ def validate_batched_jobs_status
+ errors.add(:batched_jobs, 'jobs need to be succeeded') if batched_jobs.except_succeeded.exists?
+ end
end
end
end
diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb
index 870cf25984b..68a0c15480a 100644
--- a/lib/gitlab/database/count/reltuples_count_strategy.rb
+++ b/lib/gitlab/database/count/reltuples_count_strategy.rb
@@ -32,12 +32,12 @@ module Gitlab
# Models using single-type inheritance (STI) don't work with
# reltuple count estimates. We just have to ignore them and
# use another strategy to compute them.
- def non_sti_models
+ def non_sti_models(models)
models.reject { |model| sti_model?(model) }
end
- def non_sti_table_names
- non_sti_models.map(&:table_name)
+ def non_sti_table_names(models)
+ non_sti_models(models).map(&:table_name)
end
def sti_model?(model)
@@ -45,21 +45,34 @@ module Gitlab
model.base_class != model
end
- def table_names
- models.map(&:table_name)
+ def table_to_model_mapping
+ @table_to_model_mapping ||= models.each_with_object({}) { |model, h| h[model.table_name] = model }
+ end
+
+ def table_to_model(table_name)
+ table_to_model_mapping[table_name]
end
def size_estimates(check_statistics: true)
- table_to_model = models.each_with_object({}) { |model, h| h[model.table_name] = model }
-
- # Querying tuple stats only works on the primary. Due to load balancing, the
- # easiest way to do this is to start a transaction.
- ActiveRecord::Base.transaction do # rubocop: disable Database/MultipleDatabases
- get_statistics(non_sti_table_names, check_statistics: check_statistics).each_with_object({}) do |row, data|
- model = table_to_model[row.table_name]
- data[model] = row.estimate
+ results = {}
+
+ models.group_by { |model| model.connection_db_config.name }.map do |db_name, models_for_db|
+ base_model = Gitlab::Database.database_base_models[db_name]
+ tables = non_sti_table_names(models_for_db)
+
+ # Querying tuple stats only works on the primary. Due to load balancing, the
+ # easiest way to do this is to start a transaction.
+ base_model.transaction do
+ Gitlab::Database::SharedModel.using_connection(base_model.connection) do
+ get_statistics(tables, check_statistics: check_statistics).each do |row|
+ model = table_to_model(row.table_name)
+ results[model] = row.estimate
+ end
+ end
end
end
+
+ results
end
# Generates the PostgreSQL query to return the tuples for tables
diff --git a/lib/gitlab/database/count/tablesample_count_strategy.rb b/lib/gitlab/database/count/tablesample_count_strategy.rb
index 489bc0aacea..92c8de9aeac 100644
--- a/lib/gitlab/database/count/tablesample_count_strategy.rb
+++ b/lib/gitlab/database/count/tablesample_count_strategy.rb
@@ -61,7 +61,7 @@ module Gitlab
#{where_clause(model)}
SQL
- rows = ActiveRecord::Base.connection.select_all(query) # rubocop: disable Database/MultipleDatabases
+ rows = model.connection.select_all(query)
Integer(rows.first['count'])
end
diff --git a/lib/gitlab/database/gitlab_loose_foreign_keys.yml b/lib/gitlab/database/gitlab_loose_foreign_keys.yml
new file mode 100644
index 00000000000..0343c054f23
--- /dev/null
+++ b/lib/gitlab/database/gitlab_loose_foreign_keys.yml
@@ -0,0 +1,69 @@
+ci_pipeline_chat_data:
+ - table: chat_names
+ column: chat_name_id
+ on_delete: async_delete
+dast_scanner_profiles_builds:
+ - table: ci_builds
+ column: ci_build_id
+ on_delete: async_delete
+dast_scanner_profiles_builds:
+ - table: ci_builds
+ column: ci_build_id
+ on_delete: async_delete
+dast_profiles_pipelines:
+ - table: ci_pipelines
+ column: ci_pipeline_id
+ on_delete: async_delete
+clusters_applications_runners:
+ - table: ci_runners
+ column: runner_id
+ on_delete: async_nullify
+ci_namespace_mirrors:
+ - table: namespaces
+ column: namespace_id
+ on_delete: async_delete
+ci_builds:
+ - table: users
+ column: user_id
+ on_delete: async_nullify
+ci_pipelines:
+ - table: merge_requests
+ column: merge_request_id
+ on_delete: async_delete
+ - table: external_pull_requests
+ column: external_pull_request_id
+ on_delete: async_nullify
+ - table: users
+ column: user_id
+ on_delete: async_nullify
+ci_project_mirrors:
+ - table: projects
+ column: project_id
+ on_delete: async_delete
+ - table: namespaces
+ column: namespace_id
+ on_delete: async_delete
+packages_build_infos:
+ - table: ci_pipelines
+ column: pipeline_id
+ on_delete: async_nullify
+packages_package_file_build_infos:
+ - table: ci_pipelines
+ column: pipeline_id
+ on_delete: async_nullify
+pages_deployments:
+ - table: ci_builds
+ column: ci_build_id
+ on_delete: async_nullify
+terraform_state_versions:
+ - table: ci_builds
+ column: ci_build_id
+ on_delete: async_nullify
+merge_request_metrics:
+ - table: ci_pipelines
+ column: pipeline_id
+ on_delete: async_delete
+project_pages_metadata:
+ - table: ci_job_artifacts
+ column: artifacts_archive_id
+ on_delete: async_nullify
diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml
index 66157e998a0..24c2d634780 100644
--- a/lib/gitlab/database/gitlab_schemas.yml
+++ b/lib/gitlab/database/gitlab_schemas.yml
@@ -1,4 +1,5 @@
abuse_reports: :gitlab_main
+agent_activity_events: :gitlab_main
agent_group_authorizations: :gitlab_main
agent_project_authorizations: :gitlab_main
alert_management_alert_assignees: :gitlab_main
@@ -85,6 +86,7 @@ ci_job_token_project_scope_links: :gitlab_ci
ci_job_variables: :gitlab_ci
ci_minutes_additional_packs: :gitlab_ci
ci_namespace_monthly_usages: :gitlab_ci
+ci_namespace_mirrors: :gitlab_ci
ci_pending_builds: :gitlab_ci
ci_pipeline_artifacts: :gitlab_ci
ci_pipeline_chat_data: :gitlab_ci
@@ -96,6 +98,7 @@ ci_pipelines: :gitlab_ci
ci_pipeline_variables: :gitlab_ci
ci_platform_metrics: :gitlab_ci
ci_project_monthly_usages: :gitlab_ci
+ci_project_mirrors: :gitlab_ci
ci_refs: :gitlab_ci
ci_resource_groups: :gitlab_ci
ci_resources: :gitlab_ci
@@ -161,6 +164,7 @@ dependency_proxy_group_settings: :gitlab_main
dependency_proxy_image_ttl_group_policies: :gitlab_main
dependency_proxy_manifests: :gitlab_main
deploy_keys_projects: :gitlab_main
+deployment_approvals: :gitlab_main
deployment_clusters: :gitlab_main
deployment_merge_requests: :gitlab_main
deployments: :gitlab_main
@@ -249,6 +253,7 @@ incident_management_oncall_schedules: :gitlab_main
incident_management_oncall_shifts: :gitlab_main
incident_management_pending_alert_escalations: :gitlab_main
incident_management_pending_issue_escalations: :gitlab_main
+incident_management_timeline_events: :gitlab_main
index_statuses: :gitlab_main
in_product_marketing_emails: :gitlab_main
insights: :gitlab_main
@@ -260,6 +265,7 @@ issuable_severities: :gitlab_main
issuable_slas: :gitlab_main
issue_assignees: :gitlab_main
issue_customer_relations_contacts: :gitlab_main
+issue_emails: :gitlab_main
issue_email_participants: :gitlab_main
issue_links: :gitlab_main
issue_metrics: :gitlab_main
@@ -281,6 +287,7 @@ ldap_group_links: :gitlab_main
lfs_file_locks: :gitlab_main
lfs_objects: :gitlab_main
lfs_objects_projects: :gitlab_main
+lfs_object_states: :gitlab_main
licenses: :gitlab_main
lists: :gitlab_main
list_user_preferences: :gitlab_main
@@ -290,6 +297,7 @@ members: :gitlab_main
merge_request_assignees: :gitlab_main
merge_request_blocks: :gitlab_main
merge_request_cleanup_schedules: :gitlab_main
+merge_requests_compliance_violations: :gitlab_main
merge_request_context_commit_diff_files: :gitlab_main
merge_request_context_commits: :gitlab_main
merge_request_diff_commits: :gitlab_main
@@ -314,6 +322,7 @@ namespace_package_settings: :gitlab_main
namespace_root_storage_statistics: :gitlab_main
namespace_settings: :gitlab_main
namespaces: :gitlab_main
+namespaces_sync_events: :gitlab_main
namespace_statistics: :gitlab_main
note_diff_files: :gitlab_main
notes: :gitlab_main
@@ -363,6 +372,7 @@ packages_pypi_metadata: :gitlab_main
packages_rubygems_metadata: :gitlab_main
packages_tags: :gitlab_main
pages_deployments: :gitlab_main
+pages_deployment_states: :gitlab_main
pages_domain_acme_orders: :gitlab_main
pages_domains: :gitlab_main
partitioned_foreign_keys: :gitlab_main
@@ -408,6 +418,7 @@ project_repository_storage_moves: :gitlab_main
project_security_settings: :gitlab_main
project_settings: :gitlab_main
projects: :gitlab_main
+projects_sync_events: :gitlab_main
project_statistics: :gitlab_main
project_topics: :gitlab_main
project_tracing_settings: :gitlab_main
@@ -485,6 +496,7 @@ trending_projects: :gitlab_main
u2f_registrations: :gitlab_main
upcoming_reconciliations: :gitlab_main
uploads: :gitlab_main
+upload_states: :gitlab_main
user_agent_details: :gitlab_main
user_callouts: :gitlab_main
user_canonical_emails: :gitlab_main
@@ -526,6 +538,7 @@ vulnerability_issue_links: :gitlab_main
vulnerability_occurrence_identifiers: :gitlab_main
vulnerability_occurrence_pipelines: :gitlab_main
vulnerability_occurrences: :gitlab_main
+vulnerability_reads: :gitlab_main
vulnerability_remediations: :gitlab_main
vulnerability_scanners: :gitlab_main
vulnerability_statistics: :gitlab_main
diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb
index 52eb0764ae3..e16db5af8ce 100644
--- a/lib/gitlab/database/load_balancing.rb
+++ b/lib/gitlab/database/load_balancing.rb
@@ -30,6 +30,10 @@ module Gitlab
end
end
+ def self.primary_only?
+ each_load_balancer.all?(&:primary_only?)
+ end
+
def self.release_hosts
each_load_balancer(&:release_host)
end
diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb
index da313361073..e769cb5c35c 100644
--- a/lib/gitlab/database/load_balancing/configuration.rb
+++ b/lib/gitlab/database/load_balancing/configuration.rb
@@ -107,7 +107,11 @@ module Gitlab
hosts.any? || service_discovery_enabled?
end
+ # This is disabled for Rake tasks to ensure e.g. database migrations
+ # always produce consistent results.
def service_discovery_enabled?
+ return false if Gitlab::Runtime.rake?
+
service_discovery[:record].present?
end
diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
index b9acc36b4cc..5d91292b8de 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
@@ -6,6 +6,8 @@ module Gitlab
class SidekiqServerMiddleware
JobReplicaNotUpToDate = Class.new(StandardError)
+ MINIMUM_DELAY_INTERVAL_SECONDS = 0.8
+
def call(worker, job, _queue)
worker_class = worker.class
strategy = select_load_balancing_strategy(worker_class, job)
@@ -42,11 +44,15 @@ module Gitlab
wal_locations = get_wal_locations(job)
- return :primary_no_wal unless wal_locations
+ return :primary_no_wal if wal_locations.blank?
+
+ # Happy case: we can read from a replica.
+ return replica_strategy(worker_class, job) if databases_in_sync?(wal_locations)
+
+ sleep_if_needed(job)
if databases_in_sync?(wal_locations)
- # Happy case: we can read from a replica.
- retried_before?(worker_class, job) ? :replica_retried : :replica
+ replica_strategy(worker_class, job)
elsif can_retry?(worker_class, job)
# Optimistic case: The worker allows retries and we have retries left.
:retry
@@ -56,17 +62,14 @@ module Gitlab
end
end
- def get_wal_locations(job)
- job['dedup_wal_locations'] || job['wal_locations'] || legacy_wal_location(job)
- end
+ def sleep_if_needed(job)
+ remaining_delay = MINIMUM_DELAY_INTERVAL_SECONDS - (Time.current.to_f - job['created_at'].to_f)
- # Already scheduled jobs could still contain legacy database write location.
- # TODO: remove this in the next iteration
- # https://gitlab.com/gitlab-org/gitlab/-/issues/338213
- def legacy_wal_location(job)
- wal_location = job['database_write_location'] || job['database_replica_location']
+ sleep remaining_delay if remaining_delay > 0 && remaining_delay < MINIMUM_DELAY_INTERVAL_SECONDS
+ end
- { ::Gitlab::Database::MAIN_DATABASE_NAME.to_sym => wal_location } if wal_location
+ def get_wal_locations(job)
+ job['dedup_wal_locations'] || job['wal_locations']
end
def load_balancing_available?(worker_class)
@@ -79,6 +82,10 @@ module Gitlab
worker_class.get_data_consistency == :delayed && not_yet_retried?(job)
end
+ def replica_strategy(worker_class, job)
+ retried_before?(worker_class, job) ? :replica_retried : :replica
+ end
+
def retried_before?(worker_class, job)
worker_class.get_data_consistency == :delayed && !not_yet_retried?(job)
end
diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb
index 834e9c6d3c6..8e5dc98e96e 100644
--- a/lib/gitlab/database/load_balancing/sticking.rb
+++ b/lib/gitlab/database/load_balancing/sticking.rb
@@ -123,21 +123,18 @@ module Gitlab
def unstick(namespace, id)
Gitlab::Redis::SharedState.with do |redis|
redis.del(redis_key_for(namespace, id))
- redis.del(old_redis_key_for(namespace, id))
end
end
def set_write_location_for(namespace, id, location)
Gitlab::Redis::SharedState.with do |redis|
redis.set(redis_key_for(namespace, id), location, ex: EXPIRATION)
- redis.set(old_redis_key_for(namespace, id), location, ex: EXPIRATION)
end
end
def last_write_location_for(namespace, id)
Gitlab::Redis::SharedState.with do |redis|
- redis.get(redis_key_for(namespace, id)) ||
- redis.get(old_redis_key_for(namespace, id))
+ redis.get(redis_key_for(namespace, id))
end
end
@@ -146,10 +143,6 @@ module Gitlab
"database-load-balancing/write-location/#{name}/#{namespace}/#{id}"
end
-
- def old_redis_key_for(namespace, id)
- "database-load-balancing/write-location/#{namespace}/#{id}"
- end
end
end
end
diff --git a/lib/gitlab/database/loose_foreign_keys.rb b/lib/gitlab/database/loose_foreign_keys.rb
new file mode 100644
index 00000000000..1ecfb5ce47f
--- /dev/null
+++ b/lib/gitlab/database/loose_foreign_keys.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module LooseForeignKeys
+ def self.definitions_by_table
+ @definitions_by_table ||= definitions.group_by(&:to_table).with_indifferent_access.freeze
+ end
+
+ def self.definitions
+ @definitions ||= loose_foreign_keys_yaml.flat_map do |child_table_name, configs|
+ configs.map { |config| build_definition(child_table_name, config) }
+ end.freeze
+ end
+
+ def self.build_definition(child_table_name, config)
+ parent_table_name = config.fetch('table')
+
+ ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new(
+ child_table_name,
+ parent_table_name,
+ {
+ column: config.fetch('column'),
+ on_delete: config.fetch('on_delete').to_sym,
+ gitlab_schema: GitlabSchema.table_schema(child_table_name)
+ }
+ )
+ end
+
+ def self.loose_foreign_keys_yaml
+ @loose_foreign_keys_yaml ||= YAML.load_file(Rails.root.join('lib/gitlab/database/gitlab_loose_foreign_keys.yml'))
+ end
+
+ private_class_method :build_definition
+ private_class_method :loose_foreign_keys_yaml
+ end
+ end
+end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 7dce4fa0ce2..4245dd80714 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -4,6 +4,7 @@ module Gitlab
module Database
module MigrationHelpers
include Migrations::BackgroundMigrationHelpers
+ include Migrations::BatchedBackgroundMigrationHelpers
include DynamicModelHelpers
include RenameTableHelpers
include AsyncIndexes::MigrationHelpers
diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb
index bdaf0d35a83..8c33c41ce77 100644
--- a/lib/gitlab/database/migrations/background_migration_helpers.rb
+++ b/lib/gitlab/database/migrations/background_migration_helpers.rb
@@ -5,59 +5,7 @@ module Gitlab
module Migrations
module BackgroundMigrationHelpers
BATCH_SIZE = 1_000 # Number of rows to process per job
- SUB_BATCH_SIZE = 100 # Number of rows to process per sub-batch
JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time
- BATCH_CLASS_NAME = 'PrimaryKeyBatchingStrategy' # Default batch class for batched migrations
- BATCH_MIN_VALUE = 1 # Default minimum value for batched migrations
- BATCH_MIN_DELAY = 2.minutes.freeze # Minimum delay between batched migrations
-
- # Bulk queues background migration jobs for an entire table, batched by ID range.
- # "Bulk" meaning many jobs will be pushed at a time for efficiency.
- # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
- #
- # model_class - The table being iterated over
- # job_class_name - The background migration job class as a string
- # batch_size - The maximum number of rows per job
- #
- # Example:
- #
- # class Route < ActiveRecord::Base
- # include EachBatch
- # self.table_name = 'routes'
- # end
- #
- # bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
- #
- # Where the model_class includes EachBatch, and the background migration exists:
- #
- # class Gitlab::BackgroundMigration::ProcessRoutes
- # def perform(start_id, end_id)
- # # do something
- # end
- # end
- def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BATCH_SIZE)
- raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
-
- jobs = []
- table_name = model_class.quoted_table_name
-
- model_class.each_batch(of: batch_size) do |relation|
- start_id, end_id = relation.pluck("MIN(#{table_name}.id)", "MAX(#{table_name}.id)").first
-
- if jobs.length >= JOB_BUFFER_SIZE
- # Note: This code path generally only helps with many millions of rows
- # We push multiple jobs at a time to reduce the time spent in
- # Sidekiq/Redis operations. We're using this buffer based approach so we
- # don't need to run additional queries for every range.
- bulk_migrate_async(jobs)
- jobs.clear
- end
-
- jobs << [job_class_name, [start_id, end_id]]
- end
-
- bulk_migrate_async(jobs) unless jobs.empty?
- end
# Queues background migration jobs for an entire table in batches.
# The default batching column used is the standard primary key `id`.
@@ -137,6 +85,7 @@ module Gitlab
# Requeue pending jobs previously queued with #queue_background_migration_jobs_by_range_at_intervals
#
# This method is useful to schedule jobs that had previously failed.
+ # It can only be used if the previous background migration used job tracking like the queue_background_migration_jobs_by_range_at_intervals helper.
#
# job_class_name - The background migration job class as a string
# delay_interval - The duration between each job's scheduled time
@@ -170,100 +119,6 @@ module Gitlab
duration
end
- # Creates a batched background migration for the given table. A batched migration runs one job
- # at a time, computing the bounds of the next batch based on the current migration settings and the previous
- # batch bounds. Each job's execution status is tracked in the database as the migration runs. The given job
- # class must be present in the Gitlab::BackgroundMigration module, and the batch class (if specified) must be
- # present in the Gitlab::BackgroundMigration::BatchingStrategies module.
- #
- # If migration with same job_class_name, table_name, column_name, and job_aruments already exists, this helper
- # will log an warning and not create a new one.
- #
- # job_class_name - The background migration job class as a string
- # batch_table_name - The name of the table the migration will batch over
- # batch_column_name - The name of the column the migration will batch over
- # job_arguments - Extra arguments to pass to the job instance when the migration runs
- # job_interval - The pause interval between each job's execution, minimum of 2 minutes
- # batch_min_value - The value in the column the batching will begin at
- # batch_max_value - The value in the column the batching will end at, defaults to `SELECT MAX(batch_column)`
- # batch_class_name - The name of the class that will be called to find the range of each next batch
- # batch_size - The maximum number of rows per job
- # sub_batch_size - The maximum number of rows processed per "iteration" within the job
- #
- #
- # *Returns the created BatchedMigration record*
- #
- # Example:
- #
- # queue_batched_background_migration(
- # 'CopyColumnUsingBackgroundMigrationJob',
- # :events,
- # :id,
- # job_interval: 2.minutes,
- # other_job_arguments: ['column1', 'column2'])
- #
- # Where the the background migration exists:
- #
- # class Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob
- # def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, *other_args)
- # # do something
- # end
- # end
- def queue_batched_background_migration( # rubocop:disable Metrics/ParameterLists
- job_class_name,
- batch_table_name,
- batch_column_name,
- *job_arguments,
- job_interval:,
- batch_min_value: BATCH_MIN_VALUE,
- batch_max_value: nil,
- batch_class_name: BATCH_CLASS_NAME,
- batch_size: BATCH_SIZE,
- sub_batch_size: SUB_BATCH_SIZE
- )
-
- if Gitlab::Database::BackgroundMigration::BatchedMigration.for_configuration(job_class_name, batch_table_name, batch_column_name, job_arguments).exists?
- Gitlab::AppLogger.warn "Batched background migration not enqueued because it already exists: " \
- "job_class_name: #{job_class_name}, table_name: #{batch_table_name}, column_name: #{batch_column_name}, " \
- "job_arguments: #{job_arguments.inspect}"
- return
- end
-
- job_interval = BATCH_MIN_DELAY if job_interval < BATCH_MIN_DELAY
-
- batch_max_value ||= connection.select_value(<<~SQL)
- SELECT MAX(#{connection.quote_column_name(batch_column_name)})
- FROM #{connection.quote_table_name(batch_table_name)}
- SQL
-
- migration_status = batch_max_value.nil? ? :finished : :active
- batch_max_value ||= batch_min_value
-
- migration = Gitlab::Database::BackgroundMigration::BatchedMigration.create!(
- job_class_name: job_class_name,
- table_name: batch_table_name,
- column_name: batch_column_name,
- job_arguments: job_arguments,
- interval: job_interval,
- min_value: batch_min_value,
- max_value: batch_max_value,
- batch_class_name: batch_class_name,
- batch_size: batch_size,
- sub_batch_size: sub_batch_size,
- status: migration_status)
-
- # This guard is necessary since #total_tuple_count was only introduced schema-wise,
- # after this migration helper had been used for the first time.
- return migration unless migration.respond_to?(:total_tuple_count)
-
- # We keep track of the estimated number of tuples to reason later
- # about the overall progress of a migration.
- migration.total_tuple_count = Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate
- migration.save!
-
- migration
- end
-
# Force a background migration to complete.
#
# WARNING: This method will block the caller and move the background migration from an
@@ -275,6 +130,7 @@ module Gitlab
# 4. Optionally remove job tracking information.
#
# This method does not guarantee that all jobs completed successfully.
+ # It can only be used if the previous background migration used the queue_background_migration_jobs_by_range_at_intervals helper.
def finalize_background_migration(class_name, delete_tracking_jobs: ['succeeded'])
# Empty the sidekiq queue.
Gitlab::BackgroundMigration.steal(class_name)
diff --git a/lib/gitlab/database/migrations/batched_background_migration_helpers.rb b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb
new file mode 100644
index 00000000000..dcaf7fad05f
--- /dev/null
+++ b/lib/gitlab/database/migrations/batched_background_migration_helpers.rb
@@ -0,0 +1,118 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ # BatchedBackgroundMigrations are a new approach to scheduling and executing background migrations, which uses
+ # persistent state in the database to track each migration. This avoids having to batch over an entire table and
+ # schedule a large number of sidekiq jobs upfront. It also provides for more flexibility as the migration runs,
+ # as it can be paused and restarted, and have configuration values like the batch size updated dynamically as the
+ # migration runs.
+ #
+ # For now, these migrations are not considered ready for general use, for more information see the tracking epic:
+ # https://gitlab.com/groups/gitlab-org/-/epics/6751
+ module BatchedBackgroundMigrationHelpers
+ BATCH_SIZE = 1_000 # Number of rows to process per job
+ SUB_BATCH_SIZE = 100 # Number of rows to process per sub-batch
+ BATCH_CLASS_NAME = 'PrimaryKeyBatchingStrategy' # Default batch class for batched migrations
+ BATCH_MIN_VALUE = 1 # Default minimum value for batched migrations
+ BATCH_MIN_DELAY = 2.minutes.freeze # Minimum delay between batched migrations
+
+ # Creates a batched background migration for the given table. A batched migration runs one job
+ # at a time, computing the bounds of the next batch based on the current migration settings and the previous
+ # batch bounds. Each job's execution status is tracked in the database as the migration runs. The given job
+ # class must be present in the Gitlab::BackgroundMigration module, and the batch class (if specified) must be
+ # present in the Gitlab::BackgroundMigration::BatchingStrategies module.
+ #
+ # If a migration with the same job_class_name, table_name, column_name, and job_arguments already exists, this helper
+ # will log a warning and not create a new one.
+ #
+ # job_class_name - The background migration job class as a string
+ # batch_table_name - The name of the table the migration will batch over
+ # batch_column_name - The name of the column the migration will batch over
+ # job_arguments - Extra arguments to pass to the job instance when the migration runs
+ # job_interval - The pause interval between each job's execution, minimum of 2 minutes
+ # batch_min_value - The value in the column the batching will begin at
+ # batch_max_value - The value in the column the batching will end at, defaults to `SELECT MAX(batch_column)`
+ # batch_class_name - The name of the class that will be called to find the range of each next batch
+ # batch_size - The maximum number of rows per job
+ # sub_batch_size - The maximum number of rows processed per "iteration" within the job
+ #
+ # *Returns the created BatchedMigration record*
+ #
+ # Example:
+ #
+ # queue_batched_background_migration(
+ # 'CopyColumnUsingBackgroundMigrationJob',
+ # :events,
+ # :id,
+ # job_interval: 2.minutes,
+ # other_job_arguments: ['column1', 'column2'])
+ #
+ # Where the background migration exists:
+ #
+ # class Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob
+ # def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, *other_args)
+ # # do something
+ # end
+ # end
+ def queue_batched_background_migration( # rubocop:disable Metrics/ParameterLists
+ job_class_name,
+ batch_table_name,
+ batch_column_name,
+ *job_arguments,
+ job_interval:,
+ batch_min_value: BATCH_MIN_VALUE,
+ batch_max_value: nil,
+ batch_class_name: BATCH_CLASS_NAME,
+ batch_size: BATCH_SIZE,
+ sub_batch_size: SUB_BATCH_SIZE
+ )
+
+ if Gitlab::Database::BackgroundMigration::BatchedMigration.for_configuration(job_class_name, batch_table_name, batch_column_name, job_arguments).exists?
+ Gitlab::AppLogger.warn "Batched background migration not enqueued because it already exists: " \
+ "job_class_name: #{job_class_name}, table_name: #{batch_table_name}, column_name: #{batch_column_name}, " \
+ "job_arguments: #{job_arguments.inspect}"
+ return
+ end
+
+ job_interval = BATCH_MIN_DELAY if job_interval < BATCH_MIN_DELAY
+
+ batch_max_value ||= connection.select_value(<<~SQL)
+ SELECT MAX(#{connection.quote_column_name(batch_column_name)})
+ FROM #{connection.quote_table_name(batch_table_name)}
+ SQL
+
+ migration_status = batch_max_value.nil? ? :finished : :active
+ batch_max_value ||= batch_min_value
+
+ migration = Gitlab::Database::BackgroundMigration::BatchedMigration.create!(
+ job_class_name: job_class_name,
+ table_name: batch_table_name,
+ column_name: batch_column_name,
+ job_arguments: job_arguments,
+ interval: job_interval,
+ min_value: batch_min_value,
+ max_value: batch_max_value,
+ batch_class_name: batch_class_name,
+ batch_size: batch_size,
+ sub_batch_size: sub_batch_size,
+ status: migration_status)
+
+ # This guard is necessary since #total_tuple_count was only introduced schema-wise,
+ # after this migration helper had been used for the first time.
+ return migration unless migration.respond_to?(:total_tuple_count)
+
+ # We keep track of the estimated number of tuples to reason later
+ # about the overall progress of a migration.
+ migration.total_tuple_count = Gitlab::Database::SharedModel.using_connection(connection) do
+ Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate
+ end
+ migration.save!
+
+ migration
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb
index 6e5ffb74411..1f7e81cae84 100644
--- a/lib/gitlab/database/migrations/instrumentation.rb
+++ b/lib/gitlab/database/migrations/instrumentation.rb
@@ -14,11 +14,11 @@ module Gitlab
@result_dir = result_dir
end
- def observe(version:, name:, &block)
+ def observe(version:, name:, connection:, &block)
observation = Observation.new(version, name)
observation.success = true
- observers = observer_classes.map { |c| c.new(observation, @result_dir) }
+ observers = observer_classes.map { |c| c.new(observation, @result_dir, connection) }
exception = nil
diff --git a/lib/gitlab/database/migrations/observers/migration_observer.rb b/lib/gitlab/database/migrations/observers/migration_observer.rb
index 106f8f1f829..0006af73f6c 100644
--- a/lib/gitlab/database/migrations/observers/migration_observer.rb
+++ b/lib/gitlab/database/migrations/observers/migration_observer.rb
@@ -7,8 +7,8 @@ module Gitlab
class MigrationObserver
attr_reader :connection, :observation, :output_dir
- def initialize(observation, output_dir)
- @connection = ActiveRecord::Base.connection
+ def initialize(observation, output_dir, connection)
+ @connection = connection
@observation = observation
@output_dir = output_dir
end
diff --git a/lib/gitlab/database/migrations/runner.rb b/lib/gitlab/database/migrations/runner.rb
index b267a64256b..f0bac594119 100644
--- a/lib/gitlab/database/migrations/runner.rb
+++ b/lib/gitlab/database/migrations/runner.rb
@@ -69,7 +69,7 @@ module Gitlab
instrumentation = Instrumentation.new(result_dir: result_dir)
sorted_migrations.each do |migration|
- instrumentation.observe(version: migration.version, name: migration.name) do
+ instrumentation.observe(version: migration.version, name: migration.name, connection: ActiveRecord::Migration.connection) do
ActiveRecord::Migrator.new(direction, migration_context.migrations, migration_context.schema_migration, migration.version).run
end
end
diff --git a/lib/gitlab/database/partitioning/detached_partition_dropper.rb b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
index 593824384b5..5e32ecad4ca 100644
--- a/lib/gitlab/database/partitioning/detached_partition_dropper.rb
+++ b/lib/gitlab/database/partitioning/detached_partition_dropper.rb
@@ -4,8 +4,6 @@ module Gitlab
module Partitioning
class DetachedPartitionDropper
def perform
- return unless Feature.enabled?(:drop_detached_partitions, default_enabled: :yaml)
-
Gitlab::AppLogger.info(message: "Checking for previously detached partitions to drop")
Postgresql::DetachedPartition.ready_to_drop.find_each do |detached_partition|
diff --git a/lib/gitlab/database/partitioning/monthly_strategy.rb b/lib/gitlab/database/partitioning/monthly_strategy.rb
index c93e775d7ed..9c8cccb3dc6 100644
--- a/lib/gitlab/database/partitioning/monthly_strategy.rb
+++ b/lib/gitlab/database/partitioning/monthly_strategy.rb
@@ -36,6 +36,10 @@ module Gitlab
partitions
end
+ # Hook called by the partition manager on the model's partitioning strategy
+ # (`model.partitioning_strategy.after_adding_partitions`).
+ # This strategy has no follow-up work to do, so the body is intentionally empty.
+ def after_adding_partitions
+ # No-op, required by the partition manager
+ end
+
private
def desired_partitions
diff --git a/lib/gitlab/database/partitioning/partition_manager.rb b/lib/gitlab/database/partitioning/partition_manager.rb
index 8742c0ff166..aa824dfbd2f 100644
--- a/lib/gitlab/database/partitioning/partition_manager.rb
+++ b/lib/gitlab/database/partitioning/partition_manager.rb
@@ -25,10 +25,8 @@ module Gitlab
partitions_to_create = missing_partitions
create(partitions_to_create) unless partitions_to_create.empty?
- if Feature.enabled?(:partition_pruning, default_enabled: :yaml)
- partitions_to_detach = extra_partitions
- detach(partitions_to_detach) unless partitions_to_detach.empty?
- end
+ partitions_to_detach = extra_partitions
+ detach(partitions_to_detach) unless partitions_to_detach.empty?
end
rescue StandardError => e
Gitlab::AppLogger.error(message: "Failed to create / detach partition(s)",
@@ -73,6 +71,8 @@ module Gitlab
partition_name: partition.partition_name,
table_name: partition.table)
end
+
+ model.partitioning_strategy.after_adding_partitions
end
end
end
diff --git a/lib/gitlab/database/partitioning/single_numeric_list_partition.rb b/lib/gitlab/database/partitioning/single_numeric_list_partition.rb
new file mode 100644
index 00000000000..23ac73a0e53
--- /dev/null
+++ b/lib/gitlab/database/partitioning/single_numeric_list_partition.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Partitioning
+ class SingleNumericListPartition
+ include Comparable
+
+ def self.from_sql(table, partition_name, definition)
+ # A list partition can support multiple values, but we only support a single number
+ matches = definition.match(/FOR VALUES IN \('(?<value>\d+)'\)/)
+
+ raise ArgumentError, 'Unknown partition definition' unless matches
+
+ value = Integer(matches[:value])
+
+ new(table, value, partition_name: partition_name)
+ end
+
+ attr_reader :table, :value
+
+ def initialize(table, value, partition_name: nil )
+ @table = table
+ @value = value
+ @partition_name = partition_name
+ end
+
+ def partition_name
+ @partition_name || "#{table}_#{value}"
+ end
+
+ def to_sql
+ <<~SQL
+ CREATE TABLE IF NOT EXISTS #{fully_qualified_partition}
+ PARTITION OF #{conn.quote_table_name(table)}
+ FOR VALUES IN (#{conn.quote(value)})
+ SQL
+ end
+
+ def to_detach_sql
+ <<~SQL
+ ALTER TABLE #{conn.quote_table_name(table)}
+ DETACH PARTITION #{fully_qualified_partition}
+ SQL
+ end
+
+ def ==(other)
+ table == other.table &&
+ partition_name == other.partition_name &&
+ value == other.value
+ end
+ alias_method :eql?, :==
+
+ def hash
+ [table, partition_name, value].hash
+ end
+
+ def <=>(other)
+ return if table != other.table
+
+ value <=> other.value
+ end
+
+ private
+
+ def fully_qualified_partition
+ "%s.%s" % [conn.quote_table_name(Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA), conn.quote_table_name(partition_name)]
+ end
+
+ def conn
+ @conn ||= Gitlab::Database::SharedModel.connection
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/partitioning/sliding_list_strategy.rb b/lib/gitlab/database/partitioning/sliding_list_strategy.rb
new file mode 100644
index 00000000000..21b86b43ae7
--- /dev/null
+++ b/lib/gitlab/database/partitioning/sliding_list_strategy.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Partitioning
+ class SlidingListStrategy
+ attr_reader :model, :partitioning_key, :next_partition_if, :detach_partition_if
+
+ delegate :table_name, to: :model
+
+ def initialize(model, partitioning_key, next_partition_if:, detach_partition_if:)
+ @model = model
+ @partitioning_key = partitioning_key
+ @next_partition_if = next_partition_if
+ @detach_partition_if = detach_partition_if
+
+ ensure_partitioning_column_ignored!
+ end
+
+ def current_partitions
+ Gitlab::Database::PostgresPartition.for_parent_table(table_name).map do |partition|
+ SingleNumericListPartition.from_sql(table_name, partition.name, partition.condition)
+ end.sort
+ end
+
+ def missing_partitions
+ if no_partitions_exist?
+ [initial_partition]
+ elsif next_partition_if.call(active_partition.value)
+ [next_partition]
+ else
+ []
+ end
+ end
+
+ def initial_partition
+ SingleNumericListPartition.new(table_name, 1)
+ end
+
+ def next_partition
+ SingleNumericListPartition.new(table_name, active_partition.value + 1)
+ end
+
+ def extra_partitions
+ possibly_extra = current_partitions[0...-1] # Never consider the most recent partition
+
+ possibly_extra.take_while { |p| detach_partition_if.call(p.value) }
+ end
+
+ def after_adding_partitions
+ active_value = active_partition.value
+ model.connection.change_column_default(model.table_name, partitioning_key, active_value)
+ end
+
+ def active_partition
+ # The current partitions list is sorted, so the last partition has the highest value
+ # This is the only partition that receives inserts.
+ current_partitions.last
+ end
+
+ def no_partitions_exist?
+ current_partitions.empty?
+ end
+
+ private
+
+ def ensure_partitioning_column_ignored!
+ unless model.ignored_columns.include?(partitioning_key.to_s)
+ raise "Add #{partitioning_key} to #{model.name}.ignored_columns to use it with SlidingListStrategy"
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/pg_class.rb b/lib/gitlab/database/pg_class.rb
index 0ce9eebc14c..bd582d903c6 100644
--- a/lib/gitlab/database/pg_class.rb
+++ b/lib/gitlab/database/pg_class.rb
@@ -2,7 +2,7 @@
module Gitlab
module Database
- class PgClass < ActiveRecord::Base
+ class PgClass < SharedModel
self.table_name = 'pg_class'
def self.for_table(relname)
diff --git a/lib/gitlab/database/postgres_hll/buckets.rb b/lib/gitlab/database/postgres_hll/buckets.rb
index 429e823379f..76818bbf340 100644
--- a/lib/gitlab/database/postgres_hll/buckets.rb
+++ b/lib/gitlab/database/postgres_hll/buckets.rb
@@ -65,8 +65,7 @@ module Gitlab
).to_i
if num_zero_buckets > 0 && num_uniques < 2.5 * TOTAL_BUCKETS
- ((0.7213 / (1 + 1.079 / TOTAL_BUCKETS)) * (TOTAL_BUCKETS *
- Math.log2(TOTAL_BUCKETS.to_f / num_zero_buckets)))
+ TOTAL_BUCKETS * Math.log(TOTAL_BUCKETS.to_f / num_zero_buckets)
else
num_uniques
end
diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb
index 0f285688876..2736f9d18dc 100644
--- a/lib/gitlab/database/query_analyzer.rb
+++ b/lib/gitlab/database/query_analyzer.rb
@@ -58,17 +58,15 @@ module Gitlab
return unless parsed
analyzers.each do |analyzer|
- next if analyzer.suppressed?
+ next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed)
analyzer.analyze(parsed)
- rescue StandardError => e
+ rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e
# We catch all standard errors to prevent validation errors from introducing fatal errors in production
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
end
- private
-
# Enable query analyzers
def begin!
analyzers = all_analyzers.select do |analyzer|
@@ -77,7 +75,7 @@ module Gitlab
true
end
- rescue StandardError => e
+ rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
false
@@ -90,13 +88,15 @@ module Gitlab
def end!
enabled_analyzers.select do |analyzer|
analyzer.end!
- rescue StandardError => e
+ rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
Thread.current[:query_analyzer_enabled_analyzers] = nil
end
+ private
+
def enabled_analyzers
Thread.current[:query_analyzer_enabled_analyzers]
end
diff --git a/lib/gitlab/database/query_analyzers/base.rb b/lib/gitlab/database/query_analyzers/base.rb
index e8066f7a706..0802d3c8013 100644
--- a/lib/gitlab/database/query_analyzers/base.rb
+++ b/lib/gitlab/database/query_analyzers/base.rb
@@ -4,10 +4,17 @@ module Gitlab
module Database
module QueryAnalyzers
class Base
+ # Inherits from `Exception` so that it is not easily rescued when running in test env
+ QueryAnalyzerError = Class.new(Exception) # rubocop:disable Lint/InheritException
+
def self.suppressed?
Thread.current[self.suppress_key]
end
+ # Whether this analyzer must still receive `parsed` while it is suppressed
+ # (the query analyzer skips an analyzer only when it is suppressed AND this
+ # returns false). The base implementation never requires tracking.
+ def self.requires_tracking?(parsed)
+ false
+ end
+
def self.suppress=(value)
Thread.current[self.suppress_key] = value
end
diff --git a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb
index 2233f3c4646..2e3db2a5c6e 100644
--- a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb
+++ b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb
@@ -4,7 +4,7 @@ module Gitlab
module Database
module QueryAnalyzers
class PreventCrossDatabaseModification < Database::QueryAnalyzers::Base
- CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError)
+ CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(QueryAnalyzerError)
# This method will allow cross database modifications within the block
# Example:
@@ -36,29 +36,36 @@ module Gitlab
Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml)
end
+ def self.requires_tracking?(parsed)
+ # The transaction boundaries always needs to be tracked regardless of suppress behavior
+ self.transaction_begin?(parsed) || self.transaction_end?(parsed)
+ end
+
# rubocop:disable Metrics/AbcSize
def self.analyze(parsed)
- return if in_factory_bot_create?
-
database = ::Gitlab::Database.db_config_name(parsed.connection)
sql = parsed.sql
# We ignore BEGIN in tests as this is the outer transaction for
# DatabaseCleaner
- if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN'))
+ if self.transaction_begin?(parsed)
context[:transaction_depth_by_db][database] += 1
return
- elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? && sql.start_with?('ROLLBACK', 'COMMIT'))
+ elsif self.transaction_end?(parsed)
context[:transaction_depth_by_db][database] -= 1
- if context[:transaction_depth_by_db][database] <= 0
+ if context[:transaction_depth_by_db][database] == 0
context[:modified_tables_by_db][database].clear
+ elsif context[:transaction_depth_by_db][database] < 0
+ context[:transaction_depth_by_db][database] = 0
+ raise CrossDatabaseModificationAcrossUnsupportedTablesError, "Misaligned cross-DB transactions discovered at query #{sql}. This could be a bug in #{self.class} or a valid issue to investigate. Read more at https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions ."
end
return
end
- return if context[:transaction_depth_by_db].values.all?(&:zero?)
+ return unless self.in_transaction?
+ return if in_factory_bot_create?
# PgQuery might fail in some cases due to limited nesting:
# https://github.com/pganalyze/pg_query/issues/209
@@ -97,6 +104,42 @@ module Gitlab
end
# rubocop:enable Metrics/AbcSize
+ def self.transaction_begin?(parsed)
+ # We ignore BEGIN or START in tests
+ unless Rails.env.test?
+ return true if transaction_stmt?(parsed, :TRANS_STMT_BEGIN)
+ return true if transaction_stmt?(parsed, :TRANS_STMT_START)
+ end
+
+ # SAVEPOINT
+ return true if transaction_stmt?(parsed, :TRANS_STMT_SAVEPOINT)
+
+ false
+ end
+
+ def self.transaction_end?(parsed)
+ # We ignore ROLLBACK or COMMIT in tests
+ unless Rails.env.test?
+ return true if transaction_stmt?(parsed, :TRANS_STMT_COMMIT)
+ return true if transaction_stmt?(parsed, :TRANS_STMT_COMMIT_PREPARED)
+ return true if transaction_stmt?(parsed, :TRANS_STMT_ROLLBACK)
+ return true if transaction_stmt?(parsed, :TRANS_STMT_ROLLBACK_PREPARED)
+ end
+
+ # RELEASE (SAVEPOINT) or ROLLBACK TO (SAVEPOINT)
+ return true if transaction_stmt?(parsed, :TRANS_STMT_RELEASE)
+ return true if transaction_stmt?(parsed, :TRANS_STMT_ROLLBACK_TO)
+
+ false
+ end
+
+ # Known kinds: https://github.com/pganalyze/pg_query/blob/f6588703deb9d7a94b87b34b7c3bab240087fbc4/ext/pg_query/include/nodes/parsenodes.h#L3050
+ def self.transaction_stmt?(parsed, kind)
+ parsed.pg.tree.stmts.map(&:stmt).any? do |stmt|
+ stmt.node == :transaction_stmt && stmt.transaction_stmt.kind == kind
+ end
+ end
+
# We only raise in tests for now otherwise some features will be broken
# in development. For now we've mostly only added allowlist based on
# spec names. Until we have allowed all the violations inline we don't
@@ -105,13 +148,21 @@ module Gitlab
Rails.env.test?
end
+ def self.in_transaction?
+ context[:transaction_depth_by_db].values.any?(&:positive?)
+ end
+
# We ignore execution in the #create method from FactoryBot
# because it is not representative of real code we run in
# production. There are far too many false positives caused
# by instantiating objects in different `gitlab_schema` in a
# FactoryBot `create`.
def self.in_factory_bot_create?
- Rails.env.test? && caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' }
+ Rails.env.test? && caller_locations.any? do |l|
+ l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' ||
+ l.path.end_with?('lib/factory_bot/strategy/create.rb') ||
+ l.path.end_with?('shoulda/matchers/active_record/validate_uniqueness_of_matcher.rb') && l.label == 'create_existing_record'
+ end
end
end
end
diff --git a/lib/gitlab/database/reindexing.rb b/lib/gitlab/database/reindexing.rb
index 7a22e324bdb..6ffe14249f0 100644
--- a/lib/gitlab/database/reindexing.rb
+++ b/lib/gitlab/database/reindexing.rb
@@ -15,6 +15,26 @@ module Gitlab
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
+ # Whether the `database_reindexing` ops feature flag is enabled.
+ def self.enabled?
+ Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml)
+ end
+
+ # For each database connection yielded by EachDatabase, creates pending
+ # async indexes (when that feature flag is enabled) and then runs
+ # automatic reindexing.
+ #
+ # database - optional connection name; when given, every connection whose
+ #            name differs (compared as strings) is skipped.
+ #
+ # Any StandardError is logged through Gitlab::AppLogger and re-raised.
+ def self.invoke(database = nil)
+ Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name|
+ next if database && database.to_s != connection_name.to_s
+
+ # Log SharedModel queries to stdout when LOG_QUERIES_TO_CONSOLE is truthy
+ Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
+
+ # Hack: Before we do actual reindexing work, create async indexes
+ Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
+
+ automatic_reindexing
+ end
+ rescue StandardError => e
+ Gitlab::AppLogger.error(e)
+ raise
+ end
+
# Performs automatic reindexing for a limited number of indexes per call
# 1. Consume from the explicit reindexing queue
# 2. Apply bloat heuristic to find most bloated indexes and reindex those
diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb
index 13298f67ca9..3e4a83aa2e7 100644
--- a/lib/gitlab/database/reindexing/coordinator.rb
+++ b/lib/gitlab/database/reindexing/coordinator.rb
@@ -53,6 +53,10 @@ module Gitlab
def lease_timeout
TIMEOUT_PER_ACTION
end
+
+ def lease_key
+ [super, index.connection_db_config.name].join('/')
+ end
end
end
end
diff --git a/lib/gitlab/database/schema_cache_with_renamed_table.rb b/lib/gitlab/database/schema_cache_with_renamed_table.rb
index 28123edd708..74900dc0d26 100644
--- a/lib/gitlab/database/schema_cache_with_renamed_table.rb
+++ b/lib/gitlab/database/schema_cache_with_renamed_table.rb
@@ -42,7 +42,7 @@ module Gitlab
def renamed_tables_cache
@renamed_tables ||= begin
Gitlab::Database::TABLES_TO_BE_RENAMED.select do |old_name, new_name|
- ActiveRecord::Base.connection.view_exists?(old_name)
+ connection.view_exists?(old_name)
end
end
end
diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb
index 3d929c62933..9ddc5391689 100644
--- a/lib/gitlab/database/schema_helpers.rb
+++ b/lib/gitlab/database/schema_helpers.rb
@@ -25,6 +25,7 @@ module Gitlab
CREATE TRIGGER #{name}
#{fires} ON #{table_name}
FOR EACH ROW
+ #{yield if block_given?}
EXECUTE FUNCTION #{function_name}()
SQL
end
diff --git a/lib/gitlab/database/shared_model.rb b/lib/gitlab/database/shared_model.rb
index f31dbc01907..17d7886e8c8 100644
--- a/lib/gitlab/database/shared_model.rb
+++ b/lib/gitlab/database/shared_model.rb
@@ -39,6 +39,10 @@ module Gitlab
Thread.current[:overriding_connection] = connection
end
end
+
+ # Instance-level shortcut that forwards to the class-level
+ # `connection_db_config`.
+ def connection_db_config
+ self.class.connection_db_config
+ end
end
end
end
diff --git a/lib/gitlab/database/type/json_pg_safe.rb b/lib/gitlab/database/type/json_pg_safe.rb
new file mode 100644
index 00000000000..bbc207bd0d9
--- /dev/null
+++ b/lib/gitlab/database/type/json_pg_safe.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Type
+ # Extends Rails' ActiveRecord::Type::Json data type to remove JSON
+ # encoded null bytes `\u0000` to prevent PostgreSQL errors like
+ # `PG::UntranslatableCharacter: ERROR: unsupported Unicode escape
+ # sequence`.
+ #
+ # Example:
+ #
+ # class SomeModel < ApplicationRecord
+ # # some_model.a_field is of type `jsonb`
+ # attribute :a_field, Gitlab::Database::Type::JsonPgSafe.new
+ # end
+ class JsonPgSafe < ActiveRecord::Type::Json
+ # Serializes through the parent type, then removes every literal
+ # six-character `\u0000` sequence from the result (the pattern is
+ # single-quoted, so it is not an actual NUL character). A nil result from
+ # the parent is returned as nil.
+ # NOTE(review): an escaped backslash followed by "u0000" (`\\u0000` in the
+ # JSON text) also contains this sequence and would be altered - confirm
+ # whether that input can occur.
+ def serialize(value)
+ super&.gsub('\u0000', '')
+ end
+ end
+ end
+ end
+end