
gitlab.com/gitlab-org/gitlab-foss.git
author    GitLab Bot <gitlab-bot@gitlab.com>  2022-01-20 12:16:11 +0300
committer GitLab Bot <gitlab-bot@gitlab.com>  2022-01-20 12:16:11 +0300
commit    edaa33dee2ff2f7ea3fac488d41558eb5f86d68c (patch)
tree      11f143effbfeba52329fb7afbd05e6e2a3790241 /lib/gitlab/background_migration
parent    d8a5691316400a0f7ec4f83832698f1988eb27c1 (diff)

Add latest changes from gitlab-org/gitlab@14-7-stable-ee (tag v14.7.0-rc42)
Diffstat (limited to 'lib/gitlab/background_migration')
 lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb                             |  77
 lib/gitlab/background_migration/backfill_ci_project_mirrors.rb                               |  52
 lib/gitlab/background_migration/backfill_incident_issue_escalation_statuses.rb               |  32
 lib/gitlab/background_migration/base_job.rb                                                  |  23
 lib/gitlab/background_migration/cleanup_concurrent_rename.rb                                 |  14
 lib/gitlab/background_migration/cleanup_concurrent_schema_change.rb                          |  56
 lib/gitlab/background_migration/cleanup_concurrent_type_change.rb                            |  14
 lib/gitlab/background_migration/copy_column.rb                                               |  41
 lib/gitlab/background_migration/encrypt_static_object_token.rb                               |  70
 lib/gitlab/background_migration/fix_vulnerability_occurrences_with_hashes_as_raw_metadata.rb | 124
 lib/gitlab/background_migration/job_coordinator.rb                                           |  14
 lib/gitlab/background_migration/migrate_legacy_artifacts.rb                                  | 130
 lib/gitlab/background_migration/populate_test_reports_issue_id.rb                            |  14
 lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb              | 148
 lib/gitlab/background_migration/remove_duplicate_services.rb                                 |  58
 15 files changed, 539 insertions(+), 328 deletions(-)
diff --git a/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb b/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb
new file mode 100644
index 00000000000..2247747ba08
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # A job to create ci_namespace_mirrors entries in batches
+ class BackfillCiNamespaceMirrors
+ class Namespace < ActiveRecord::Base # rubocop:disable Style/Documentation
+ include ::EachBatch
+
+ self.table_name = 'namespaces'
+ self.inheritance_column = nil
+
+ scope :base_query, -> do
+ select(:id, :parent_id)
+ end
+ end
+
+ PAUSE_SECONDS = 0.1
+ SUB_BATCH_SIZE = 500
+
+ def perform(start_id, end_id)
+ batch_query = Namespace.base_query.where(id: start_id..end_id)
+ batch_query.each_batch(of: SUB_BATCH_SIZE) do |sub_batch|
+ first, last = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first
+ ranged_query = Namespace.unscoped.base_query.where(id: first..last)
+
+ update_sql = <<~SQL
+ INSERT INTO ci_namespace_mirrors (namespace_id, traversal_ids)
+ #{insert_values(ranged_query)}
+ ON CONFLICT (namespace_id) DO NOTHING
+ SQL
+ # We do nothing on conflict because we assume the rows were already filled.
+
+ Namespace.connection.execute(update_sql)
+
+ sleep PAUSE_SECONDS
+ end
+
+ mark_job_as_succeeded(start_id, end_id)
+ end
+
+ private
+
+ def insert_values(batch)
+ calculated_traversal_ids(
+ batch.allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/336433')
+ )
+ end
+
+ # Copied from lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb
+ def calculated_traversal_ids(batch)
+ <<~SQL
+ WITH RECURSIVE cte(source_id, namespace_id, parent_id, height) AS (
+ (
+ SELECT batch.id, batch.id, batch.parent_id, 1
+ FROM (#{batch.to_sql}) AS batch
+ )
+ UNION ALL
+ (
+ SELECT cte.source_id, n.id, n.parent_id, cte.height+1
+ FROM namespaces n, cte
+ WHERE n.id = cte.parent_id
+ )
+ )
+ SELECT flat_hierarchy.source_id as namespace_id,
+ array_agg(flat_hierarchy.namespace_id ORDER BY flat_hierarchy.height DESC) as traversal_ids
+ FROM (SELECT * FROM cte FOR UPDATE) flat_hierarchy
+ GROUP BY flat_hierarchy.source_id
+ SQL
+ end
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillCiNamespaceMirrors', arguments)
+ end
+ end
+ end
+end
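
The post-deployment migration that schedules these jobs is outside this diff. A minimal sketch of how such tracked batches are typically queued, assuming the standard queue_background_migration_jobs_by_range_at_intervals helper (the class name, batch size, and interval below are illustrative, not taken from this diff):

# Hypothetical scheduling migration -- not part of this diff.
class ScheduleBackfillCiNamespaceMirrors < Gitlab::Database::Migration[1.0]
  MIGRATION = 'BackfillCiNamespaceMirrors'
  BATCH_SIZE = 10_000
  DELAY_INTERVAL = 2.minutes

  disable_ddl_transaction!

  def up
    queue_background_migration_jobs_by_range_at_intervals(
      define_batchable_model('namespaces'),
      MIGRATION,
      DELAY_INTERVAL,
      batch_size: BATCH_SIZE,
      track_jobs: true # creates background_migration_jobs rows that mark_job_as_succeeded later updates
    )
  end

  def down
    # no-op: the backfill is idempotent thanks to ON CONFLICT DO NOTHING
  end
end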
diff --git a/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb b/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb
new file mode 100644
index 00000000000..ff6ab9928b0
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # A job to create ci_project_mirrors entries in batches
+ class BackfillCiProjectMirrors
+ class Project < ActiveRecord::Base # rubocop:disable Style/Documentation
+ include ::EachBatch
+
+ self.table_name = 'projects'
+
+ scope :base_query, -> do
+ select(:id, :namespace_id)
+ end
+ end
+
+ PAUSE_SECONDS = 0.1
+ SUB_BATCH_SIZE = 500
+
+ def perform(start_id, end_id)
+ batch_query = Project.base_query.where(id: start_id..end_id)
+ batch_query.each_batch(of: SUB_BATCH_SIZE) do |sub_batch|
+ first, last = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first
+ ranged_query = Project.unscoped.base_query.where(id: first..last)
+
+ update_sql = <<~SQL
+ INSERT INTO ci_project_mirrors (project_id, namespace_id)
+ #{insert_values(ranged_query)}
+ ON CONFLICT (project_id) DO NOTHING
+ SQL
+ # We do nothing on conflict because we assume the rows were already filled.
+
+ Project.connection.execute(update_sql)
+
+ sleep PAUSE_SECONDS
+ end
+
+ mark_job_as_succeeded(start_id, end_id)
+ end
+
+ private
+
+ def insert_values(batch)
+ batch.allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/336433').to_sql
+ end
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillCiProjectMirrors', arguments)
+ end
+ end
+ end
+end
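
The statement executed per sub-batch here is much simpler than the namespace variant, since projects already carry their namespace_id. A sketch of its rough shape for an illustrative id range (not captured from a real run):

# Roughly what Project.connection.execute receives for a sub-batch covering
# project ids 1..500 (illustrative only).
puts <<~SQL
  INSERT INTO ci_project_mirrors (project_id, namespace_id)
  SELECT "projects"."id", "projects"."namespace_id" FROM "projects" WHERE "projects"."id" BETWEEN 1 AND 500
  ON CONFLICT (project_id) DO NOTHING
SQL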
diff --git a/lib/gitlab/background_migration/backfill_incident_issue_escalation_statuses.rb b/lib/gitlab/background_migration/backfill_incident_issue_escalation_statuses.rb
new file mode 100644
index 00000000000..2d46ff6b933
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_incident_issue_escalation_statuses.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # BackfillIncidentIssueEscalationStatuses adds
+ # IncidentManagement::IssuableEscalationStatus records for existing Incident issues.
+ # They will be added with no policy, and escalations_started_at as nil.
+ class BackfillIncidentIssueEscalationStatuses
+ def perform(start_id, stop_id)
+ ActiveRecord::Base.connection.execute <<~SQL
+ INSERT INTO incident_management_issuable_escalation_statuses (issue_id, created_at, updated_at)
+ SELECT issues.id, current_timestamp, current_timestamp
+ FROM issues
+ WHERE issues.issue_type = 1
+ AND issues.id BETWEEN #{start_id} AND #{stop_id}
+ ON CONFLICT (issue_id) DO NOTHING;
+ SQL
+
+ mark_job_as_succeeded(start_id, stop_id)
+ end
+
+ private
+
+ def mark_job_as_succeeded(*arguments)
+ ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
+ self.class.name.demodulize,
+ arguments
+ )
+ end
+ end
+ end
+end
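
A quick way to sanity-check the effect of one of these batches is to count the incident issues (issue_type = 1) in the range that still lack an escalation status row. Sketch only, with a made-up id range:

# Illustrative check, not part of the migration itself.
ActiveRecord::Base.connection.select_value(<<~SQL)
  SELECT COUNT(*)
  FROM issues
  WHERE issues.issue_type = 1
    AND issues.id BETWEEN 1 AND 10000
    AND NOT EXISTS (
      SELECT 1
      FROM incident_management_issuable_escalation_statuses statuses
      WHERE statuses.issue_id = issues.id
    )
SQL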
diff --git a/lib/gitlab/background_migration/base_job.rb b/lib/gitlab/background_migration/base_job.rb
new file mode 100644
index 00000000000..e21e7e0e4a3
--- /dev/null
+++ b/lib/gitlab/background_migration/base_job.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Simple base class for background migration job classes that are executed through the Sidekiq queue.
+ #
+ # Any job class that inherits from the base class will have a connection to the tracking database set on
+ # initialization.
+ class BaseJob
+ def initialize(connection:)
+ @connection = connection
+ end
+
+ def perform(*arguments)
+ raise NotImplementedError, "subclasses of #{self.class.name} must implement #{__method__}"
+ end
+
+ private
+
+ attr_reader :connection
+ end
+ end
+end
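
A hedged sketch of what a subclass could look like (the class and table below are made up); the point is that connection comes from the coordinator rather than from ActiveRecord::Base:

module Gitlab
  module BackgroundMigration
    # Hypothetical example job, not part of this diff.
    class ExampleBackfill < BaseJob
      def perform(start_id, end_id)
        # Uses the injected tracking-database connection.
        connection.execute(<<~SQL)
          UPDATE example_table
          SET backfilled = TRUE
          WHERE id BETWEEN #{start_id} AND #{end_id}
        SQL
      end
    end
  end
end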
diff --git a/lib/gitlab/background_migration/cleanup_concurrent_rename.rb b/lib/gitlab/background_migration/cleanup_concurrent_rename.rb
deleted file mode 100644
index d3f366f3480..00000000000
--- a/lib/gitlab/background_migration/cleanup_concurrent_rename.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # Background migration for cleaning up a concurrent column rename.
- class CleanupConcurrentRename < CleanupConcurrentSchemaChange
- RESCHEDULE_DELAY = 10.minutes
-
- def cleanup_concurrent_schema_change(table, old_column, new_column)
- cleanup_concurrent_column_rename(table, old_column, new_column)
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/cleanup_concurrent_schema_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_schema_change.rb
deleted file mode 100644
index 91b50c1a493..00000000000
--- a/lib/gitlab/background_migration/cleanup_concurrent_schema_change.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # Base class for background migration for rename/type changes.
- class CleanupConcurrentSchemaChange
- include Database::MigrationHelpers
-
- # table - The name of the table the migration is performed for.
- # old_column - The name of the old (to drop) column.
- # new_column - The name of the new column.
- def perform(table, old_column, new_column)
- return unless column_exists?(table, new_column) && column_exists?(table, old_column)
-
- rows_to_migrate = define_model_for(table)
- .where(new_column => nil)
- .where
- .not(old_column => nil)
-
- if rows_to_migrate.any?
- BackgroundMigrationWorker.perform_in(
- RESCHEDULE_DELAY,
- self.class.name,
- [table, old_column, new_column]
- )
- else
- cleanup_concurrent_schema_change(table, old_column, new_column)
- end
- end
-
- def cleanup_concurrent_schema_change(_table, _old_column, _new_column)
- raise NotImplementedError
- end
-
- # These methods are necessary so we can re-use the migration helpers in
- # this class.
- def connection
- ActiveRecord::Base.connection
- end
-
- def method_missing(name, *args, &block)
- connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend
- end
-
- def respond_to_missing?(*args)
- connection.respond_to?(*args) || super
- end
-
- def define_model_for(table)
- Class.new(ActiveRecord::Base) do
- self.table_name = table
- end
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb
deleted file mode 100644
index 48411095dbb..00000000000
--- a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # Background migration for cleaning up a concurrent column type change.
- class CleanupConcurrentTypeChange < CleanupConcurrentSchemaChange
- RESCHEDULE_DELAY = 10.minutes
-
- def cleanup_concurrent_schema_change(table, old_column, new_column)
- cleanup_concurrent_column_type_change(table, old_column)
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/copy_column.rb b/lib/gitlab/background_migration/copy_column.rb
deleted file mode 100644
index ef70f37d5eb..00000000000
--- a/lib/gitlab/background_migration/copy_column.rb
+++ /dev/null
@@ -1,41 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # CopyColumn is a simple (reusable) background migration that can be used to
- # update the value of a column based on the value of another column in the
- # same table.
- #
- # For this background migration to work the table that is migrated _has_ to
- # have an `id` column as the primary key.
- class CopyColumn
- # table - The name of the table that contains the columns.
- # copy_from - The column containing the data to copy.
- # copy_to - The column to copy the data to.
- # start_id - The start ID of the range of rows to update.
- # end_id - The end ID of the range of rows to update.
- def perform(table, copy_from, copy_to, start_id, end_id)
- return unless connection.column_exists?(table, copy_to)
-
- quoted_table = connection.quote_table_name(table)
- quoted_copy_from = connection.quote_column_name(copy_from)
- quoted_copy_to = connection.quote_column_name(copy_to)
-
- # We're using raw SQL here since this job may be frequently executed. As
- # a result dynamically defining models would lead to many unnecessary
- # schema information queries.
- connection.execute <<-SQL.strip_heredoc
- UPDATE #{quoted_table}
- SET #{quoted_copy_to} = #{quoted_copy_from}
- WHERE id BETWEEN #{start_id} AND #{end_id}
- AND #{quoted_copy_from} IS NOT NULL
- AND #{quoted_copy_to} IS NULL
- SQL
- end
-
- def connection
- ActiveRecord::Base.connection
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/encrypt_static_object_token.rb b/lib/gitlab/background_migration/encrypt_static_object_token.rb
new file mode 100644
index 00000000000..80931353e2f
--- /dev/null
+++ b/lib/gitlab/background_migration/encrypt_static_object_token.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Populates "static_object_token_encrypted" field with encrypted versions
+ # of values from "static_object_token" field
+ class EncryptStaticObjectToken
+ # rubocop:disable Style/Documentation
+ class User < ActiveRecord::Base
+ include ::EachBatch
+ self.table_name = 'users'
+ scope :with_static_object_token, -> { where.not(static_object_token: nil) }
+ scope :without_static_object_token_encrypted, -> { where(static_object_token_encrypted: nil) }
+ end
+ # rubocop:enable Style/Documentation
+
+ BATCH_SIZE = 100
+
+ def perform(start_id, end_id)
+ ranged_query = User
+ .where(id: start_id..end_id)
+ .with_static_object_token
+ .without_static_object_token_encrypted
+
+ ranged_query.each_batch(of: BATCH_SIZE) do |sub_batch|
+ first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first
+
+ batch_query = User.unscoped
+ .where(id: first..last)
+ .with_static_object_token
+ .without_static_object_token_encrypted
+
+ user_tokens = batch_query.pluck(:id, :static_object_token)
+
+ user_encrypted_tokens = user_tokens.map do |(id, plaintext_token)|
+ next if plaintext_token.blank?
+
+ [id, Gitlab::CryptoHelper.aes256_gcm_encrypt(plaintext_token)]
+ end
+
+ encrypted_tokens_sql = user_encrypted_tokens.compact.map { |(id, token)| "(#{id}, '#{token}')" }.join(',')
+
+ if user_encrypted_tokens.present?
+ User.connection.execute(<<~SQL)
+ WITH cte(cte_id, cte_token) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
+ SELECT *
+ FROM (VALUES #{encrypted_tokens_sql}) AS t (id, token)
+ )
+ UPDATE #{User.table_name}
+ SET static_object_token_encrypted = cte_token
+ FROM cte
+ WHERE cte_id = id
+ SQL
+ end
+
+ mark_job_as_succeeded(start_id, end_id)
+ end
+ end
+
+ private
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
+ self.class.name.demodulize,
+ arguments
+ )
+ end
+ end
+ end
+end
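
The VALUES-list CTE built above collapses a whole sub-batch into a single UPDATE; materialized_if_supported emits MATERIALIZED where the PostgreSQL version supports it and an empty string otherwise. For two hypothetical users the statement would look roughly like this (ids and ciphertexts are made up):

# Illustrative statement only -- real ciphertexts come from
# Gitlab::CryptoHelper.aes256_gcm_encrypt.
puts <<~SQL
  WITH cte(cte_id, cte_token) AS MATERIALIZED (
    SELECT *
    FROM (VALUES (101, 'ZW5jcnlwdGVkLTEwMQ=='), (102, 'ZW5jcnlwdGVkLTEwMg==')) AS t (id, token)
  )
  UPDATE users
  SET static_object_token_encrypted = cte_token
  FROM cte
  WHERE cte_id = id
SQL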
diff --git a/lib/gitlab/background_migration/fix_vulnerability_occurrences_with_hashes_as_raw_metadata.rb b/lib/gitlab/background_migration/fix_vulnerability_occurrences_with_hashes_as_raw_metadata.rb
new file mode 100644
index 00000000000..2b049ea2d2f
--- /dev/null
+++ b/lib/gitlab/background_migration/fix_vulnerability_occurrences_with_hashes_as_raw_metadata.rb
@@ -0,0 +1,124 @@
+# frozen_string_literal: true
+
+require 'parser/ruby27'
+
+module Gitlab
+ module BackgroundMigration
+ # This migration fixes raw_metadata entries which have incorrectly been passed a Ruby Hash instead of JSON data.
+ class FixVulnerabilityOccurrencesWithHashesAsRawMetadata
+ CLUSTER_IMAGE_SCANNING_REPORT_TYPE = 7
+ GENERIC_REPORT_TYPE = 99
+
+ # Type error is used to handle unexpected types when parsing stringified hashes.
+ class TypeError < ::StandardError
+ attr_reader :message, :type
+
+ def initialize(message, type)
+ @message = message
+ @type = type
+ end
+ end
+
+ # Migration model namespace isolated from application code.
+ class Finding < ActiveRecord::Base
+ include EachBatch
+
+ self.table_name = 'vulnerability_occurrences'
+
+ scope :by_api_report_types, -> { where(report_type: [CLUSTER_IMAGE_SCANNING_REPORT_TYPE, GENERIC_REPORT_TYPE]) }
+ end
+
+ def perform(start_id, end_id)
+ Finding.by_api_report_types.where(id: start_id..end_id).each do |finding|
+ next if valid_json?(finding.raw_metadata)
+
+ metadata = hash_from_s(finding.raw_metadata)
+
+ finding.update(raw_metadata: metadata.to_json) if metadata
+ end
+ mark_job_as_succeeded(start_id, end_id)
+ end
+
+ def hash_from_s(str_hash)
+ ast = Parser::Ruby27.parse(str_hash)
+
+ unless ast.type == :hash
+ ::Gitlab::AppLogger.error(message: "expected raw_metadata to be a hash", type: ast.type)
+ return
+ end
+
+ parse_hash(ast)
+ rescue Parser::SyntaxError => e
+ ::Gitlab::AppLogger.error(message: "error parsing raw_metadata", error: e.message)
+ nil
+ rescue TypeError => e
+ ::Gitlab::AppLogger.error(message: "error parsing raw_metadata", error: e.message, type: e.type)
+ nil
+ end
+
+ private
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
+ 'FixVulnerabilityOccurrencesWithHashesAsRawMetadata',
+ arguments
+ )
+ end
+
+ def valid_json?(metadata)
+ Oj.load(metadata)
+ true
+ rescue Oj::ParseError, Encoding::UndefinedConversionError
+ false
+ end
+
+ def parse_hash(hash)
+ out = {}
+ hash.children.each do |node|
+ unless node.type == :pair
+ raise TypeError.new("expected child of hash to be a `pair`", node.type)
+ end
+
+ key, value = node.children
+
+ key = parse_key(key)
+ value = parse_value(value)
+
+ out[key] = value
+ end
+
+ out
+ end
+
+ def parse_key(key)
+ case key.type
+ when :sym, :str, :int
+ key.children.first
+ else
+ raise TypeError.new("expected key to be either symbol, string, or integer", key.type)
+ end
+ end
+
+ def parse_value(value)
+ case value.type
+ when :sym, :str, :int
+ value.children.first
+ # rubocop:disable Lint/BooleanSymbol
+ when :true
+ true
+ when :false
+ false
+ # rubocop:enable Lint/BooleanSymbol
+ when :nil
+ nil
+ when :array
+ value.children.map { |c| parse_value(c) }
+ when :hash
+ parse_hash(value)
+ else
+ raise TypeError.new("value of a pair was an unexpected type", value.type)
+ end
+ end
+ end
+ end
+end
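
In isolation, the repair this class performs looks like the following: a raw_metadata value that was accidentally stored as a Ruby Hash#inspect string is parsed into an AST with the parser gem and re-serialized as JSON. The sample value below is made up:

require 'parser/ruby27'

# A value as it might appear in vulnerability_occurrences.raw_metadata after
# the bug: Ruby hash syntax instead of JSON.
broken = '{:name=>"Example check", "severity"=>:low, :links=>[{:url=>"https://example.com"}]}'

ast = Parser::Ruby27.parse(broken)
ast.type # => :hash -- anything else is logged and skipped by the migration

# parse_hash/parse_key/parse_value walk this AST, so the finding would be
# rewritten with raw_metadata set to:
# {"name":"Example check","severity":"low","links":[{"url":"https://example.com"}]}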
diff --git a/lib/gitlab/background_migration/job_coordinator.rb b/lib/gitlab/background_migration/job_coordinator.rb
index cfbe7167677..5dc77f935e3 100644
--- a/lib/gitlab/background_migration/job_coordinator.rb
+++ b/lib/gitlab/background_migration/job_coordinator.rb
@@ -36,6 +36,8 @@ module Gitlab
attr_reader :worker_class
+ delegate :minimum_interval, :perform_in, to: :worker_class
+
def queue
@queue ||= worker_class.sidekiq_options['queue']
end
@@ -79,7 +81,7 @@ module Gitlab
def perform(class_name, arguments)
with_shared_connection do
- migration_class_for(class_name).new.perform(*arguments)
+ migration_instance_for(class_name).perform(*arguments)
end
end
@@ -113,6 +115,16 @@ module Gitlab
enqueued_job?([retry_set], migration_class)
end
+ def migration_instance_for(class_name)
+ migration_class = migration_class_for(class_name)
+
+ if migration_class < Gitlab::BackgroundMigration::BaseJob
+ migration_class.new(connection: connection)
+ else
+ migration_class.new
+ end
+ end
+
def migration_class_for(class_name)
Gitlab::BackgroundMigration.const_get(class_name, false)
end
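
The effect of migration_instance_for can be sketched standalone (class names are illustrative): subclasses of BaseJob are built with the coordinator's connection, everything else keeps the legacy zero-argument constructor.

# Standalone sketch; inside the coordinator, the injected connection is the
# shared connection of the tracking database rather than ActiveRecord::Base's.
migration_class = Gitlab::BackgroundMigration.const_get('BackfillCiNamespaceMirrors', false)

instance =
  if migration_class < Gitlab::BackgroundMigration::BaseJob
    migration_class.new(connection: ActiveRecord::Base.connection)
  else
    migration_class.new # e.g. BackfillCiNamespaceMirrors above
  end

instance.perform(1, 10_000)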
diff --git a/lib/gitlab/background_migration/migrate_legacy_artifacts.rb b/lib/gitlab/background_migration/migrate_legacy_artifacts.rb
deleted file mode 100644
index 23d99274232..00000000000
--- a/lib/gitlab/background_migration/migrate_legacy_artifacts.rb
+++ /dev/null
@@ -1,130 +0,0 @@
-# frozen_string_literal: true
-# rubocop:disable Metrics/ClassLength
-
-module Gitlab
- module BackgroundMigration
- ##
- # The class to migrate job artifacts from `ci_builds` to `ci_job_artifacts`
- class MigrateLegacyArtifacts
- FILE_LOCAL_STORE = 1 # equal to ObjectStorage::Store::LOCAL
- ARCHIVE_FILE_TYPE = 1 # equal to Ci::JobArtifact.file_types['archive']
- METADATA_FILE_TYPE = 2 # equal to Ci::JobArtifact.file_types['metadata']
- LEGACY_PATH_FILE_LOCATION = 1 # equal to Ci::JobArtifact.file_location['legacy_path']
-
- def perform(start_id, stop_id)
- ActiveRecord::Base.transaction do
- insert_archives(start_id, stop_id)
- insert_metadatas(start_id, stop_id)
- delete_legacy_artifacts(start_id, stop_id)
- end
- end
-
- private
-
- def insert_archives(start_id, stop_id)
- ActiveRecord::Base.connection.execute <<~SQL
- INSERT INTO
- ci_job_artifacts (
- project_id,
- job_id,
- expire_at,
- file_location,
- created_at,
- updated_at,
- file,
- size,
- file_store,
- file_type
- )
- SELECT
- project_id,
- id,
- artifacts_expire_at #{add_missing_db_timezone},
- #{LEGACY_PATH_FILE_LOCATION},
- created_at #{add_missing_db_timezone},
- created_at #{add_missing_db_timezone},
- artifacts_file,
- artifacts_size,
- COALESCE(artifacts_file_store, #{FILE_LOCAL_STORE}),
- #{ARCHIVE_FILE_TYPE}
- FROM
- ci_builds
- WHERE
- id BETWEEN #{start_id.to_i} AND #{stop_id.to_i}
- AND artifacts_file <> ''
- AND NOT EXISTS (
- SELECT
- 1
- FROM
- ci_job_artifacts
- WHERE
- ci_builds.id = ci_job_artifacts.job_id
- AND ci_job_artifacts.file_type = #{ARCHIVE_FILE_TYPE})
- SQL
- end
-
- def insert_metadatas(start_id, stop_id)
- ActiveRecord::Base.connection.execute <<~SQL
- INSERT INTO
- ci_job_artifacts (
- project_id,
- job_id,
- expire_at,
- file_location,
- created_at,
- updated_at,
- file,
- size,
- file_store,
- file_type
- )
- SELECT
- project_id,
- id,
- artifacts_expire_at #{add_missing_db_timezone},
- #{LEGACY_PATH_FILE_LOCATION},
- created_at #{add_missing_db_timezone},
- created_at #{add_missing_db_timezone},
- artifacts_metadata,
- NULL,
- COALESCE(artifacts_metadata_store, #{FILE_LOCAL_STORE}),
- #{METADATA_FILE_TYPE}
- FROM
- ci_builds
- WHERE
- id BETWEEN #{start_id.to_i} AND #{stop_id.to_i}
- AND artifacts_file <> ''
- AND artifacts_metadata <> ''
- AND NOT EXISTS (
- SELECT
- 1
- FROM
- ci_job_artifacts
- WHERE
- ci_builds.id = ci_job_artifacts.job_id
- AND ci_job_artifacts.file_type = #{METADATA_FILE_TYPE})
- SQL
- end
-
- def delete_legacy_artifacts(start_id, stop_id)
- ActiveRecord::Base.connection.execute <<~SQL
- UPDATE
- ci_builds
- SET
- artifacts_file = NULL,
- artifacts_file_store = NULL,
- artifacts_size = NULL,
- artifacts_metadata = NULL,
- artifacts_metadata_store = NULL
- WHERE
- id BETWEEN #{start_id.to_i} AND #{stop_id.to_i}
- AND artifacts_file <> ''
- SQL
- end
-
- def add_missing_db_timezone
- 'at time zone \'UTC\''
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/populate_test_reports_issue_id.rb b/lib/gitlab/background_migration/populate_test_reports_issue_id.rb
new file mode 100644
index 00000000000..301efd0c943
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_test_reports_issue_id.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+# rubocop: disable Style/Documentation
+
+module Gitlab
+ module BackgroundMigration
+ class PopulateTestReportsIssueId
+ def perform(start_id, stop_id)
+ # NO OP
+ end
+ end
+ end
+end
+
+Gitlab::BackgroundMigration::PopulateTestReportsIssueId.prepend_mod
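
The FOSS class is intentionally a no-op; prepend_mod lets the EE codebase layer the real behaviour on top. A hypothetical EE override would take roughly this shape (path and module body are illustrative, not part of this diff):

# ee/lib/ee/gitlab/background_migration/populate_test_reports_issue_id.rb (hypothetical)
module EE
  module Gitlab
    module BackgroundMigration
      module PopulateTestReportsIssueId
        def perform(start_id, stop_id)
          # The EE-only backfill of test report issue ids would live here.
        end
      end
    end
  end
end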
diff --git a/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb b/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb
index 84ff7423254..c1b8de1f6aa 100644
--- a/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb
+++ b/lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: true
# rubocop: disable Style/Documentation
-class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
+class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid # rubocop:disable Metrics/ClassLength
# rubocop: disable Gitlab/NamespacedClass
class VulnerabilitiesIdentifier < ActiveRecord::Base
self.table_name = "vulnerability_identifiers"
@@ -9,10 +9,14 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
end
class VulnerabilitiesFinding < ActiveRecord::Base
+ include EachBatch
include ShaAttribute
self.table_name = "vulnerability_occurrences"
+
+ has_many :signatures, foreign_key: 'finding_id', class_name: 'VulnerabilityFindingSignature', inverse_of: :finding
belongs_to :primary_identifier, class_name: 'VulnerabilitiesIdentifier', inverse_of: :primary_findings, foreign_key: 'primary_identifier_id'
+
REPORT_TYPES = {
sast: 0,
dependency_scanning: 1,
@@ -20,7 +24,9 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
dast: 3,
secret_detection: 4,
coverage_fuzzing: 5,
- api_fuzzing: 6
+ api_fuzzing: 6,
+ cluster_image_scanning: 7,
+ generic: 99
}.with_indifferent_access.freeze
enum report_type: REPORT_TYPES
@@ -28,6 +34,25 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
sha_attribute :location_fingerprint
end
+ class VulnerabilityFindingSignature < ActiveRecord::Base
+ include ShaAttribute
+
+ self.table_name = 'vulnerability_finding_signatures'
+ belongs_to :finding, foreign_key: 'finding_id', inverse_of: :signatures, class_name: 'VulnerabilitiesFinding'
+
+ sha_attribute :signature_sha
+ end
+
+ class VulnerabilitiesFindingPipeline < ActiveRecord::Base
+ include EachBatch
+ self.table_name = "vulnerability_occurrence_pipelines"
+ end
+
+ class Vulnerability < ActiveRecord::Base
+ include EachBatch
+ self.table_name = "vulnerabilities"
+ end
+
class CalculateFindingUUID
FINDING_NAMESPACES_IDS = {
development: "a143e9e2-41b3-47bc-9a19-081d089229f4",
@@ -52,35 +77,122 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
end
# rubocop: enable Gitlab/NamespacedClass
+ # rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
def perform(start_id, end_id)
- findings = VulnerabilitiesFinding
- .joins(:primary_identifier)
- .select(:id, :report_type, :fingerprint, :location_fingerprint, :project_id)
- .where(id: start_id..end_id)
-
- mappings = findings.each_with_object({}) do |finding, hash|
- hash[finding] = { uuid: calculate_uuid_v5_for_finding(finding) }
+ unless Feature.enabled?(:migrate_vulnerability_finding_uuids, default_enabled: true)
+ return log_info('Migration is disabled by the feature flag', start_id: start_id, end_id: end_id)
end
- ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings)
+ log_info('Migration started', start_id: start_id, end_id: end_id)
- logger.info(message: 'RecalculateVulnerabilitiesOccurrencesUuid Migration: recalculation is done for:',
- finding_ids: mappings.keys.pluck(:id))
+ VulnerabilitiesFinding
+ .joins(:primary_identifier)
+ .includes(:signatures)
+ .select(:id, :report_type, :primary_identifier_id, :fingerprint, :location_fingerprint, :project_id, :created_at, :vulnerability_id, :uuid)
+ .where(id: start_id..end_id)
+ .each_batch(of: 50) do |relation|
+ duplicates = find_duplicates(relation)
+ remove_findings(ids: duplicates) if duplicates.present?
+
+ to_update = relation.reject { |finding| duplicates.include?(finding.id) }
+
+ begin
+ known_uuids = Set.new
+ to_be_deleted = []
+
+ mappings = to_update.each_with_object({}) do |finding, hash|
+ uuid = calculate_uuid_v5_for_finding(finding)
+
+ if known_uuids.add?(uuid)
+ hash[finding] = { uuid: uuid }
+ else
+ to_be_deleted << finding.id
+ end
+ end
+
+ # It is technically still possible to have duplicate uuids
+ # if the data integrity is broken somehow and the primary identifiers of
+ # the findings are pointing to different projects with the same fingerprint values.
+ if to_be_deleted.present?
+ log_info('Conflicting UUIDs found within the batch', finding_ids: to_be_deleted)
+
+ remove_findings(ids: to_be_deleted)
+ end
+
+ ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) if mappings.present?
+
+ log_info('Recalculation is done', finding_ids: mappings.keys.pluck(:id))
+ rescue ActiveRecord::RecordNotUnique => error
+ log_info('RecordNotUnique error received')
+
+ match_data = /\(uuid\)=\((?<uuid>\S{36})\)/.match(error.message)
+
+ # The exception message contains the correct UUIDv5, which probably comes from a later record;
+ # that record is the easiest one to drop before retrying the UPDATE query
+ if match_data
+ uuid = match_data[:uuid]
+ log_info('Conflicting UUID found', uuid: uuid)
+
+ id = VulnerabilitiesFinding.find_by(uuid: uuid)&.id
+ remove_findings(ids: id) if id
+ retry
+ else
+ log_error('Could not find conflicting uuid')
+
+ Gitlab::ErrorTracking.track_and_raise_exception(error)
+ end
+ end
+ end
mark_job_as_succeeded(start_id, end_id)
rescue StandardError => error
- Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
+ log_error('An exception happened')
+
+ Gitlab::ErrorTracking.track_and_raise_exception(error)
end
+ # rubocop: enable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
private
+ def find_duplicates(relation)
+ to_exclude = []
+ relation.flat_map do |record|
+ # Assuming we're scanning id 31 and its duplicate is id 40: without tracking
+ # visited ids, processing 31 would mark 40 for removal and processing 40 would
+ # mark 31, dropping both records. Accumulating visited ids in to_exclude keeps
+ # the first occurrence and removes only the later duplicates.
+ to_exclude << record.id
+
+ VulnerabilitiesFinding.where(
+ report_type: record.report_type,
+ location_fingerprint: record.location_fingerprint,
+ primary_identifier_id: record.primary_identifier_id,
+ project_id: record.project_id
+ ).where.not(id: to_exclude).pluck(:id)
+ end
+ end
+
+ def remove_findings(ids:)
+ ids = Array(ids)
+ log_info('Removing Findings and associated records', ids: ids)
+
+ vulnerability_ids = VulnerabilitiesFinding.where(id: ids).pluck(:vulnerability_id).uniq.compact
+
+ VulnerabilitiesFindingPipeline.where(occurrence_id: ids).each_batch { |batch| batch.delete_all }
+ Vulnerability.where(id: vulnerability_ids).each_batch { |batch| batch.delete_all }
+ VulnerabilitiesFinding.where(id: ids).delete_all
+ end
+
def calculate_uuid_v5_for_finding(vulnerability_finding)
return unless vulnerability_finding
+ signatures = vulnerability_finding.signatures.sort_by { |signature| signature.algorithm_type_before_type_cast }
+ location_fingerprint = signatures.last&.signature_sha || vulnerability_finding.location_fingerprint
+
uuid_v5_name_components = {
report_type: vulnerability_finding.report_type,
primary_identifier_fingerprint: vulnerability_finding.fingerprint,
- location_fingerprint: vulnerability_finding.location_fingerprint,
+ location_fingerprint: location_fingerprint,
project_id: vulnerability_finding.project_id
}
@@ -89,6 +201,14 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
CalculateFindingUUID.call(name)
end
+ def log_info(message, **extra)
+ logger.info(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra)
+ end
+
+ def log_error(message, **extra)
+ logger.error(migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message, **extra)
+ end
+
def logger
@logger ||= Gitlab::BackgroundMigration::Logger.build
end
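
The UUIDv5 itself is produced by CalculateFindingUUID, whose body sits mostly outside this hunk. As a rough, self-contained illustration of a name-based UUID derivation over the components assembled above (the namespace id is the development entry shown earlier; the fingerprints, project id, and the exact joining of components are assumptions for the sketch):

require 'digest'

# Generic RFC 4122 version-5 UUID; illustrative, not the exact helper GitLab uses.
def uuid_v5(namespace_uuid, name)
  bytes = Digest::SHA1.digest([namespace_uuid.delete('-')].pack('H*') + name)[0, 16].unpack('C*')
  bytes[6] = (bytes[6] & 0x0f) | 0x50 # version 5
  bytes[8] = (bytes[8] & 0x3f) | 0x80 # RFC 4122 variant
  hex = bytes.pack('C*').unpack1('H*')
  [hex[0, 8], hex[8, 4], hex[12, 4], hex[16, 4], hex[20, 12]].join('-')
end

name = %w[sast 9a1b2c3d4e5f 0f1e2d3c4b5a 1337].join('-') # report_type-identifier-location-project_id
uuid_v5('a143e9e2-41b3-47bc-9a19-081d089229f4', name)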
diff --git a/lib/gitlab/background_migration/remove_duplicate_services.rb b/lib/gitlab/background_migration/remove_duplicate_services.rb
deleted file mode 100644
index 59fb9143a72..00000000000
--- a/lib/gitlab/background_migration/remove_duplicate_services.rb
+++ /dev/null
@@ -1,58 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # Remove duplicated service records with the same project and type.
- # These were created in the past for unknown reasons, and should be blocked
- # now by the uniqueness validation in the Service model.
- class RemoveDuplicateServices
- # See app/models/service
- class Service < ActiveRecord::Base
- include EachBatch
-
- self.table_name = 'services'
- self.inheritance_column = :_type_disabled
-
- scope :project_ids_with_duplicates, -> do
- select(:project_id)
- .distinct
- .where.not(project_id: nil)
- .group(:project_id, :type)
- .having('count(*) > 1')
- end
-
- scope :types_with_duplicates, -> (project_ids) do
- select(:project_id, :type)
- .where(project_id: project_ids)
- .group(:project_id, :type)
- .having('count(*) > 1')
- end
- end
-
- def perform(*project_ids)
- types_with_duplicates = Service.types_with_duplicates(project_ids).pluck(:project_id, :type)
-
- types_with_duplicates.each do |project_id, type|
- remove_duplicates(project_id, type)
- end
- end
-
- private
-
- def remove_duplicates(project_id, type)
- scope = Service.where(project_id: project_id, type: type)
-
- # Build a subquery to determine which service record is actually in use,
- # by querying for it without specifying an order.
- #
- # This should match the record returned by `Project#find_service`,
- # and the `has_one` service associations on `Project`.
- correct_service = scope.select(:id).limit(1)
-
- # Delete all other services with the same `project_id` and `type`
- duplicate_services = scope.where.not(id: correct_service)
- duplicate_services.delete_all
- end
- end
- end
-end