diff options
author | Robert Speicher <rspeicher@gmail.com> | 2021-01-20 22:34:23 +0300 |
---|---|---|
committer | Robert Speicher <rspeicher@gmail.com> | 2021-01-20 22:34:23 +0300 |
commit | 6438df3a1e0fb944485cebf07976160184697d72 (patch) | |
tree | 00b09bfd170e77ae9391b1a2f5a93ef6839f2597 /lib/gitlab/background_migration | |
parent | 42bcd54d971da7ef2854b896a7b34f4ef8601067 (diff) |
Add latest changes from gitlab-org/gitlab@13-8-stable-ee (v13.8.0-rc42)
Diffstat (limited to 'lib/gitlab/background_migration')
4 files changed, 307 insertions, 0 deletions
# ---------------------------------------------------------------------------
# lib/gitlab/background_migration/backfill_artifact_expiry_date.rb
# (new file, mode 100644)
# ---------------------------------------------------------------------------
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Backfill expire_at for a range of Ci::JobArtifact
    #
    # Only rows inside the given id range that have no `expire_at` and were
    # created before DEFAULT_EXPIRATION_SWITCH_DATE are updated. Rows older
    # than OLD_ARTIFACT_AGE get `offset_date + OLD_ARTIFACT_EXPIRY_OFFSET`;
    # all remaining rows get `offset_date + RECENT_ARTIFACT_EXPIRY_OFFSET`.
    class BackfillArtifactExpiryDate
      include Gitlab::Utils::StrongMemoize

      # Batch size passed to `each_batch`.
      BATCH_SIZE = 1_000
      # Rows created on or after this date are never touched by this job.
      DEFAULT_EXPIRATION_SWITCH_DATE = Date.new(2020, 6, 22).freeze
      # Age threshold separating "old" rows from "recent" rows.
      OLD_ARTIFACT_AGE = 15.months
      # Added to `offset_date` to get the expiry of "old" rows.
      OLD_ARTIFACT_EXPIRY_OFFSET = 3.months
      # Added to `offset_date` to get the expiry of "recent" rows.
      RECENT_ARTIFACT_EXPIRY_OFFSET = 1.year

      # Ci::JobArtifact model
      #
      # Minimal ActiveRecord model over the `ci_job_artifacts` table that
      # carries only the scopes this migration uses.
      class Ci::JobArtifact < ActiveRecord::Base
        include ::EachBatch

        self.table_name = 'ci_job_artifacts'

        scope :between, -> (start_id, end_id) { where(id: start_id..end_id) }
        scope :before_default_expiration_switch, -> { where('created_at < ?', DEFAULT_EXPIRATION_SWITCH_DATE) }
        scope :without_expiry_date, -> { where(expire_at: nil) }

        # `old` and `recent` split rows around one cutoff: strictly older than
        # the cutoff is `old`, everything else (including a row created exactly
        # at the cutoff) is `recent`. Callers that issue both queries should
        # pass the same `cutoff` to each so that no row falls between them;
        # when omitted, the cutoff is evaluated at call time.
        scope :old, -> (cutoff = OLD_ARTIFACT_AGE.ago) { where(arel_table[:created_at].lt(cutoff)) }
        scope :recent, -> (cutoff = OLD_ARTIFACT_AGE.ago) { where(arel_table[:created_at].gteq(cutoff)) }
      end

      # Sets `expire_at` on the eligible rows whose id lies in
      # `start_id..end_id`, working through them in batches of BATCH_SIZE.
      #
      # start_id - First id of the range (inclusive).
      # end_id   - Last id of the range (inclusive).
      def perform(start_id, end_id)
        # Evaluate the age cutoff once. Evaluating it separately for each
        # update would leave a small window of rows matched by neither scope.
        cutoff = OLD_ARTIFACT_AGE.ago

        Ci::JobArtifact
          .between(start_id, end_id)
          .without_expiry_date
          .before_default_expiration_switch
          .each_batch(of: BATCH_SIZE) do |batch|
            batch.old(cutoff).update_all(expire_at: old_artifact_expiry_date)
            batch.recent(cutoff).update_all(expire_at: recent_artifact_expiry_date)
          end
      end

      private

      # Midnight (in `Time.zone`) on the 22nd of the current month when today
      # is before the 22nd, otherwise midnight on the 22nd of the next month.
      # Memoized, so every row updated by this instance shares the same base.
      def offset_date
        strong_memoize(:offset_date) do
          current_date = Time.current
          target_date = Time.zone.local(current_date.year, current_date.month, 22, 0, 0, 0)

          current_date.day < 22 ? target_date : target_date.next_month
        end
      end

      # Expiry assigned to rows matched by the `old` scope.
      def old_artifact_expiry_date
        offset_date + OLD_ARTIFACT_EXPIRY_OFFSET
      end

      # Expiry assigned to rows matched by the `recent` scope.
      def recent_artifact_expiry_date
        offset_date + RECENT_ARTIFACT_EXPIRY_OFFSET
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
# (new file, mode 100644)
# ---------------------------------------------------------------------------
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Background migration that extends CopyColumn to update the value of a
    # column using the value of another column in the same table.
    #
    # - The {start_id, end_id} arguments are at the start so that it can be used
    #   with `queue_background_migration_jobs_by_range_at_intervals`
    # - Provides support for background job tracking through the use of
    #   Gitlab::Database::BackgroundMigrationJob
    # - Uses sub-batching so that we can keep each update's execution time at
    #   low 100s ms, while being able to update more records per 2 minutes
    #   that we allow background migration jobs to be scheduled one after the other
    # - We skip the NULL checks as they may result in not using an index scan
    # - The table that is migrated does _not_ need `id` as the primary key
    #   We use the provided primary_key column to perform the update.
    class CopyColumnUsingBackgroundMigrationJob
      include Gitlab::Database::DynamicModelHelpers

      # Seconds slept after every sub-batch update.
      PAUSE_SECONDS = 0.1

      # start_id - The start ID of the range of rows to update.
      # end_id - The end ID of the range of rows to update.
      # table - The name of the table that contains the columns.
      # primary_key - The primary key column of the table.
      # copy_from - The column containing the data to copy.
      # copy_to - The column to copy the data to.
      # sub_batch_size - We don't want updates to take more than ~100ms
      #                  This allows us to run multiple smaller batches during
      #                  the minimum 2.minute interval that we can schedule jobs
      def perform(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size)
        quoted_copy_from = connection.quote_column_name(copy_from)
        quoted_copy_to = connection.quote_column_name(copy_to)

        # The SET clause is the same for every sub-batch, so build it once.
        assignment = "#{quoted_copy_to}=#{quoted_copy_from}"

        relation_scoped_to_range(table, primary_key, start_id, end_id)
          .each_batch(column: primary_key, of: sub_batch_size) do |sub_batch|
            sub_batch.update_all(assignment)

            sleep(PAUSE_SECONDS)
          end

        # We have to add all arguments when marking a job as succeeded as they
        # are all used to track the job by `queue_background_migration_jobs_by_range_at_intervals`
        mark_job_as_succeeded(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size)
      end

      private

      # Connection used to quote the column names.
      def connection
        ActiveRecord::Base.connection
      end

      # Marks the tracking record(s) for this class name and argument list as
      # succeeded.
      def mark_job_as_succeeded(*arguments)
        Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(self.class.name, arguments)
      end

      # Relation over `source_table` restricted to rows whose
      # `source_key_column` lies in `start_id..stop_id` (inclusive).
      def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id)
        define_batchable_model(source_table).where(source_key_column => start_id..stop_id)
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
# (new file, mode 100644)
# ---------------------------------------------------------------------------
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # This class populates the `finding_uuid` attribute for
    # the existing `vulnerability_feedback` records.
    class PopulateFindingUuidForVulnerabilityFeedback
      # Enum mapping shared by `VulnerabilityFeedback#category` and
      # `Finding#report_type`.
      REPORT_TYPES = {
        sast: 0,
        dependency_scanning: 1,
        container_scanning: 2,
        dast: 3,
        secret_detection: 4,
        coverage_fuzzing: 5,
        api_fuzzing: 6
      }.freeze

      # Migration-local model over the `vulnerability_feedback` table.
      class VulnerabilityFeedback < ActiveRecord::Base
        include EachBatch

        self.table_name = 'vulnerability_feedback'

        enum category: REPORT_TYPES

        scope :in_range, -> (start, stop) { where(id: start..stop) }
        scope :without_uuid, -> { where(finding_uuid: nil) }

        # Loads the current scope into an array and calls
        # `vulnerability_finding` on every record before returning the array,
        # so that each record's lookup key is registered with BatchLoader.
        def self.load_vulnerability_findings
          records = all.to_a
          records.each(&:vulnerability_finding)
          records
        end

        # Writes the calculated UUID to `finding_uuid` via `update_column`.
        # Does nothing when no finding, or no primary identifier, is present.
        # Any StandardError is handed to
        # `Gitlab::ErrorTracking.track_and_raise_for_dev_exception`.
        def set_finding_uuid
          return unless vulnerability_finding.present? && vulnerability_finding.primary_identifier.present?

          update_column(:finding_uuid, calculated_uuid)
        rescue StandardError => error
          Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
        end

        # Batch-loaded `Finding` whose `finding_key` equals this record's
        # `finding_key`, or nil when none of the loaded findings matches.
        def vulnerability_finding
          BatchLoader.for(finding_key).batch(replace_methods: false) do |finding_keys, loader|
            project_ids = finding_keys.map { |key| key[:project_id] }
            categories = finding_keys.map { |key| key[:category] }
            fingerprints = finding_keys.map { |key| key[:project_fingerprint] }

            findings = Finding.with_primary_identifier.where(
              project_id: project_ids.uniq,
              report_type: categories.uniq,
              project_fingerprint: fingerprints.uniq
            ).to_a

            # Index the findings by key once instead of scanning the whole
            # list for every requested key. `||=` keeps the first finding seen
            # for a key, matching what a linear `find` would return.
            findings_by_key = findings.each_with_object({}) do |finding, index|
              index[finding.finding_key] ||= finding
            end

            finding_keys.each { |key| loader.call(key, findings_by_key[key]) }
          end
        end

        private

        # UUID v5 derived from `uuid_components`.
        def calculated_uuid
          Gitlab::UUID.v5(uuid_components)
        end

        # Category, primary identifier fingerprint, location fingerprint and
        # project id, joined with '-'.
        def uuid_components
          [
            category,
            vulnerability_finding.primary_identifier.fingerprint,
            vulnerability_finding.location_fingerprint,
            project_id
          ].join('-')
        end

        # Lookup key compared against `Finding#finding_key`.
        def finding_key
          {
            project_id: project_id,
            category: category,
            project_fingerprint: project_fingerprint
          }
        end
      end

      # Migration-local model over the `vulnerability_occurrences` table.
      class Finding < ActiveRecord::Base
        include ShaAttribute

        self.table_name = 'vulnerability_occurrences'

        sha_attribute :project_fingerprint
        sha_attribute :location_fingerprint

        belongs_to :primary_identifier, class_name: 'Gitlab::BackgroundMigration::PopulateFindingUuidForVulnerabilityFeedback::Identifier'

        enum report_type: REPORT_TYPES

        scope :with_primary_identifier, -> { includes(:primary_identifier) }

        # Same shape as `VulnerabilityFeedback#finding_key`, with
        # `report_type` supplied under the `:category` key.
        def finding_key
          {
            project_id: project_id,
            category: report_type,
            project_fingerprint: project_fingerprint
          }
        end
      end

      # Migration-local model over the `vulnerability_identifiers` table.
      class Identifier < ActiveRecord::Base
        self.table_name = 'vulnerability_identifiers'
      end

      # Processes feedback records that have no `finding_uuid` and whose id is
      # in the given range, then logs how many records were loaded.
      #
      # range - The start and stop ids, splatted into `in_range`.
      def perform(*range)
        feedback = VulnerabilityFeedback.without_uuid.in_range(*range).load_vulnerability_findings
        feedback.each(&:set_finding_uuid)

        log_info(feedback.count)
      end

      # Writes an info entry to the background migration logger.
      #
      # feedback_count - The count to report. `perform` passes the number of
      #                  records loaded, which can exceed the number actually
      #                  updated because `set_finding_uuid` may return early.
      def log_info(feedback_count)
        ::Gitlab::BackgroundMigration::Logger.info(
          migrator: self.class.name,
          message: '`finding_uuid` attributes has been set',
          count: feedback_count
        )
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/gitlab/background_migration/remove_duplicate_services.rb
# (new file, mode 100644)
# ---------------------------------------------------------------------------
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Remove duplicated service records with the same project and type.
    # These were created in the past for unknown reasons, and should be blocked
    # now by the uniqueness validation in the Service model.
    class RemoveDuplicateServices
      # See app/models/service
      class Service < ActiveRecord::Base
        include EachBatch

        self.table_name = 'services'
        # Point the inheritance column away from `type` so that `type` is
        # read as an ordinary attribute.
        self.inheritance_column = :_type_disabled

        # Distinct project ids having more than one row for some
        # (project_id, type) pair.
        scope :project_ids_with_duplicates, -> do
          select(:project_id)
            .distinct
            .where.not(project_id: nil)
            .group(:project_id, :type)
            .having('count(*) > 1')
        end

        # (project_id, type) pairs with more than one row, limited to the
        # given project ids.
        scope :types_with_duplicates, -> (project_ids) do
          select(:project_id, :type)
            .where(project_id: project_ids)
            .group(:project_id, :type)
            .having('count(*) > 1')
        end
      end

      # Removes the duplicate service rows of every given project.
      #
      # project_ids - The ids of the projects to clean up.
      def perform(*project_ids)
        Service
          .types_with_duplicates(project_ids)
          .pluck(:project_id, :type)
          .each { |project_id, type| remove_duplicates(project_id, type) }
      end

      private

      # Deletes every service row for (project_id, type) except one.
      def remove_duplicates(project_id, type)
        scope = Service.where(project_id: project_id, type: type)

        # Build a subquery to determine which service record is actually in use,
        # by querying for it without specifying an order.
        #
        # This should match the record returned by `Project#find_service`,
        # and the `has_one` service associations on `Project`.
        correct_service = scope.select(:id).limit(1)

        # Delete all other services with the same `project_id` and `type`
        scope.where.not(id: correct_service).delete_all
      end
    end
  end
end