Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Robert Speicher <rspeicher@gmail.com> 2021-01-20 22:34:23 +0300
committer Robert Speicher <rspeicher@gmail.com> 2021-01-20 22:34:23 +0300
commit 6438df3a1e0fb944485cebf07976160184697d72 (patch)
tree 00b09bfd170e77ae9391b1a2f5a93ef6839f2597 /lib/gitlab/background_migration
parent 42bcd54d971da7ef2854b896a7b34f4ef8601067 (diff)
Add latest changes from gitlab-org/gitlab@13-8-stable-ee v13.8.0-rc42
Diffstat (limited to 'lib/gitlab/background_migration')
-rw-r--r-- lib/gitlab/background_migration/backfill_artifact_expiry_date.rb 57
-rw-r--r-- lib/gitlab/background_migration/copy_column_using_background_migration_job.rb 64
-rw-r--r-- lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb 128
-rw-r--r-- lib/gitlab/background_migration/remove_duplicate_services.rb 58
4 files changed, 307 insertions, 0 deletions
diff --git a/lib/gitlab/background_migration/backfill_artifact_expiry_date.rb b/lib/gitlab/background_migration/backfill_artifact_expiry_date.rb
new file mode 100644
index 00000000000..0a8c203421b
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_artifact_expiry_date.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Backfill expire_at for a range of Ci::JobArtifact
+ class BackfillArtifactExpiryDate
+ include Gitlab::Utils::StrongMemoize
+
+ BATCH_SIZE = 1_000
+ DEFAULT_EXPIRATION_SWITCH_DATE = Date.new(2020, 6, 22).freeze
+ OLD_ARTIFACT_AGE = 15.months
+ OLD_ARTIFACT_EXPIRY_OFFSET = 3.months
+ RECENT_ARTIFACT_EXPIRY_OFFSET = 1.year
+
+ # Ci::JobArtifact model
+ class Ci::JobArtifact < ActiveRecord::Base
+ include ::EachBatch
+
+ self.table_name = 'ci_job_artifacts'
+
+ scope :between, -> (start_id, end_id) { where(id: start_id..end_id) }
+ scope :before_default_expiration_switch, -> { where('created_at < ?', DEFAULT_EXPIRATION_SWITCH_DATE) }
+ scope :without_expiry_date, -> { where(expire_at: nil) }
+ scope :old, -> { where(self.arel_table[:created_at].lt(OLD_ARTIFACT_AGE.ago)) }
+ scope :recent, -> { where(self.arel_table[:created_at].gt(OLD_ARTIFACT_AGE.ago)) }
+ end
+
+ def perform(start_id, end_id)
+ Ci::JobArtifact.between(start_id, end_id)
+ .without_expiry_date.before_default_expiration_switch
+ .each_batch(of: BATCH_SIZE) do |batch|
+ batch.old.update_all(expire_at: old_artifact_expiry_date)
+ batch.recent.update_all(expire_at: recent_artifact_expiry_date)
+ end
+ end
+
+ private
+
+ def offset_date
+ strong_memoize(:offset_date) do
+ current_date = Time.current
+ target_date = Time.zone.local(current_date.year, current_date.month, 22, 0, 0, 0)
+
+ current_date.day < 22 ? target_date : target_date.next_month
+ end
+ end
+
+ def old_artifact_expiry_date
+ offset_date + OLD_ARTIFACT_EXPIRY_OFFSET
+ end
+
+ def recent_artifact_expiry_date
+ offset_date + RECENT_ARTIFACT_EXPIRY_OFFSET
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
new file mode 100644
index 00000000000..16c0de39a3b
--- /dev/null
+++ b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Background migration that extends CopyColumn to update the value of a
+ # column using the value of another column in the same table.
+ #
+ # - The {start_id, end_id} arguments are at the start so that it can be used
+ # with `queue_background_migration_jobs_by_range_at_intervals`
+ # - Provides support for background job tracking through the use of
+ # Gitlab::Database::BackgroundMigrationJob
+ # - Uses sub-batching so that we can keep each update's execution time at
+ # low 100s ms, while being able to update more records per 2 minutes
+ # that we allow background migration jobs to be scheduled one after the other
+ # - We skip the NULL checks as they may result in not using an index scan
+ # - The table that is migrated does _not_ need `id` as the primary key
+ # We use the provided primary_key column to perform the update.
+ class CopyColumnUsingBackgroundMigrationJob
+ include Gitlab::Database::DynamicModelHelpers
+
+ PAUSE_SECONDS = 0.1
+
+ # start_id - The start ID of the range of rows to update.
+ # end_id - The end ID of the range of rows to update.
+ # table - The name of the table that contains the columns.
+ # primary_key - The primary key column of the table.
+ # copy_from - The column containing the data to copy.
+ # copy_to - The column to copy the data to.
+ # sub_batch_size - We don't want updates to take more than ~100ms
+ # This allows us to run multiple smaller batches during
+ # the minimum 2.minute interval that we can schedule jobs
+ def perform(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size)
+ quoted_copy_from = connection.quote_column_name(copy_from)
+ quoted_copy_to = connection.quote_column_name(copy_to)
+
+ parent_batch_relation = relation_scoped_to_range(table, primary_key, start_id, end_id)
+
+ parent_batch_relation.each_batch(column: primary_key, of: sub_batch_size) do |sub_batch|
+ sub_batch.update_all("#{quoted_copy_to}=#{quoted_copy_from}")
+
+ sleep(PAUSE_SECONDS)
+ end
+
+ # We have to add all arguments when marking a job as succeeded as they
+ # are all used to track the job by `queue_background_migration_jobs_by_range_at_intervals`
+ mark_job_as_succeeded(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size)
+ end
+
+ private
+
+ def connection
+ ActiveRecord::Base.connection
+ end
+
+ def mark_job_as_succeeded(*arguments)
+ Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(self.class.name, arguments)
+ end
+
+ def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id)
+ define_batchable_model(source_table).where(source_key_column => start_id..stop_id)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb b/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
new file mode 100644
index 00000000000..52b09e07fd5
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
@@ -0,0 +1,128 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # This class populates the `finding_uuid` attribute for
+ # the existing `vulnerability_feedback` records.
+ #
+ # For each feedback record in the given ID range that has no `finding_uuid`,
+ # the matching row of `vulnerability_occurrences` is looked up by project,
+ # category / report type and project fingerprint. When that finding and its
+ # primary identifier exist, a v5 UUID is derived from them and stored.
+ class PopulateFindingUuidForVulnerabilityFeedback
+ # Integer values backing both the `category` enum of VulnerabilityFeedback
+ # and the `report_type` enum of Finding below.
+ REPORT_TYPES = {
+ sast: 0,
+ dependency_scanning: 1,
+ container_scanning: 2,
+ dast: 3,
+ secret_detection: 4,
+ coverage_fuzzing: 5,
+ api_fuzzing: 6
+ }.freeze
+
+ # Migration-local model mapped onto the `vulnerability_feedback` table.
+ class VulnerabilityFeedback < ActiveRecord::Base # rubocop:disable Style/Documentation
+ include EachBatch
+
+ self.table_name = 'vulnerability_feedback'
+
+ enum category: REPORT_TYPES
+
+ scope :in_range, -> (start, stop) { where(id: start..stop) }
+ scope :without_uuid, -> { where(finding_uuid: nil) }
+
+ # Loads the current relation into an array and calls `vulnerability_finding`
+ # on every record before returning the array.
+ # NOTE(review): presumably this registers every lookup key with BatchLoader
+ # up front so that the findings are fetched together - confirm against the
+ # BatchLoader documentation.
+ def self.load_vulnerability_findings
+ all.to_a.tap { |collection| collection.each(&:vulnerability_finding) }
+ end
+
+ # Writes the calculated UUID to `finding_uuid` via `update_column`.
+ # Does nothing when the finding or its primary identifier is missing.
+ # Any StandardError is handed to
+ # Gitlab::ErrorTracking.track_and_raise_for_dev_exception.
+ def set_finding_uuid
+ return unless vulnerability_finding.present? && vulnerability_finding.primary_identifier.present?
+
+ update_column(:finding_uuid, calculated_uuid)
+ rescue StandardError => error
+ Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
+ end
+
+ # Returns a BatchLoader item keyed on `finding_key`.
+ #
+ # The batch block queries Finding once for all collected keys. The three
+ # conditions are independent lists, so the result can contain rows that
+ # match no single key exactly; each key is therefore resolved to the first
+ # loaded finding whose own `finding_key` is equal to it (nil when none is).
+ def vulnerability_finding
+ BatchLoader.for(finding_key).batch(replace_methods: false) do |finding_keys, loader|
+ project_ids = finding_keys.map { |key| key[:project_id] }
+ categories = finding_keys.map { |key| key[:category] }
+ fingerprints = finding_keys.map { |key| key[:project_fingerprint] }
+
+ findings = Finding.with_primary_identifier.where(
+ project_id: project_ids.uniq,
+ report_type: categories.uniq,
+ project_fingerprint: fingerprints.uniq
+ ).to_a
+
+ finding_keys.each do |finding_key|
+ loader.call(
+ finding_key,
+ findings.find { |f| finding_key == f.finding_key }
+ )
+ end
+ end
+ end
+
+ private
+
+ # v5 UUID built from `uuid_components`.
+ def calculated_uuid
+ Gitlab::UUID.v5(uuid_components)
+ end
+
+ # Category, primary identifier fingerprint, location fingerprint and
+ # project ID, joined with '-'.
+ def uuid_components
+ [
+ category,
+ vulnerability_finding.primary_identifier.fingerprint,
+ vulnerability_finding.location_fingerprint,
+ project_id
+ ].join('-')
+ end
+
+ # Lookup key for this record; compared for equality with Finding#finding_key.
+ def finding_key
+ {
+ project_id: project_id,
+ category: category,
+ project_fingerprint: project_fingerprint
+ }
+ end
+ end
+
+ # Migration-local model mapped onto the `vulnerability_occurrences` table.
+ class Finding < ActiveRecord::Base # rubocop:disable Style/Documentation
+ include ShaAttribute
+
+ self.table_name = 'vulnerability_occurrences'
+
+ sha_attribute :project_fingerprint
+ sha_attribute :location_fingerprint
+
+ belongs_to :primary_identifier, class_name: 'Gitlab::BackgroundMigration::PopulateFindingUuidForVulnerabilityFeedback::Identifier'
+
+ enum report_type: REPORT_TYPES
+
+ scope :with_primary_identifier, -> { includes(:primary_identifier) }
+
+ # Same shape as VulnerabilityFeedback#finding_key, with `report_type`
+ # stored under the :category key so the two hashes can be compared.
+ def finding_key
+ {
+ project_id: project_id,
+ category: report_type,
+ project_fingerprint: project_fingerprint
+ }
+ end
+ end
+
+ # Migration-local model mapped onto the `vulnerability_identifiers` table.
+ class Identifier < ActiveRecord::Base # rubocop:disable Style/Documentation
+ self.table_name = 'vulnerability_identifiers'
+ end
+
+ # Sets `finding_uuid` for the feedback records in an ID range.
+ #
+ # range - Splatted into the `in_range` scope, i.e. the start and stop IDs.
+ #
+ # The logged count is the number of records loaded; it includes records for
+ # which `set_finding_uuid` returned without writing anything.
+ def perform(*range)
+ feedback = VulnerabilityFeedback.without_uuid.in_range(*range).load_vulnerability_findings
+ feedback.each(&:set_finding_uuid)
+
+ log_info(feedback.count)
+ end
+
+ # Writes an info entry with the migrator name and the given count to the
+ # background migration logger.
+ def log_info(feedback_count)
+ ::Gitlab::BackgroundMigration::Logger.info(
+ migrator: self.class.name,
+ message: '`finding_uuid` attributes has been set',
+ count: feedback_count
+ )
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/remove_duplicate_services.rb b/lib/gitlab/background_migration/remove_duplicate_services.rb
new file mode 100644
index 00000000000..59fb9143a72
--- /dev/null
+++ b/lib/gitlab/background_migration/remove_duplicate_services.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Remove duplicated service records with the same project and type.
+ # These were created in the past for unknown reasons, and should be blocked
+ # now by the uniqueness validation in the Service model.
+ class RemoveDuplicateServices
+ # See app/models/service
+ class Service < ActiveRecord::Base
+ include EachBatch
+
+ self.table_name = 'services'
+ self.inheritance_column = :_type_disabled
+
+ scope :project_ids_with_duplicates, -> do
+ select(:project_id)
+ .distinct
+ .where.not(project_id: nil)
+ .group(:project_id, :type)
+ .having('count(*) > 1')
+ end
+
+ scope :types_with_duplicates, -> (project_ids) do
+ select(:project_id, :type)
+ .where(project_id: project_ids)
+ .group(:project_id, :type)
+ .having('count(*) > 1')
+ end
+ end
+
+ def perform(*project_ids)
+ types_with_duplicates = Service.types_with_duplicates(project_ids).pluck(:project_id, :type)
+
+ types_with_duplicates.each do |project_id, type|
+ remove_duplicates(project_id, type)
+ end
+ end
+
+ private
+
+ def remove_duplicates(project_id, type)
+ scope = Service.where(project_id: project_id, type: type)
+
+ # Build a subquery to determine which service record is actually in use,
+ # by querying for it without specifying an order.
+ #
+ # This should match the record returned by `Project#find_service`,
+ # and the `has_one` service associations on `Project`.
+ correct_service = scope.select(:id).limit(1)
+
+ # Delete all other services with the same `project_id` and `type`
+ duplicate_services = scope.where.not(id: correct_service)
+ duplicate_services.delete_all
+ end
+ end
+ end
+end