Add latest changes from gitlab-org/gitlab@13-8-stable-ee (v13.8.0-rc42)

author: Robert Speicher <rspeicher@gmail.com> 2021-01-20 22:34:23 +0300
committer: Robert Speicher <rspeicher@gmail.com> 2021-01-20 22:34:23 +0300
commit: 6438df3a1e0fb944485cebf07976160184697d72 (patch)
tree: 00b09bfd170e77ae9391b1a2f5a93ef6839f2597 /lib/gitlab/background_migration
parent: 42bcd54d971da7ef2854b896a7b34f4ef8601067 (diff)
4 files changed, 307 insertions, 0 deletions
diff --git a/lib/gitlab/background_migration/backfill_artifact_expiry_date.rb b/lib/gitlab/background_migration/backfill_artifact_expiry_date.rb
new file mode 100644
index 00000000000..0a8c203421b
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_artifact_expiry_date.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Backfill expire_at for a range of Ci::JobArtifact
+    class BackfillArtifactExpiryDate
+      include Gitlab::Utils::StrongMemoize
+
+      BATCH_SIZE = 1_000
+      DEFAULT_EXPIRATION_SWITCH_DATE = Date.new(2020, 6, 22).freeze
+      OLD_ARTIFACT_AGE = 15.months
+      OLD_ARTIFACT_EXPIRY_OFFSET = 3.months
+      RECENT_ARTIFACT_EXPIRY_OFFSET = 1.year
+
+      # Ci::JobArtifact model
+      class Ci::JobArtifact < ActiveRecord::Base
+        include ::EachBatch
+
+        self.table_name = 'ci_job_artifacts'
+
+        scope :between, -> (start_id, end_id) { where(id: start_id..end_id) }
+        scope :before_default_expiration_switch, -> { where('created_at < ?', DEFAULT_EXPIRATION_SWITCH_DATE) }
+        scope :without_expiry_date, -> { where(expire_at: nil) }
+        scope :old, -> { where(self.arel_table[:created_at].lt(OLD_ARTIFACT_AGE.ago)) }
+        scope :recent, -> { where(self.arel_table[:created_at].gt(OLD_ARTIFACT_AGE.ago)) }
+      end
+
+      def perform(start_id, end_id)
+        Ci::JobArtifact.between(start_id, end_id)
+          .without_expiry_date.before_default_expiration_switch
+          .each_batch(of: BATCH_SIZE) do |batch|
+          batch.old.update_all(expire_at: old_artifact_expiry_date)
+          batch.recent.update_all(expire_at: recent_artifact_expiry_date)
+        end
+      end
+
+      private
+
+      def offset_date
+        strong_memoize(:offset_date) do
+          current_date = Time.current
+          target_date = Time.zone.local(current_date.year, current_date.month, 22, 0, 0, 0)
+
+          current_date.day < 22 ? target_date : target_date.next_month
+        end
+      end
+
+      def old_artifact_expiry_date
+        offset_date + OLD_ARTIFACT_EXPIRY_OFFSET
+      end
+
+      def recent_artifact_expiry_date
+        offset_date + RECENT_ARTIFACT_EXPIRY_OFFSET
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
new file mode 100644
index 00000000000..16c0de39a3b
--- /dev/null
+++ b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Background migration that extends CopyColumn to update the value of a
+    # column using the value of another column in the same table.
+    #
+    # - The {start_id, end_id} arguments are at the start so that it can be used
+    #   with `queue_background_migration_jobs_by_range_at_intervals`
+    # - Provides support for background job tracking through the use of
+    #   Gitlab::Database::BackgroundMigrationJob
+    # - Uses sub-batching so that we can keep each update's execution time at
+    #   low 100s ms, while being able to update more records per 2 minutes
+    #   that we allow background migration jobs to be scheduled one after the other
+    # - We skip the NULL checks as they may result in not using an index scan
+    # - The table that is migrated does _not_ need `id` as the primary key
+    #   We use the provided primary_key column to perform the update.
+    class CopyColumnUsingBackgroundMigrationJob
+      include Gitlab::Database::DynamicModelHelpers
+
+      PAUSE_SECONDS = 0.1
+
+      # start_id - The start ID of the range of rows to update.
+      # end_id - The end ID of the range of rows to update.
+      # table - The name of the table that contains the columns.
+      # primary_key - The primary key column of the table.
+      # copy_from - The column containing the data to copy.
+      # copy_to - The column to copy the data to.
+      # sub_batch_size - We don't want updates to take more than ~100ms
+      #                  This allows us to run multiple smaller batches during
+      #                  the minimum 2.minute interval that we can schedule jobs
+      def perform(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size)
+        quoted_copy_from = connection.quote_column_name(copy_from)
+        quoted_copy_to = connection.quote_column_name(copy_to)
+
+        parent_batch_relation = relation_scoped_to_range(table, primary_key, start_id, end_id)
+
+        parent_batch_relation.each_batch(column: primary_key, of: sub_batch_size) do |sub_batch|
+          sub_batch.update_all("#{quoted_copy_to}=#{quoted_copy_from}")
+
+          sleep(PAUSE_SECONDS)
+        end
+
+        # We have to add all arguments when marking a job as succeeded as they
+        #  are all used to track the job by `queue_background_migration_jobs_by_range_at_intervals`
+        mark_job_as_succeeded(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size)
+      end
+
+      private
+
+      def connection
+        ActiveRecord::Base.connection
+      end
+
+      def mark_job_as_succeeded(*arguments)
+        Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(self.class.name, arguments)
+      end
+
+      def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id)
+        define_batchable_model(source_table).where(source_key_column => start_id..stop_id)
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb b/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
new file mode 100644
index 00000000000..52b09e07fd5
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
@@ -0,0 +1,128 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # This class populates the `finding_uuid` attribute for
+    # the existing `vulnerability_feedback` records.
+    #
+    # For each feedback record in the given ID range that has no UUID yet, the
+    # matching finding (same project, report type and project fingerprint) is
+    # looked up and a v5 UUID is derived from it.
+    class PopulateFindingUuidForVulnerabilityFeedback
+      # Report type names mapped to the integer values used by the `category`
+      # and `report_type` enums below.
+      REPORT_TYPES = {
+        sast: 0,
+        dependency_scanning: 1,
+        container_scanning: 2,
+        dast: 3,
+        secret_detection: 4,
+        coverage_fuzzing: 5,
+        api_fuzzing: 6
+      }.freeze
+
+      class VulnerabilityFeedback < ActiveRecord::Base # rubocop:disable Style/Documentation
+        include EachBatch
+
+        self.table_name = 'vulnerability_feedback'
+
+        enum category: REPORT_TYPES
+
+        scope :in_range, -> (start, stop) { where(id: start..stop) }
+        scope :without_uuid, -> { where(finding_uuid: nil) }
+
+        # Loads the records of the current scope and requests the finding of
+        # each one up front, so that the batch loader can resolve all of them
+        # together instead of one record at a time.
+        def self.load_vulnerability_findings
+          all.to_a.tap { |collection| collection.each(&:vulnerability_finding) }
+        end
+
+        # Writes the calculated UUID to `finding_uuid`. Does nothing when the
+        # record has no matching finding or the finding has no primary
+        # identifier. Errors are handed to Gitlab::ErrorTracking.
+        def set_finding_uuid
+          return unless vulnerability_finding.present? && vulnerability_finding.primary_identifier.present?
+
+          update_column(:finding_uuid, calculated_uuid)
+        rescue StandardError => error
+          Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
+        end
+
+        # Batch-loaded finding that shares this record's project, category and
+        # project fingerprint, or nil when there is none.
+        def vulnerability_finding
+          BatchLoader.for(finding_key).batch(replace_methods: false) do |requested_keys, loader|
+            project_ids = requested_keys.map { |key| key[:project_id] }
+            categories = requested_keys.map { |key| key[:category] }
+            fingerprints = requested_keys.map { |key| key[:project_fingerprint] }
+
+            # The three filters are applied independently, so this can return
+            # findings that match none of the requested keys exactly; the exact
+            # match is done through the index below.
+            findings = Finding.with_primary_identifier.where(
+              project_id: project_ids.uniq,
+              report_type: categories.uniq,
+              project_fingerprint: fingerprints.uniq
+            ).to_a
+
+            # Index the findings by key once instead of scanning the whole list
+            # for every requested key. `||=` keeps the first finding per key,
+            # which is the one a linear `find` would have returned.
+            findings_by_key = findings.each_with_object({}) do |finding, index|
+              index[finding.finding_key] ||= finding
+            end
+
+            requested_keys.each do |requested_key|
+              loader.call(requested_key, findings_by_key[requested_key])
+            end
+          end
+        end
+
+        private
+
+        # Version 5 UUID derived from `uuid_components`.
+        def calculated_uuid
+          Gitlab::UUID.v5(uuid_components)
+        end
+
+        # Dash-joined string the UUID is derived from.
+        def uuid_components
+          [
+            category,
+            vulnerability_finding.primary_identifier.fingerprint,
+            vulnerability_finding.location_fingerprint,
+            project_id
+          ].join('-')
+        end
+
+        # Key used to match this record against Finding#finding_key.
+        def finding_key
+          {
+            project_id: project_id,
+            category: category,
+            project_fingerprint: project_fingerprint
+          }
+        end
+      end
+
+      class Finding < ActiveRecord::Base # rubocop:disable Style/Documentation
+        include ShaAttribute
+
+        self.table_name = 'vulnerability_occurrences'
+
+        sha_attribute :project_fingerprint
+        sha_attribute :location_fingerprint
+
+        belongs_to :primary_identifier, class_name: 'Gitlab::BackgroundMigration::PopulateFindingUuidForVulnerabilityFeedback::Identifier'
+
+        enum report_type: REPORT_TYPES
+
+        scope :with_primary_identifier, -> { includes(:primary_identifier) }
+
+        # Key with the same shape as VulnerabilityFeedback#finding_key; the
+        # report type is exposed under `category` so the two can be compared.
+        def finding_key
+          {
+            project_id: project_id,
+            category: report_type,
+            project_fingerprint: project_fingerprint
+          }
+        end
+      end
+
+      class Identifier < ActiveRecord::Base # rubocop:disable Style/Documentation
+        self.table_name = 'vulnerability_identifiers'
+      end
+
+      # Populates `finding_uuid` for the feedback records without a UUID in the
+      # given ID range and logs how many records were processed.
+      #
+      # range - The start and stop IDs, passed on to the `in_range` scope.
+      def perform(*range)
+        feedback = VulnerabilityFeedback.without_uuid.in_range(*range).load_vulnerability_findings
+        feedback.each(&:set_finding_uuid)
+
+        log_info(feedback.count)
+      end
+
+      # Logs the number of processed feedback records for this migrator.
+      def log_info(feedback_count)
+        ::Gitlab::BackgroundMigration::Logger.info(
+          migrator: self.class.name,
+          message: '`finding_uuid` attributes has been set',
+          count: feedback_count
+        )
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/background_migration/remove_duplicate_services.rb b/lib/gitlab/background_migration/remove_duplicate_services.rb
new file mode 100644
index 00000000000..59fb9143a72
--- /dev/null
+++ b/lib/gitlab/background_migration/remove_duplicate_services.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Remove duplicated service records with the same project and type.
+    # These were created in the past for unknown reasons, and should be blocked
+    # now by the uniqueness validation in the Service model.
+    class RemoveDuplicateServices
+      # See app/models/service
+      class Service < ActiveRecord::Base
+        include EachBatch
+
+        self.table_name = 'services'
+        self.inheritance_column = :_type_disabled
+
+        scope :project_ids_with_duplicates, -> do
+          select(:project_id)
+            .distinct
+            .where.not(project_id: nil)
+            .group(:project_id, :type)
+            .having('count(*) > 1')
+        end
+
+        scope :types_with_duplicates, -> (project_ids) do
+          select(:project_id, :type)
+            .where(project_id: project_ids)
+            .group(:project_id, :type)
+            .having('count(*) > 1')
+        end
+      end
+
+      def perform(*project_ids)
+        types_with_duplicates = Service.types_with_duplicates(project_ids).pluck(:project_id, :type)
+
+        types_with_duplicates.each do |project_id, type|
+          remove_duplicates(project_id, type)
+        end
+      end
+
+      private
+
+      def remove_duplicates(project_id, type)
+        scope = Service.where(project_id: project_id, type: type)
+
+        # Build a subquery to determine which service record is actually in use,
+        # by querying for it without specifying an order.
+        #
+        # This should match the record returned by `Project#find_service`,
+        # and the `has_one` service associations on `Project`.
+        correct_service = scope.select(:id).limit(1)
+
+        # Delete all other services with the same `project_id` and `type`
+        duplicate_services = scope.where.not(id: correct_service)
+        duplicate_services.delete_all
+      end
+    end
+  end
+end
author	Robert Speicher <rspeicher@gmail.com>	2021-01-20 22:34:23 +0300
committer	Robert Speicher <rspeicher@gmail.com>	2021-01-20 22:34:23 +0300
commit	6438df3a1e0fb944485cebf07976160184697d72 (patch)
tree	00b09bfd170e77ae9391b1a2f5a93ef6839f2597 /lib/gitlab/background_migration
parent	42bcd54d971da7ef2854b896a7b34f4ef8601067 (diff)