diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2022-06-20 14:10:13 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2022-06-20 14:10:13 +0300 |
commit | 0ea3fcec397b69815975647f5e2aa5fe944a8486 (patch) | |
tree | 7979381b89d26011bcf9bdc989a40fcc2f1ed4ff /lib/gitlab/background_migration | |
parent | 72123183a20411a36d607d70b12d57c484394c8e (diff) |
Add latest changes from gitlab-org/gitlab@15-1-stable-ee (tag: v15.1.0-rc42)
Diffstat (limited to 'lib/gitlab/background_migration')
9 files changed, 261 insertions, 143 deletions
diff --git a/lib/gitlab/background_migration/backfill_project_feature_package_registry_access_level.rb b/lib/gitlab/background_migration/backfill_project_feature_package_registry_access_level.rb new file mode 100644 index 00000000000..814f5a897a9 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_project_feature_package_registry_access_level.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Backfill projectfeatures.package_registry_access_level depending on projects.packages_enabled + class BackfillProjectFeaturePackageRegistryAccessLevel < ::Gitlab::BackgroundMigration::BatchedMigrationJob + FEATURE_DISABLED = 0 # ProjectFeature::DISABLED + FEATURE_PRIVATE = 10 # ProjectFeature::PRIVATE + FEATURE_ENABLED = 20 # ProjectFeature::ENABLED + FEATURE_PUBLIC = 30 # ProjectFeature::PUBLIC + PROJECT_PRIVATE = 0 # Gitlab::VisibilityLevel::PRIVATE + PROJECT_INTERNAL = 10 # Gitlab::VisibilityLevel::INTERNAL + PROJECT_PUBLIC = 20 # Gitlab::VisibilityLevel::PUBLIC + + # Migration only version of ProjectFeature table + class ProjectFeature < ::ApplicationRecord + self.table_name = 'project_features' + end + + def perform + each_sub_batch(operation_name: :update_all) do |sub_batch| + ProjectFeature.connection.execute( + <<~SQL + UPDATE project_features pf + SET package_registry_access_level = (CASE p.packages_enabled + WHEN true THEN (CASE p.visibility_level + WHEN #{PROJECT_PUBLIC} THEN #{FEATURE_PUBLIC} + WHEN #{PROJECT_INTERNAL} THEN #{FEATURE_ENABLED} + WHEN #{PROJECT_PRIVATE} THEN #{FEATURE_PRIVATE} + END) + WHEN false THEN #{FEATURE_DISABLED} + ELSE #{FEATURE_DISABLED} + END) + FROM projects p + WHERE pf.project_id = p.id AND + pf.project_id BETWEEN #{start_id} AND #{end_id} + SQL + ) + end + end + end + end +end diff --git a/lib/gitlab/background_migration/backfill_project_member_namespace_id.rb b/lib/gitlab/background_migration/backfill_project_member_namespace_id.rb new file mode 100644 index 
00000000000..c2e37269b5e --- /dev/null +++ b/lib/gitlab/background_migration/backfill_project_member_namespace_id.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Backfills the `members.member_namespace_id` column for `type=ProjectMember` + class BackfillProjectMemberNamespaceId < Gitlab::BackgroundMigration::BatchedMigrationJob + def perform + parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id) + + parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size, order_hint: :type) do |sub_batch| + batch_metrics.time_operation(:update_all) do + # rubocop:disable Layout/LineLength + sub_batch.update_all('member_namespace_id = (SELECT projects.project_namespace_id FROM projects WHERE projects.id = source_id)') + # rubocop:enable Layout/LineLength + end + + pause_ms_value = [0, pause_ms].max + sleep(pause_ms_value * 0.001) + end + end + + def batch_metrics + @batch_metrics ||= Gitlab::Database::BackgroundMigration::BatchMetrics.new + end + + private + + def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id) + define_batchable_model(source_table, connection: ApplicationRecord.connection) + .where(source_key_column => start_id..stop_id) + .joins('INNER JOIN projects ON members.source_id = projects.id') + .where(type: 'ProjectMember', source_type: 'Project') + .where(member_namespace_id: nil) + end + end + end +end diff --git a/lib/gitlab/background_migration/cleanup_orphaned_routes.rb b/lib/gitlab/background_migration/cleanup_orphaned_routes.rb new file mode 100644 index 00000000000..0cd19dc5df9 --- /dev/null +++ b/lib/gitlab/background_migration/cleanup_orphaned_routes.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Removes orphaned routes, i.e. routes that reference a namespace or project that no longer exists. 
+ # This was possible since we were using a polymorphic association source_id, source_type. However since now + # we have project namespaces we can use a FK on routes#namespace_id to avoid orphaned records in routes. + class CleanupOrphanedRoutes < Gitlab::BackgroundMigration::BatchedMigrationJob + include Gitlab::Database::DynamicModelHelpers + + def perform + # there should really be no records to fix, there is none gitlab.com, but taking the safer route, just in case. + fix_missing_namespace_id_routes + cleanup_orphaned_routes + end + + private + + def fix_missing_namespace_id_routes + non_orphaned_namespace_routes = non_orphaned_namespace_routes_scoped_to_range(batch_column, start_id, end_id) + non_orphaned_project_routes = non_orphaned_project_routes_scoped_to_range(batch_column, start_id, end_id) + + update_namespace_id(batch_column, non_orphaned_namespace_routes, sub_batch_size) + update_namespace_id(batch_column, non_orphaned_project_routes, sub_batch_size) + end + + def cleanup_orphaned_routes + orphaned_namespace_routes = orphaned_namespace_routes_scoped_to_range(batch_column, start_id, end_id) + orphaned_project_routes = orphaned_project_routes_scoped_to_range(batch_column, start_id, end_id) + + cleanup_relations(batch_column, orphaned_namespace_routes, pause_ms, sub_batch_size) + cleanup_relations(batch_column, orphaned_project_routes, pause_ms, sub_batch_size) + end + + def update_namespace_id(batch_column, non_orphaned_namespace_routes, sub_batch_size) + non_orphaned_namespace_routes.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch| + batch_metrics.time_operation(:fix_missing_namespace_id) do + ApplicationRecord.connection.execute <<~SQL + WITH route_and_ns(route_id, namespace_id) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} ( + #{sub_batch.to_sql} + ) + UPDATE routes + SET namespace_id = route_and_ns.namespace_id + FROM route_and_ns + WHERE id = route_and_ns.route_id + SQL + end + end + end + + def 
cleanup_relations(batch_column, orphaned_namespace_routes, pause_ms, sub_batch_size) + orphaned_namespace_routes.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch| + batch_metrics.time_operation(:cleanup_orphaned_routes) do + sub_batch.delete_all + end + end + end + + def orphaned_namespace_routes_scoped_to_range(source_key_column, start_id, stop_id) + Gitlab::BackgroundMigration::Route.joins("LEFT OUTER JOIN namespaces ON source_id = namespaces.id") + .where(source_key_column => start_id..stop_id) + .where(source_type: 'Namespace') + .where(namespace_id: nil) + .where(namespaces: { id: nil }) + end + + def orphaned_project_routes_scoped_to_range(source_key_column, start_id, stop_id) + Gitlab::BackgroundMigration::Route.joins("LEFT OUTER JOIN projects ON source_id = projects.id") + .where(source_key_column => start_id..stop_id) + .where(source_type: 'Project') + .where(namespace_id: nil) + .where(projects: { id: nil }) + end + + def non_orphaned_namespace_routes_scoped_to_range(source_key_column, start_id, stop_id) + Gitlab::BackgroundMigration::Route.joins("LEFT OUTER JOIN namespaces ON source_id = namespaces.id") + .where(source_key_column => start_id..stop_id) + .where(source_type: 'Namespace') + .where(namespace_id: nil) + .where.not(namespaces: { id: nil }) + .select("routes.id, namespaces.id") + end + + def non_orphaned_project_routes_scoped_to_range(source_key_column, start_id, stop_id) + Gitlab::BackgroundMigration::Route.joins("LEFT OUTER JOIN projects ON source_id = projects.id") + .where(source_key_column => start_id..stop_id) + .where(source_type: 'Project') + .where(namespace_id: nil) + .where.not(projects: { id: nil }) + .select("routes.id, projects.project_namespace_id") + end + end + + # Isolated route model for the migration + class Route < ApplicationRecord + include EachBatch + + self.table_name = 'routes' + self.inheritance_column = :_type_disabled + end + end +end diff --git 
a/lib/gitlab/background_migration/delete_invalid_epic_issues.rb b/lib/gitlab/background_migration/delete_invalid_epic_issues.rb new file mode 100644 index 00000000000..3af59ab4931 --- /dev/null +++ b/lib/gitlab/background_migration/delete_invalid_epic_issues.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # rubocop: disable Style/Documentation + class DeleteInvalidEpicIssues < BatchedMigrationJob + def perform + end + end + end +end + +# rubocop:disable Layout/LineLength +Gitlab::BackgroundMigration::DeleteInvalidEpicIssues.prepend_mod_with('Gitlab::BackgroundMigration::DeleteInvalidEpicIssues') diff --git a/lib/gitlab/background_migration/fix_merge_request_diff_commit_users.rb b/lib/gitlab/background_migration/fix_merge_request_diff_commit_users.rb index ea3e56cb14a..4df55a7b02a 100644 --- a/lib/gitlab/background_migration/fix_merge_request_diff_commit_users.rb +++ b/lib/gitlab/background_migration/fix_merge_request_diff_commit_users.rb @@ -5,12 +5,6 @@ module Gitlab # Background migration for fixing merge_request_diff_commit rows that don't # have committer/author details due to # https://gitlab.com/gitlab-org/gitlab/-/issues/344080. - # - # This migration acts on a single project and corrects its data. Because - # this process needs Git/Gitaly access, and duplicating all that code is far - # too much, this migration relies on global models such as Project, - # MergeRequest, etc. - # rubocop: disable Metrics/ClassLength class FixMergeRequestDiffCommitUsers BATCH_SIZE = 100 @@ -20,137 +14,8 @@ module Gitlab end def perform(project_id) - if (project = ::Project.find_by_id(project_id)) - process(project) - end - - ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( - 'FixMergeRequestDiffCommitUsers', - [project_id] - ) - - schedule_next_job - end - - def process(project) - # Loading everything using one big query may result in timeouts (e.g. - # for projects the size of gitlab-org/gitlab). 
So instead we query - # data on a per merge request basis. - project.merge_requests.each_batch(column: :iid) do |mrs| - mrs.ids.each do |mr_id| - each_row_to_check(mr_id) do |commit| - update_commit(project, commit) - end - end - end - end - - def each_row_to_check(merge_request_id, &block) - columns = %w[merge_request_diff_id relative_order].map do |col| - Pagination::Keyset::ColumnOrderDefinition.new( - attribute_name: col, - order_expression: MergeRequestDiffCommit.arel_table[col.to_sym].asc, - nullable: :not_nullable, - distinct: false - ) - end - - order = Pagination::Keyset::Order.build(columns) - scope = MergeRequestDiffCommit - .joins(:merge_request_diff) - .where(merge_request_diffs: { merge_request_id: merge_request_id }) - .where('commit_author_id IS NULL OR committer_id IS NULL') - .order(order) - - Pagination::Keyset::Iterator - .new(scope: scope, use_union_optimization: true) - .each_batch(of: BATCH_SIZE) do |rows| - rows - .select([ - :merge_request_diff_id, - :relative_order, - :sha, - :committer_id, - :commit_author_id - ]) - .each(&block) - end - end - - # rubocop: disable Metrics/AbcSize - def update_commit(project, row) - commit = find_commit(project, row.sha) - updates = [] - - unless row.commit_author_id - author_id = find_or_create_user(commit, :author_name, :author_email) - - updates << [arel_table[:commit_author_id], author_id] if author_id - end - - unless row.committer_id - committer_id = - find_or_create_user(commit, :committer_name, :committer_email) - - updates << [arel_table[:committer_id], committer_id] if committer_id - end - - return if updates.empty? 
- - update = Arel::UpdateManager - .new - .table(MergeRequestDiffCommit.arel_table) - .where(matches_row(row)) - .set(updates) - .to_sql - - MergeRequestDiffCommit.connection.execute(update) - end - # rubocop: enable Metrics/AbcSize - - def schedule_next_job - job = Database::BackgroundMigrationJob - .for_migration_class('FixMergeRequestDiffCommitUsers') - .pending - .first - - return unless job - - BackgroundMigrationWorker.perform_in( - 2.minutes, - 'FixMergeRequestDiffCommitUsers', - job.arguments - ) - end - - def find_commit(project, sha) - @commits[sha] ||= (project.commit(sha)&.to_hash || {}) - end - - def find_or_create_user(commit, name_field, email_field) - name = commit[name_field] - email = commit[email_field] - - return unless name && email - - @users[[name, email]] ||= - MergeRequest::DiffCommitUser.find_or_create(name, email).id - end - - def matches_row(row) - primary_key = Arel::Nodes::Grouping - .new([arel_table[:merge_request_diff_id], arel_table[:relative_order]]) - - primary_val = Arel::Nodes::Grouping - .new([row.merge_request_diff_id, row.relative_order]) - - primary_key.eq(primary_val) - end - - def arel_table - MergeRequestDiffCommit.arel_table + # No-op, see https://gitlab.com/gitlab-org/gitlab/-/issues/344540 end end - # rubocop: enable Metrics/ClassLength end end diff --git a/lib/gitlab/background_migration/migrate_pages_to_zip_storage.rb b/lib/gitlab/background_migration/migrate_pages_to_zip_storage.rb index b7a912da060..f53f2e8ee79 100644 --- a/lib/gitlab/background_migration/migrate_pages_to_zip_storage.rb +++ b/lib/gitlab/background_migration/migrate_pages_to_zip_storage.rb @@ -9,10 +9,7 @@ module Gitlab # see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/54578 for discussion class MigratePagesToZipStorage def perform(start_id, stop_id) - ::Pages::MigrateFromLegacyStorageService.new(Gitlab::AppLogger, - ignore_invalid_entries: false, - mark_projects_as_not_deployed: false) - .execute_for_batch(start_id..stop_id) + # no-op end 
end end diff --git a/lib/gitlab/background_migration/nullify_orphan_runner_id_on_ci_builds.rb b/lib/gitlab/background_migration/nullify_orphan_runner_id_on_ci_builds.rb index 36d4e649271..13b66b2e02e 100644 --- a/lib/gitlab/background_migration/nullify_orphan_runner_id_on_ci_builds.rb +++ b/lib/gitlab/background_migration/nullify_orphan_runner_id_on_ci_builds.rb @@ -10,9 +10,9 @@ module Gitlab pause_ms = 0 if pause_ms < 0 batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id) - batch_relation.each_batch(column: batch_column, of: sub_batch_size, order_hint: :type) do |sub_batch| + batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch| batch_metrics.time_operation(:update_all) do - sub_batch.update_all(runner_id: nil) + filtered_sub_batch(sub_batch).update_all(runner_id: nil) end sleep(pause_ms * 0.001) @@ -31,9 +31,13 @@ module Gitlab def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id) define_batchable_model(source_table, connection: connection) + .where(source_key_column => start_id..stop_id) + end + + def filtered_sub_batch(sub_batch) + sub_batch .joins('LEFT OUTER JOIN ci_runners ON ci_runners.id = ci_builds.runner_id') .where('ci_builds.runner_id IS NOT NULL AND ci_runners.id IS NULL') - .where(source_key_column => start_id..stop_id) end end end diff --git a/lib/gitlab/background_migration/purge_stale_security_scans.rb b/lib/gitlab/background_migration/purge_stale_security_scans.rb new file mode 100644 index 00000000000..8b13a0382b4 --- /dev/null +++ b/lib/gitlab/background_migration/purge_stale_security_scans.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # rubocop:disable Style/Documentation + class PurgeStaleSecurityScans # rubocop:disable Migration/BackgroundMigrationBaseClass + class SecurityScan < ::ApplicationRecord + include EachBatch + + STALE_AFTER = 90.days + + self.table_name = 'security_scans' + + # Otherwise 
the schema_spec fails + validates :info, json_schema: { filename: 'security_scan_info', draft: 7 } + + enum status: { succeeded: 1, purged: 6 } + + scope :to_purge, -> { where('id <= ?', last_stale_record_id) } + scope :by_range, -> (range) { where(id: range) } + + def self.last_stale_record_id + where('created_at < ?', STALE_AFTER.ago).order(created_at: :desc).first + end + end + + def perform(_start_id, _end_id); end + end + end +end + +Gitlab::BackgroundMigration::PurgeStaleSecurityScans.prepend_mod diff --git a/lib/gitlab/background_migration/set_legacy_open_source_license_available_for_non_public_projects.rb b/lib/gitlab/background_migration/set_legacy_open_source_license_available_for_non_public_projects.rb new file mode 100644 index 00000000000..e85b1bc402a --- /dev/null +++ b/lib/gitlab/background_migration/set_legacy_open_source_license_available_for_non_public_projects.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Set `project_settings.legacy_open_source_license_available` to false for non-public projects + class SetLegacyOpenSourceLicenseAvailableForNonPublicProjects < ::Gitlab::BackgroundMigration::BatchedMigrationJob + PUBLIC = 20 + + # Migration only version of `project_settings` table + class ProjectSetting < ApplicationRecord + self.table_name = 'project_settings' + end + + def perform + each_sub_batch( + operation_name: :set_legacy_open_source_license_available, + batching_scope: ->(relation) { relation.where.not(visibility_level: PUBLIC) } + ) do |sub_batch| + ProjectSetting.where(project_id: sub_batch).update_all(legacy_open_source_license_available: false) + end + end + end + end +end |