diff options
Diffstat (limited to 'lib/gitlab/background_migration/project_namespaces')
3 files changed, 184 insertions, 0 deletions
# File: lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      # Back-fill project namespaces for projects that do not yet have a namespace.
      #
      # TODO: remove this comment when an actual backfill migration is added.
      #
      # This is first being added without an actual migration as we need to initially test
      # if backfilling project namespaces affects performance in any significant way.
      # rubocop: disable Metrics/ClassLength
      class BackfillProjectNamespaces
        # Number of projects handled per insert/update round.
        BATCH_SIZE = 100
        # Number of projects whose namespace rows are removed per DELETE statement.
        DELETE_BATCH_SIZE = 10
        # Value written to (and matched against) `namespaces.type` for project namespaces.
        PROJECT_NAMESPACE_STI_NAME = 'Project'

        IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models

        # Runs the job for projects whose id lies in `start_id..end_id`.
        #
        # @param start_id [Integer] lower bound (inclusive) of the project id range
        # @param end_id [Integer] upper bound (inclusive) of the project id range
        # @param namespace_id [Integer, nil] when present, only projects whose namespace is this
        #   group or one of its descendant groups are processed
        # @param migration_type [String] 'up' creates project namespaces, 'down' removes them
        # @raise [RuntimeError] when `migration_type` is neither 'up' nor 'down'
        def perform(start_id, end_id, namespace_id, migration_type = 'up')
          load_project_ids(start_id, end_id, namespace_id)

          case migration_type
          when 'up'
            backfill_project_namespaces(namespace_id)
          when 'down'
            cleanup_backfilled_project_namespaces(namespace_id)
          else
            raise "Unknown migration type"
          end

          # Only reached for 'up' / 'down'; the job is recorded with the exact arguments it ran with.
          mark_job_as_succeeded(start_id, end_id, namespace_id, migration_type)
        end

        private

        attr_accessor :project_ids

        # Creates namespace rows for the loaded projects and links them, BATCH_SIZE projects at a time.
        # `namespace_id` is accepted for symmetry with `perform` but is not used here.
        def backfill_project_namespaces(namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # We need to lock these project records for the period when we create project namespaces
            # and link them to projects so that if a project is modified in the time between creating
            # project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
            # we do not get them out of sync.
            #
            # see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72527#note_730679469
            IsolatedModels::Project.transaction do
              # A relation is lazy: `lock` alone only configures the query. `load` is what
              # actually issues the `SELECT ... FOR UPDATE` and takes the row locks.
              IsolatedModels::Project.where(id: batch_ids).select(:id).lock('FOR UPDATE').load

              batch_insert_namespaces(batch_ids)
              batch_update_projects(batch_ids)
            end

            batch_update_project_namespaces_traversal_ids(batch_ids)
          end
        end

        # Reverts the backfill for the loaded projects, BATCH_SIZE projects at a time.
        # `namespace_id` is accepted for symmetry with `perform` but is not used here.
        def cleanup_backfilled_project_namespaces(namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
            # from namespaces are deleted due to FK/triggers
            nullify_project_namespaces_in_projects(batch_ids)
            delete_project_namespace_records(batch_ids)
          end
        end

        # Inserts one `namespaces` row per project via a single INSERT ... SELECT.
        # Rows that would conflict with existing ones are skipped (`ON CONFLICT DO NOTHING`).
        def batch_insert_namespaces(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
            .select("projects.id, projects.name, projects.path, projects.namespace_id, projects.visibility_level, shared_runners_enabled, '#{PROJECT_NAMESPACE_STI_NAME}', now(), now()")

          ActiveRecord::Base.connection.execute <<~SQL
            INSERT INTO namespaces (tmp_project_id, name, path, parent_id, visibility_level, shared_runners_enabled, type, created_at, updated_at)
            #{projects.to_sql}
            ON CONFLICT DO NOTHING;
          SQL
        end

        # Points `projects.project_namespace_id` at the namespace row whose `tmp_project_id`
        # matches the project. Rows that already hold the right value are left alone.
        def batch_update_projects(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
            .joins("INNER JOIN namespaces ON projects.id = namespaces.tmp_project_id")
            .select("namespaces.id, namespaces.tmp_project_id")

          ActiveRecord::Base.connection.execute <<~SQL
            WITH cte(project_namespace_id, project_id) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
              #{projects.to_sql}
            )
            UPDATE projects
            SET project_namespace_id = cte.project_namespace_id
            FROM cte
            WHERE id = cte.project_id AND projects.project_namespace_id IS DISTINCT FROM cte.project_namespace_id
          SQL
        end

        # Sets `traversal_ids` of each project namespace to the parent's `traversal_ids`
        # with the project namespace's own id appended.
        def batch_update_project_namespaces_traversal_ids(project_ids)
          namespaces = IsolatedModels::Namespace.where(tmp_project_id: project_ids)
            .joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id")
            .select("namespaces.id as project_namespace_id, n2.traversal_ids")

          ActiveRecord::Base.connection.execute <<~SQL
            UPDATE namespaces
            SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id)
            FROM (#{namespaces.to_sql}) as project_namespaces(project_namespace_id, traversal_ids)
            WHERE id = project_namespaces.project_namespace_id
          SQL
        end

        # Clears `project_namespace_id` on the given projects.
        def nullify_project_namespaces_in_projects(project_ids)
          IsolatedModels::Project.where(id: project_ids).update_all(project_namespace_id: nil)
        end

        # Deletes the project-type namespace rows tied to the given projects,
        # DELETE_BATCH_SIZE projects per statement.
        def delete_project_namespace_records(project_ids)
          project_ids.each_slice(DELETE_BATCH_SIZE) do |p_ids|
            IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME).where(tmp_project_id: p_ids).delete_all
          end
        end

        # Stores in @project_ids the ids of projects within `start_id..end_id`, optionally
        # narrowed to projects that live in the group hierarchy rooted at `namespace_id`.
        def load_project_ids(start_id, end_id, namespace_id)
          projects = IsolatedModels::Project.arel_table
          relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id))
          relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(hierarchy_cte(namespace_id)))) if namespace_id

          @project_ids = relation.pluck(:id)
        end

        # Records the job identified by `arguments` as succeeded.
        def mark_job_as_succeeded(*arguments)
          ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments)
        end

        # Builds the SQL that selects the id of the root group and of every descendant group.
        # The id is coerced with `to_i` before being interpolated into the statement.
        def hierarchy_cte(root_namespace_id)
          <<-SQL
              WITH RECURSIVE "base_and_descendants" AS (
                  (
                      SELECT "namespaces"."id"
                      FROM "namespaces"
                      WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = #{root_namespace_id.to_i}
                  )
                  UNION
                  (
                      SELECT "namespaces"."id"
                      FROM "namespaces", "base_and_descendants"
                      WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id"
                  )
              )
              SELECT "id" FROM "base_and_descendants" AS "namespaces"
          SQL
        end
      end
      # rubocop: enable Metrics/ClassLength
    end
  end
end
# File: lib/gitlab/background_migration/project_namespaces/models/namespace.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      module Models
        # Migration-local model over the `namespaces` table, kept separate from the
        # application's Namespace class.
        class Namespace < ActiveRecord::Base
          include EachBatch

          # Use a column name other than `type` for single-table inheritance so that
          # `type` can be read and queried as an ordinary attribute.
          self.inheritance_column = :_type_disabled
          self.table_name = 'namespaces'
        end
      end
    end
  end
end

# File: lib/gitlab/background_migration/project_namespaces/models/project.rb
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      module Models
        # Migration-local model over the `projects` table, kept separate from the
        # application's Project class.
        class Project < ActiveRecord::Base
          include EachBatch

          self.table_name = 'projects'
        end
      end
    end
  end
end