Welcome to mirror list, hosted at ThFree Co, Russian Federation.

backfill_project_namespaces.rb « project_namespaces « background_migration « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 8e94c16369e3ee6271f24ddd533dad738c4830c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      # Back-fill project namespaces for projects that do not yet have a namespace.
      #
      # TODO: remove this comment when an actual backfill migration is added.
      #
      # This is first being added without an actual migration as we need to initially test
      # if backfilling project namespaces affects performance in any significant way.
      # rubocop: disable Metrics/ClassLength
      class BackfillProjectNamespaces
        # Number of project ids handled per iteration of the backfill / cleanup loops.
        BATCH_SIZE = 100
        # Number of project ids covered by a single DELETE when removing project namespaces.
        DELETE_BATCH_SIZE = 10
        # Value written to `namespaces.type` for the rows inserted by this job; the same value
        # restricts which `namespaces` rows are deleted when running 'down'.
        PROJECT_NAMESPACE_STI_NAME = 'Project'

        IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models

        # Entry point of the job.
        #
        # Collects the ids of the projects in the given id range (optionally limited to one group
        # hierarchy), then either backfills or removes their project namespaces, and finally
        # marks the job as succeeded.
        #
        # @param start_id lower bound (inclusive) of the project id range
        # @param end_id upper bound (inclusive) of the project id range
        # @param namespace_id when truthy, only projects whose `namespace_id` is this group or one
        #   of its descendant groups are processed; when nil the whole id range is processed
        # @param migration_type [String] 'up' to backfill, 'down' to remove backfilled records
        # @raise [RuntimeError] when `migration_type` is neither 'up' nor 'down'
        def perform(start_id, end_id, namespace_id, migration_type = 'up')
          load_project_ids(start_id, end_id, namespace_id)

          case migration_type
          when 'up'
            backfill_project_namespaces(namespace_id)
            mark_job_as_succeeded(start_id, end_id, namespace_id, 'up')
          when 'down'
            cleanup_backfilled_project_namespaces(namespace_id)
            mark_job_as_succeeded(start_id, end_id, namespace_id, 'down')
          else
            raise "Unknown migration type"
          end
        end

        private

        # Ids of the projects selected by `load_project_ids` for the current run.
        attr_reader :project_ids

        # Creates a project namespace for every loaded project, links it to the project and
        # fills in its traversal ids, one slice of BATCH_SIZE project ids at a time.
        #
        # The argument is not used: the namespace filter has already been applied when the
        # project ids were loaded. It is kept so the method's arity stays the same.
        def backfill_project_namespaces(_namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # We need to lock these project records for the period when we create project namespaces
            # and link them to projects so that if a project is modified in the time between creating
            # project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
            # we do not get them out of sync.
            #
            # see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72527#note_730679469
            #
            # NOTE(review): this uses the application `Project` model rather than
            # `IsolatedModels::Project` - confirm that is intentional.
            Project.transaction do
              # A relation is lazy: without `load` the `SELECT ... FOR UPDATE` is never sent to
              # the database and no row is actually locked.
              Project.where(id: batch_ids).select(:id).lock('FOR UPDATE').load

              batch_insert_namespaces(batch_ids)
              batch_update_projects(batch_ids)
            end

            # Executed once the transaction block above has finished.
            batch_update_project_namespaces_traversal_ids(batch_ids)
          end
        end

        # Reverts the backfill for every loaded project, one slice of BATCH_SIZE project ids at
        # a time.
        #
        # The argument is not used; it is kept so the method's arity stays the same.
        def cleanup_backfilled_project_namespaces(_namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
            # from namespaces are deleted due to FK/triggers
            nullify_project_namespaces_in_projects(batch_ids)
            delete_project_namespace_records(batch_ids)
          end
        end

        # Inserts one `namespaces` row per given project with a single INSERT ... SELECT.
        #
        # Column mapping (namespaces <- projects): tmp_project_id <- id, name <- name,
        # path <- path, parent_id <- namespace_id, visibility_level and shared_runners_enabled
        # copied as-is, type <- PROJECT_NAMESPACE_STI_NAME, created_at/updated_at <- now().
        # Rows that hit a conflict are skipped (`ON CONFLICT DO NOTHING`).
        def batch_insert_namespaces(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
            .select("projects.id, projects.name, projects.path, projects.namespace_id, projects.visibility_level, shared_runners_enabled, '#{PROJECT_NAMESPACE_STI_NAME}', now(), now()")

          ActiveRecord::Base.connection.execute <<~SQL
            INSERT INTO namespaces (tmp_project_id, name, path, parent_id, visibility_level, shared_runners_enabled, type, created_at, updated_at)
            #{projects.to_sql}
            ON CONFLICT DO NOTHING;
          SQL
        end

        # Points `projects.project_namespace_id` at the `namespaces` row whose `tmp_project_id`
        # equals the project id. Rows that already hold the right value are left untouched
        # (`IS DISTINCT FROM`).
        def batch_update_projects(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
                       .joins("INNER JOIN namespaces ON projects.id = namespaces.tmp_project_id")
                       .select("namespaces.id, namespaces.tmp_project_id")

          ActiveRecord::Base.connection.execute <<~SQL
            WITH cte(project_namespace_id, project_id) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
              #{projects.to_sql}
            )
            UPDATE projects
            SET project_namespace_id = cte.project_namespace_id
            FROM cte
            WHERE id = cte.project_id AND projects.project_namespace_id IS DISTINCT FROM cte.project_namespace_id
          SQL
        end

        # Sets `traversal_ids` of each project namespace belonging to the given projects to the
        # parent namespace's `traversal_ids` with the project namespace's own id appended.
        #
        # NOTE(review): this uses the application `Namespace` model rather than
        # `IsolatedModels::Namespace` - confirm that is intentional.
        def batch_update_project_namespaces_traversal_ids(project_ids)
          namespaces = Namespace.where(tmp_project_id: project_ids)
                         .joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id")
                         .select("namespaces.id as project_namespace_id, n2.traversal_ids")

          ActiveRecord::Base.connection.execute <<~SQL
            UPDATE namespaces
            SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id)
            FROM (#{namespaces.to_sql}) as project_namespaces(project_namespace_id, traversal_ids)
            WHERE id = project_namespaces.project_namespace_id
          SQL
        end

        # Clears `project_namespace_id` on the given projects.
        def nullify_project_namespaces_in_projects(project_ids)
          IsolatedModels::Project.where(id: project_ids).update_all(project_namespace_id: nil)
        end

        # Deletes the `namespaces` rows of type PROJECT_NAMESPACE_STI_NAME that reference the
        # given projects through `tmp_project_id`, DELETE_BATCH_SIZE project ids per statement.
        def delete_project_namespace_records(project_ids)
          project_ids.each_slice(DELETE_BATCH_SIZE) do |batch_ids|
            IsolatedModels::Namespace
              .where(type: PROJECT_NAMESPACE_STI_NAME, tmp_project_id: batch_ids)
              .delete_all
          end
        end

        # Stores in `@project_ids` the ids of the projects whose id lies between start_id and
        # end_id (both inclusive). When namespace_id is truthy the projects are additionally
        # restricted to those whose `namespace_id` is returned by `hierarchy_cte`.
        def load_project_ids(start_id, end_id, namespace_id)
          projects = IsolatedModels::Project.arel_table
          relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id))

          if namespace_id
            hierarchy_sql = Arel::Nodes::SqlLiteral.new(hierarchy_cte(namespace_id))
            relation = relation.where(projects[:namespace_id].in(hierarchy_sql))
          end

          @project_ids = relation.pluck(:id)
        end

        # Forwards this job's class name and the given arguments to
        # BackgroundMigrationJob.mark_all_as_succeeded.
        def mark_job_as_succeeded(*arguments)
          ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments)
        end

        # Builds the SQL of a recursive query returning the id of the 'Group' namespace with
        # the given id together with the ids of all 'Group' namespaces reachable from it through
        # `parent_id`.
        #
        # The id is coerced with `to_i` before being interpolated into the SQL text.
        #
        # @return [String] SQL text
        def hierarchy_cte(root_namespace_id)
          <<-SQL
              WITH RECURSIVE "base_and_descendants" AS (
                  (
                      SELECT "namespaces"."id"
                      FROM "namespaces"
                      WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = #{root_namespace_id.to_i}
                  )
                  UNION
                  (
                      SELECT "namespaces"."id"
                      FROM "namespaces", "base_and_descendants"
                      WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id"
                  )
              )
              SELECT "id" FROM "base_and_descendants" AS "namespaces"
          SQL
        end
      end
      # rubocop: enable Metrics/ClassLength
    end
  end
end