1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
module ProjectNamespaces
# Back-fill project namespaces for projects that do not yet have a namespace.
#
# rubocop: disable Metrics/ClassLength
class BackfillProjectNamespaces
SUB_BATCH_SIZE = 25
PROJECT_NAMESPACE_STI_NAME = 'Project'
IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models
def perform(start_id, end_id, migration_table_name, migration_column_name, sub_batch_size, pause_ms, namespace_id, migration_type = 'up')
@sub_batch_size = sub_batch_size || SUB_BATCH_SIZE
load_project_ids(start_id, end_id, namespace_id)
case migration_type
when 'up'
backfill_project_namespaces(namespace_id)
mark_job_as_succeeded(start_id, end_id, namespace_id, 'up')
when 'down'
cleanup_backfilled_project_namespaces(namespace_id)
mark_job_as_succeeded(start_id, end_id, namespace_id, 'down')
else
raise "Unknown migration type"
end
end
private
attr_accessor :project_ids, :sub_batch_size
def backfill_project_namespaces(namespace_id)
project_ids.each_slice(sub_batch_size) do |project_ids|
# cleanup gin indexes on namespaces table
cleanup_gin_index('namespaces')
# cleanup gin indexes on projects table
cleanup_gin_index('projects')
# We need to lock these project records for the period when we create project namespaces
# and link them to projects so that if a project is modified in the time between creating
# project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
# we do not get them out of sync.
#
# see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72527#note_730679469
Project.transaction do
Project.where(id: project_ids).select(:id).lock!('FOR UPDATE').load
batch_insert_namespaces(project_ids)
batch_update_projects(project_ids)
batch_update_project_namespaces_traversal_ids(project_ids)
end
end
end
def cleanup_gin_index(table_name)
index_names = ActiveRecord::Base.connection.select_values("select indexname::text from pg_indexes where tablename = '#{table_name}' and indexdef ilike '%gin%'")
index_names.each do |index_name|
ActiveRecord::Base.connection.execute("select gin_clean_pending_list('#{index_name}')")
end
end
def cleanup_backfilled_project_namespaces(namespace_id)
project_ids.each_slice(sub_batch_size) do |project_ids|
# IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
# from namespaces are deleted due to FK/triggers
nullify_project_namespaces_in_projects(project_ids)
delete_project_namespace_records(project_ids)
end
end
def batch_insert_namespaces(project_ids)
projects = IsolatedModels::Project.where(id: project_ids)
.select("projects.id, projects.name, projects.path, projects.namespace_id, projects.visibility_level, shared_runners_enabled, '#{PROJECT_NAMESPACE_STI_NAME}', now(), now()")
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO namespaces (tmp_project_id, name, path, parent_id, visibility_level, shared_runners_enabled, type, created_at, updated_at)
#{projects.to_sql}
ON CONFLICT DO NOTHING;
SQL
end
def batch_update_projects(project_ids)
projects = IsolatedModels::Project.where(id: project_ids)
.joins("INNER JOIN namespaces ON projects.id = namespaces.tmp_project_id")
.select("namespaces.id, namespaces.tmp_project_id")
ActiveRecord::Base.connection.execute <<~SQL
WITH cte(project_namespace_id, project_id) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
#{projects.to_sql}
)
UPDATE projects
SET project_namespace_id = cte.project_namespace_id
FROM cte
WHERE id = cte.project_id AND projects.project_namespace_id IS DISTINCT FROM cte.project_namespace_id
SQL
end
def batch_update_project_namespaces_traversal_ids(project_ids)
namespaces = Namespace.where(tmp_project_id: project_ids)
.joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id")
.select("namespaces.id as project_namespace_id, n2.traversal_ids")
ActiveRecord::Base.connection.execute <<~SQL
UPDATE namespaces
SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id)
FROM (#{namespaces.to_sql}) as project_namespaces(project_namespace_id, traversal_ids)
WHERE id = project_namespaces.project_namespace_id
SQL
end
def nullify_project_namespaces_in_projects(project_ids)
IsolatedModels::Project.where(id: project_ids).update_all(project_namespace_id: nil)
end
def delete_project_namespace_records(project_ids)
# keep the deletes a 10x smaller batch as deletes seem to be much more expensive
delete_batch_size = (sub_batch_size / 10).to_i + 1
project_ids.each_slice(delete_batch_size) do |p_ids|
IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME).where(tmp_project_id: p_ids).delete_all
end
end
def load_project_ids(start_id, end_id, namespace_id)
projects = IsolatedModels::Project.arel_table
relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id))
relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(self.class.hierarchy_cte(namespace_id)))) if namespace_id
@project_ids = relation.pluck(:id)
end
def mark_job_as_succeeded(*arguments)
::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments)
end
def self.hierarchy_cte(root_namespace_id)
<<-SQL
WITH RECURSIVE "base_and_descendants" AS (
(
SELECT "namespaces"."id"
FROM "namespaces"
WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = #{root_namespace_id.to_i}
)
UNION
(
SELECT "namespaces"."id"
FROM "namespaces", "base_and_descendants"
WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id"
)
)
SELECT "id" FROM "base_and_descendants" AS "namespaces"
SQL
end
end
# rubocop: enable Metrics/ClassLength
end
end
end
|