1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
|
# frozen_string_literal: true
# rubocop:disable Migration/PreventStrings
# This migration cleans up Projects that were orphaned when their namespace was deleted.
# Instead of deleting them, we:
# - Find (or create) the Ghost User
# - Create (if it does not already exist) a `lost-and-found` group owned by the Ghost User
# - Find orphaned projects --> namespace_id cannot be found in namespaces
# - Move the orphaned projects to the `lost-and-found` group
#   (while making them private and setting `archived=true`)
#
# On GitLab.com (2020-05-11) this migration will update 66 orphaned projects
class CleanupProjectsWithMissingNamespace < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers

DOWNTIME = false

# Visibility level written to the lost-and-found group and to every moved project
VISIBILITY_PRIVATE = 0

# Access level the ghost user receives on the lost-and-found group
ACCESS_LEVEL_OWNER = 50

# How many projects are inspected per iteration.
# Orphaned projects are expected to be extremely rare
# (66 orphaned projects out of a total 13.6M),
# so a window of 10K rows should be a safe choice.
BATCH_SIZE = 10_000

disable_ddl_transaction!
# Migration-local model mapped to the `user_details` table.
# Each row belongs to a migration-local User.
class UserDetail < ActiveRecord::Base
  self.table_name = 'user_details'

  belongs_to :user, class_name: 'CleanupProjectsWithMissingNamespace::User'
end
# Migration-local model mapped to the `users` table.
# It declares only the defaults, associations and callbacks this migration
# relies on to find (or create) the ghost user and its `lost-and-found` group.
class User < ActiveRecord::Base
  self.table_name = 'users'

  LOST_AND_FOUND_GROUP = 'lost-and-found'
  USER_TYPE_GHOST = 5
  DEFAULT_PROJECTS_LIMIT = 100_000

  default_value_for :admin, false
  default_value_for :can_create_group, true # we need this to create the group
  default_value_for :can_create_team, false
  default_value_for :project_view, :files
  default_value_for :notified_of_own_activity, false
  default_value_for :preferred_language, I18n.default_locale

  has_one :user_detail, class_name: 'CleanupProjectsWithMissingNamespace::UserDetail'
  has_one :namespace,
          -> { where(type: nil) },
          foreign_key: :owner_id,
          inverse_of: :owner,
          autosave: true,
          class_name: 'CleanupProjectsWithMissingNamespace::Namespace'

  before_save :ensure_namespace_correct
  before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed?

  enum project_view: { readme: 0, activity: 1, files: 2 }

  # before_save hook: builds the personal namespace when the user has none,
  # otherwise copies a changed username/name onto the existing namespace.
  def ensure_namespace_correct
    return build_namespace(path: username, name: name) unless namespace

    namespace.path = username if username_changed?
    namespace.name = name if name_changed?
  end

  # before_save hook (only when bio changed): copies the first 255 characters
  # of the bio to the user_detail record, unless the
  # :migrate_bio_to_user_details feature is disabled.
  def ensure_bio_is_assigned_to_user_details
    unless Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true)
      user_detail.bio = bio.to_s[0...255]
    end
  end

  # Returns the associated user_detail, building a new one when none is present
  def user_detail
    detail = super.presence
    detail || build_user_detail
  end

  # Return (or create if necessary) the `lost-and-found` group
  def lost_and_found_group
    group = existing_lost_and_found_group
    return group if group

    Group.create_unique_group(self, LOST_AND_FOUND_GROUP)
  end

  # Looks up a group whose name starts with LOST_AND_FOUND_GROUP and on which
  # this user holds an owner membership that is not a pending request.
  # Returns nil when there is no such group.
  def existing_lost_and_found_group
    # There should only be one Group for User Ghost starting with LOST_AND_FOUND_GROUP
    name_starts_with_prefix = Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%")

    owned_groups =
      Group
        .joins('INNER JOIN members ON namespaces.id = members.source_id')
        .where('namespaces.type = ?', 'Group')
        .where('members.type = ?', 'GroupMember')
        .where('members.source_type = ?', 'Namespace')
        .where('members.user_id = ?', id)
        .where('members.requested_at IS NULL')
        .where('members.access_level = ?', ACCESS_LEVEL_OWNER)

    owned_groups.find_by(name_starts_with_prefix)
  end

  class << self
    # Return (or create if necessary) the ghost user
    def ghost
      ghost_users = where(user_type: USER_TYPE_GHOST)

      unique_internal(ghost_users, 'ghost', 'ghost%s@example.com') do |user|
        user.bio = _('This is a "Ghost User", created to hold all issues authored by users that have since been deleted. This user cannot be removed.')
        user.name = 'Ghost User'
      end
    end

    # Returns the first record of `scope`; when the scope is empty, a new
    # internal user is created via create_unique_internal.
    def unique_internal(scope, username, email_pattern, &block)
      found = scope.first
      return found if found

      create_unique_internal(scope, username, email_pattern, &block)
    end

    # Creates the internal user while holding an exclusive lease, so that the
    # creation is never run concurrently. `email_pattern` is a format string
    # (e.g. 'ghost%s@example.com') used to derive an unused email address.
    # The lease is cancelled on every exit path.
    def create_unique_internal(scope, username, email_pattern, &creation_block)
      # Since we only want a single one of these in an instance, we use an
      # exclusive lease to ensure that this block is never run concurrently.
      lease_key = "user:unique_internal:#{username}"
      lease = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.minute.to_i)

      # Keep trying until we obtain the lease. To prevent hammering Redis too
      # much we'll wait for a bit between retries.
      uuid = lease.try_obtain
      until uuid
        sleep(1)
        uuid = lease.try_obtain
      end

      # Recheck if the user is already present. One might have been
      # added between the time the caller last checked
      # and the time we acquired the lock.
      existing_user = uncached { scope.first }
      return existing_user if existing_user.present?

      uniquify = Uniquify.new
      unique_username = uniquify.string(username) { |candidate| User.find_by_username(candidate) }
      email_builder = ->(n) { Kernel.sprintf(email_pattern, n) }
      unique_email = uniquify.string(email_builder) { |candidate| User.find_by_email(candidate) }

      User.create!(
        username: unique_username,
        email: unique_email,
        user_type: USER_TYPE_GHOST,
        projects_limit: DEFAULT_PROJECTS_LIMIT,
        state: :active,
        &creation_block
      )
    ensure
      Gitlab::ExclusiveLease.cancel(lease_key, uuid)
    end
  end
end
# Migration-local model mapped to the `namespaces` table.
# A namespace may reference an owning migration-local User.
class Namespace < ActiveRecord::Base
  self.table_name = 'namespaces'

  belongs_to :owner, class_name: 'CleanupProjectsWithMissingNamespace::User'
end
# Migration-local model for group rows stored in the `namespaces` table.
class Group < Namespace
  # Disable STI to allow us to manually set "type = 'Group'"
  # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::Group"
  self.inheritance_column = :_type_disabled

  # Creates a private top-level group and makes `user` its owner.
  #
  # The same string is stored as both the group's name and its path, so a
  # candidate is rejected when any existing top-level namespace already uses
  # it as its name OR as its path; Uniquify is then asked for the next
  # candidate.
  #
  # @param user [User] record that is added to the group with ACCESS_LEVEL_OWNER
  # @param group_name [String] preferred name/path (e.g. 'lost-and-found')
  # @return [Group] the newly created group
  def self.create_unique_group(user, group_name)
    top_level = Group.where(parent_id: nil)

    # 'lost-and-found' may be already defined, find a unique one
    unique_name = Uniquify.new.string(group_name) do |candidate|
      top_level.where(name: candidate).or(top_level.where(path: candidate)).exists?
    end

    group = Group.create!(
      name: unique_name,
      path: unique_name,
      type: 'Group',
      description: 'Group to store orphaned projects',
      visibility_level: VISIBILITY_PRIVATE
    )

    # No need to create a route for the lost-and-found group

    GroupMember.add_user(group, user, ACCESS_LEVEL_OWNER)

    group
  end
end
# Migration-local model mapped to the `members` table.
class Member < ActiveRecord::Base
  self.table_name = 'members'
end
# Migration-local model for group membership rows stored in the `members` table.
class GroupMember < Member
  NOTIFICATION_SETTING_GLOBAL = 3

  # Disable STI to allow us to manually set "type = 'GroupMember'"
  # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::GroupMember"
  self.inheritance_column = :_type_disabled

  # Inserts a membership row linking `user` to the namespace `source`
  # with the given access level. Raises if the row cannot be saved.
  def self.add_user(source, user, access_level)
    membership_attributes = {
      type: 'GroupMember',
      source_id: source.id,
      user_id: user.id,
      source_type: 'Namespace',
      access_level: access_level,
      notification_level: NOTIFICATION_SETTING_GLOBAL
    }

    create!(membership_attributes)
  end
end
# Migration-local model mapped to the `projects` table, iterable in batches.
class Project < ActiveRecord::Base
  self.table_name = 'projects'

  include ::EachBatch

  # Scope: projects whose namespace_id matches no row in `namespaces`
  def self.without_namespace
    # The heredoc lines share one indent and the trailing newline is chomped,
    # so the SQL fragment stays exactly the same text as before.
    missing_namespace = <<~SQL.chomp
      NOT EXISTS (
      SELECT 1
      FROM namespaces
      WHERE projects.namespace_id = namespaces.id
      )
    SQL

    where(missing_namespace)
  end
end
# Moves every project whose namespace row is missing into the ghost user's
# `lost-and-found` group. Moved projects are archived, made private and get
# their id appended to name and path.
def up
  # Reset the column information of all the models that update the database
  # to ensure the Active Record's knowledge of the table structure is current
  User.reset_column_information
  # UserDetail is written as well: saving the ghost user can store its bio
  # in user_details (see User#ensure_bio_is_assigned_to_user_details)
  UserDetail.reset_column_information
  Namespace.reset_column_information
  Member.reset_column_information
  Project.reset_column_information

  # Find or Create the ghost user
  ghost_user = User.ghost

  # Find or Create the `lost-and-found`
  lost_and_found = ghost_user.lost_and_found_group

  # With BATCH_SIZE=10K and projects.count=13.6M
  # ~1360 iterations will be run:
  # - each requires on average ~160ms for relation.without_namespace
  # - worst case scenario is that 66 of those batches will trigger an update (~200ms each)
  # In general, we expect less than 5% (=66/13.6M x 10K) to trigger an update
  # Expected total run time: ~235 seconds (== 220 seconds + 14 seconds)
  Project.each_batch(of: BATCH_SIZE) do |relation|
    relation.without_namespace.update_all <<~SQL
      namespace_id = #{lost_and_found.id},
      archived = TRUE,
      visibility_level = #{VISIBILITY_PRIVATE},
      -- Names are expected to be unique inside their namespace
      -- (uniqueness validation on namespace_id, name)
      -- Attach the id to the name and path to make sure that they are unique
      name = name || '_' || id::text,
      path = path || '_' || id::text
    SQL
  end
end
# Rolling back deliberately does nothing.
def down
  # no-op: the projects were in an inconsistent state before the migration ran,
  # and the update overwrote each project's original namespace_id, so there is
  # nothing to restore them to.
end
end
# rubocop:enable Migration/PreventStrings
|