Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb')
-rw-r--r--db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb261
1 files changed, 0 insertions, 261 deletions
diff --git a/db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb b/db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb
deleted file mode 100644
index 8e6ee0f35ca..00000000000
--- a/db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb
+++ /dev/null
@@ -1,261 +0,0 @@
-# frozen_string_literal: true
-
-# rubocop:disable Migration/PreventStrings
-
-# This migration cleans up Projects that were orphaned when their namespace was deleted
-# Instead of deleting them, we:
-# - Find (or create) the Ghost User
-# - Create (if not already exists) a `lost-and-found` group owned by the Ghost User
-# - Find orphaned projects --> namespace_id can not be found in namespaces
-# - Move the orphaned projects to the `lost-and-found` group
-# (while making them private and setting `archived=true`)
-#
-# On GitLab.com (2020-05-11) this migration will update 66 orphaned projects
-class CleanupProjectsWithMissingNamespace < ActiveRecord::Migration[6.0]
- include Gitlab::Database::MigrationHelpers
-
- DOWNTIME = false
- VISIBILITY_PRIVATE = 0
- ACCESS_LEVEL_OWNER = 50
-
- # The batch size of projects to check in each iteration
- # We expect the selectivity for orphaned projects to be very low:
- # (66 orphaned projects out of a total 13.6M)
- # so 10K should be a safe choice
- BATCH_SIZE = 10000
-
- disable_ddl_transaction!
-
- class UserDetail < ActiveRecord::Base
- self.table_name = 'user_details'
-
- belongs_to :user, class_name: 'CleanupProjectsWithMissingNamespace::User'
- end
-
- class User < ActiveRecord::Base
- self.table_name = 'users'
-
- LOST_AND_FOUND_GROUP = 'lost-and-found'
- USER_TYPE_GHOST = 5
- DEFAULT_PROJECTS_LIMIT = 100000
-
- default_value_for :admin, false
- default_value_for :can_create_group, true # we need this to create the group
- default_value_for :can_create_team, false
- default_value_for :project_view, :files
- default_value_for :notified_of_own_activity, false
- default_value_for :preferred_language, I18n.default_locale
-
- has_one :user_detail, class_name: 'CleanupProjectsWithMissingNamespace::UserDetail'
- has_one :namespace, -> { where(type: nil) },
- foreign_key: :owner_id, inverse_of: :owner, autosave: true,
- class_name: 'CleanupProjectsWithMissingNamespace::Namespace'
-
- before_save :ensure_namespace_correct
- before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed?
-
- enum project_view: { readme: 0, activity: 1, files: 2 }
-
- def ensure_namespace_correct
- if namespace
- namespace.path = username if username_changed?
- namespace.name = name if name_changed?
- else
- build_namespace(path: username, name: name)
- end
- end
-
- def ensure_bio_is_assigned_to_user_details
- user_detail.bio = bio.to_s[0...255]
- end
-
- def user_detail
- super.presence || build_user_detail
- end
-
- # Return (or create if necessary) the `lost-and-found` group
- def lost_and_found_group
- existing_lost_and_found_group || Group.create_unique_group(self, LOST_AND_FOUND_GROUP)
- end
-
- def existing_lost_and_found_group
- # There should only be one Group for User Ghost starting with LOST_AND_FOUND_GROUP
- Group
- .joins('INNER JOIN members ON namespaces.id = members.source_id')
- .where(namespaces: { type: 'Group' })
- .where(members: { type: 'GroupMember' })
- .where(members: { source_type: 'Namespace' })
- .where(members: { user_id: self.id })
- .where(members: { requested_at: nil })
- .where(members: { access_level: ACCESS_LEVEL_OWNER })
- .find_by(Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%"))
- end
-
- class << self
- # Return (or create if necessary) the ghost user
- def ghost
- email = 'ghost%s@example.com'
-
- unique_internal(where(user_type: USER_TYPE_GHOST), 'ghost', email) do |u|
- u.bio = _('This is a "Ghost User", created to hold all issues authored by users that have since been deleted. This user cannot be removed.')
- u.name = 'Ghost User'
- end
- end
-
- def unique_internal(scope, username, email_pattern, &block)
- scope.first || create_unique_internal(scope, username, email_pattern, &block)
- end
-
- def create_unique_internal(scope, username, email_pattern, &creation_block)
- # Since we only want a single one of these in an instance, we use an
- # exclusive lease to ensure that this block is never run concurrently.
- lease_key = "user:unique_internal:#{username}"
- lease = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.minute.to_i)
-
- until uuid = lease.try_obtain
- # Keep trying until we obtain the lease. To prevent hammering Redis too
- # much we'll wait for a bit between retries.
- sleep(1)
- end
-
- # Recheck if the user is already present. One might have been
- # added between the time we last checked (first line of this method)
- # and the time we acquired the lock.
- existing_user = uncached { scope.first }
- return existing_user if existing_user.present?
-
- uniquify = Uniquify.new
-
- username = uniquify.string(username) { |s| User.find_by_username(s) }
-
- email = uniquify.string(-> (n) { Kernel.sprintf(email_pattern, n) }) do |s|
- User.find_by_email(s)
- end
-
- User.create!(
- username: username,
- email: email,
- user_type: USER_TYPE_GHOST,
- projects_limit: DEFAULT_PROJECTS_LIMIT,
- state: :active,
- &creation_block
- )
- ensure
- Gitlab::ExclusiveLease.cancel(lease_key, uuid)
- end
- end
- end
-
- class Namespace < ActiveRecord::Base
- self.table_name = 'namespaces'
-
- belongs_to :owner, class_name: 'CleanupProjectsWithMissingNamespace::User'
- end
-
- class Group < Namespace
- # Disable STI to allow us to manually set "type = 'Group'"
- # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::Group"
- self.inheritance_column = :_type_disabled
-
- def self.create_unique_group(user, group_name)
- # 'lost-and-found' may be already defined, find a unique one
- group_name = Uniquify.new.string(group_name) do |str|
- Group.where(parent_id: nil, name: str).exists?
- end
-
- group = Group.create!(
- name: group_name,
- path: group_name,
- type: 'Group',
- description: 'Group to store orphaned projects',
- visibility_level: VISIBILITY_PRIVATE
- )
-
- # No need to create a route for the lost-and-found group
-
- GroupMember.add_user(group, user, ACCESS_LEVEL_OWNER)
-
- group
- end
- end
-
- class Member < ActiveRecord::Base
- self.table_name = 'members'
- end
-
- class GroupMember < Member
- NOTIFICATION_SETTING_GLOBAL = 3
-
- # Disable STI to allow us to manually set "type = 'GroupMember'"
- # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::GroupMember"
- self.inheritance_column = :_type_disabled
-
- def self.add_user(source, user, access_level)
- GroupMember.create!(
- type: 'GroupMember',
- source_id: source.id,
- user_id: user.id,
- source_type: 'Namespace',
- access_level: access_level,
- notification_level: NOTIFICATION_SETTING_GLOBAL
- )
- end
- end
-
- class Project < ActiveRecord::Base
- self.table_name = 'projects'
-
- include ::EachBatch
-
- def self.without_namespace
- where(
- 'NOT EXISTS (
- SELECT 1
- FROM namespaces
- WHERE projects.namespace_id = namespaces.id
- )'
- )
- end
- end
-
- def up
- # Reset the column information of all the models that update the database
- # to ensure the Active Record's knowledge of the table structure is current
- User.reset_column_information
- Namespace.reset_column_information
- Member.reset_column_information
- Project.reset_column_information
-
- # Find or Create the ghost user
- ghost_user = User.ghost
-
- # Find or Create the `lost-and-found`
- lost_and_found = ghost_user.lost_and_found_group
-
- # With BATCH_SIZE=10K and projects.count=13.6M
- # ~1360 iterations will be run:
- # - each requires on average ~160ms for relation.without_namespace
- # - worst case scenario is that 66 of those batches will trigger an update (~200ms each)
- # In general, we expect less than 5% (=66/13.6M x 10K) to trigger an update
- # Expected total run time: ~235 seconds (== 220 seconds + 14 seconds)
- Project.each_batch(of: BATCH_SIZE) do |relation|
- relation.without_namespace.update_all <<~SQL
- namespace_id = #{lost_and_found.id},
- archived = TRUE,
- visibility_level = #{VISIBILITY_PRIVATE},
-
- -- Names are expected to be unique inside their namespace
- -- (uniqueness validation on namespace_id, name)
- -- Attach the id to the name and path to make sure that they are unique
- name = name || '_' || id::text,
- path = path || '_' || id::text
- SQL
- end
- end
-
- def down
- # no-op: the original state for those projects was inconsistent
- # Also, the original namespace_id for each project is lost during the update
- end
-end
-# rubocop:enable Migration/PreventStrings