diff options
author | Douglas Barbosa Alexandre <dbalexandre@gmail.com> | 2018-12-19 00:02:36 +0300 |
---|---|---|
committer | Douglas Barbosa Alexandre <dbalexandre@gmail.com> | 2018-12-19 02:13:38 +0300 |
commit | 754f66113e91ba880ed92075bd06adc1509c1d8f (patch) | |
tree | 9ff0cfa691b3388c91329918e2ac8594f7c6b221 /lib/gitlab/background_migration | |
parent | 5d68c23792e87e710877e4baf57605bcf11a6cb5 (diff) |
Backfill project_repositories for legacy storage projects
Adds a background migration that will ensure all projects that
are on legacy storage have a row in `project_repositories`.
Diffstat (limited to 'lib/gitlab/background_migration')
3 files changed, 238 insertions, 123 deletions
diff --git a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb index 2f76f2f7434..a6194616663 100644 --- a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb +++ b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb @@ -2,132 +2,13 @@ module Gitlab module BackgroundMigration - # Class that will create fill the project_repositories table - # for all projects that are on hashed storage and an entry is - # is missing in this table. - class BackfillHashedProjectRepositories - # Shard model - class Shard < ActiveRecord::Base - self.table_name = 'shards' - end - - # Class that will find or create the shard by name. - # There is only a small set of shards, which would - # not change quickly, so look them up from memory - # instead of hitting the DB each time. - class ShardFinder - def find_shard_id(name) - shard_id = shards.fetch(name, nil) - return shard_id if shard_id.present? - - Shard.transaction(requires_new: true) do - create!(name) - end - rescue ActiveRecord::RecordNotUnique - reload! - retry - end - - private - - def create!(name) - Shard.create!(name: name).tap { |shard| @shards[name] = shard.id } - end - - def shards - @shards ||= reload! - end - - def reload! 
- @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten] - end - end - - # ProjectRegistry model - class ProjectRepository < ActiveRecord::Base - self.table_name = 'project_repositories' - - belongs_to :project, inverse_of: :project_repository - end - - # Project model - class Project < ActiveRecord::Base - self.table_name = 'projects' - - HASHED_PATH_PREFIX = '@hashed' - - HASHED_STORAGE_FEATURES = { - repository: 1, - attachments: 2 - }.freeze - - has_one :project_repository, inverse_of: :project - - class << self - def on_hashed_storage - where(Project.arel_table[:storage_version] - .gteq(HASHED_STORAGE_FEATURES[:repository])) - end - - def without_project_repository - joins(left_outer_join_project_repository) - .where(ProjectRepository.arel_table[:project_id].eq(nil)) - end - - def left_outer_join_project_repository - projects_table = Project.arel_table - repository_table = ProjectRepository.arel_table - - projects_table - .join(repository_table, Arel::Nodes::OuterJoin) - .on(projects_table[:id].eq(repository_table[:project_id])) - .join_sources - end - end - - def hashed_storage? - self.storage_version && self.storage_version >= 1 - end - - def hashed_disk_path - "#{HASHED_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}" - end - - def disk_hash - @disk_hash ||= Digest::SHA2.hexdigest(id.to_s) - end - end - - def perform(start_id, stop_id) - Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) - end - + # Class that will fill the project_repositories table for projects that + # are on hashed storage and an entry is missing in this table. 
+ class BackfillHashedProjectRepositories < BackfillProjectRepositories private - def project_repositories(start_id, stop_id) + def projects Project.on_hashed_storage - .without_project_repository - .where(id: start_id..stop_id) - .map { |project| build_attributes_for_project(project) } - .compact - end - - def build_attributes_for_project(project) - return unless project.hashed_storage? - - { - project_id: project.id, - shard_id: find_shard_id(project.repository_storage), - disk_path: project.hashed_disk_path - } - end - - def find_shard_id(repository_storage) - shard_finder.find_shard_id(repository_storage) - end - - def shard_finder - @shard_finder ||= ShardFinder.new end end end diff --git a/lib/gitlab/background_migration/backfill_legacy_project_repositories.rb b/lib/gitlab/background_migration/backfill_legacy_project_repositories.rb new file mode 100644 index 00000000000..6dc92672929 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_legacy_project_repositories.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Class that will fill the project_repositories table for projects that + # are on legacy storage and an entry is missing in this table. + class BackfillLegacyProjectRepositories < BackfillProjectRepositories + private + + def projects + Project.with_parent.on_legacy_storage + end + end + end +end diff --git a/lib/gitlab/background_migration/backfill_project_repositories.rb b/lib/gitlab/background_migration/backfill_project_repositories.rb new file mode 100644 index 00000000000..aaf520d70f6 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_project_repositories.rb @@ -0,0 +1,219 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Class that will fill the project_repositories table + # for projects where an entry is missing in this table. 
+ class BackfillProjectRepositories + OrphanedNamespaceError = Class.new(StandardError) + + # Shard model + class Shard < ActiveRecord::Base + self.table_name = 'shards' + end + + # Class that will find or create the shard by name. + # There is only a small set of shards, which would + # not change quickly, so look them up from memory + # instead of hitting the DB each time. + class ShardFinder + def find_shard_id(name) + shard_id = shards.fetch(name, nil) + return shard_id if shard_id.present? + + Shard.transaction(requires_new: true) do + create!(name) + end + rescue ActiveRecord::RecordNotUnique + reload! + retry + end + + private + + def create!(name) + Shard.create!(name: name).tap { |shard| @shards[name] = shard.id } + end + + def shards + @shards ||= reload! + end + + def reload! + @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten] + end + end + + module Storage + # Class that returns the disk path for a project using hashed storage + class HashedProject + attr_accessor :project + + ROOT_PATH_PREFIX = '@hashed' + + def initialize(project) + @project = project + end + + def disk_path + "#{ROOT_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}" + end + + def disk_hash + @disk_hash ||= Digest::SHA2.hexdigest(project.id.to_s) + end + end + + # Class that returns the disk path for a project using legacy storage + class LegacyProject + attr_accessor :project + + def initialize(project) + @project = project + end + + def disk_path + project.full_path + end + end + end + + # Concern used by Project and Namespace to determine the full route to the project + module Routable + extend ActiveSupport::Concern + + def full_path + @full_path ||= build_full_path + end + + def build_full_path + return path unless has_parent? + + raise OrphanedNamespaceError if parent.nil? + + parent.full_path + '/' + path + end + + def has_parent? + read_attribute(association(:parent).reflection.foreign_key) + end + end + + # Namespace model. 
+ class Namespace < ActiveRecord::Base + self.table_name = 'namespaces' + self.inheritance_column = nil + + include Routable + + belongs_to :parent, class_name: 'Namespace', inverse_of: 'namespaces' + + has_many :projects, inverse_of: :parent + has_many :namespaces, inverse_of: :parent + end + + # ProjectRepository model + class ProjectRepository < ActiveRecord::Base + self.table_name = 'project_repositories' + + belongs_to :project, inverse_of: :project_repository + end + + # Project model + class Project < ActiveRecord::Base + self.table_name = 'projects' + + include Routable + + HASHED_STORAGE_FEATURES = { + repository: 1, + attachments: 2 + }.freeze + + scope :with_parent, -> { includes(:parent) } + + belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects' + + has_one :project_repository, inverse_of: :project + + delegate :disk_path, to: :storage + + class << self + def on_hashed_storage + where(Project.arel_table[:storage_version] + .gteq(HASHED_STORAGE_FEATURES[:repository])) + end + + def on_legacy_storage + where(Project.arel_table[:storage_version].eq(nil) + .or(Project.arel_table[:storage_version].eq(0))) + end + + def without_project_repository + joins(left_outer_join_project_repository) + .where(ProjectRepository.arel_table[:project_id].eq(nil)) + end + + def left_outer_join_project_repository + projects_table = Project.arel_table + repository_table = ProjectRepository.arel_table + + projects_table + .join(repository_table, Arel::Nodes::OuterJoin) + .on(projects_table[:id].eq(repository_table[:project_id])) + .join_sources + end + end + + def storage + @storage ||= + if hashed_storage? + Storage::HashedProject.new(self) + else + Storage::LegacyProject.new(self) + end + end + + def hashed_storage? 
+ self.storage_version && + self.storage_version >= HASHED_STORAGE_FEATURES[:repository] + end + end + + def perform(start_id, stop_id) + Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) + end + + private + + def projects + raise NotImplementedError, + "#{self.class} does not implement #{__method__}" + end + + def project_repositories(start_id, stop_id) + projects + .without_project_repository + .where(id: start_id..stop_id) + .map { |project| build_attributes_for_project(project) } + .compact + end + + def build_attributes_for_project(project) + { + project_id: project.id, + shard_id: find_shard_id(project.repository_storage), + disk_path: project.disk_path + } + end + + def find_shard_id(repository_storage) + shard_finder.find_shard_id(repository_storage) + end + + def shard_finder + @shard_finder ||= ShardFinder.new + end + end + end +end |