Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- db/post_migrate/20181218192239_backfill_project_repositories_for_legacy_storage_projects.rb 26
-rw-r--r-- db/schema.rb 2
-rw-r--r-- lib/gitlab/background_migration/backfill_hashed_project_repositories.rb 127
-rw-r--r-- lib/gitlab/background_migration/backfill_legacy_project_repositories.rb 15
-rw-r--r-- lib/gitlab/background_migration/backfill_project_repositories.rb 219
-rw-r--r-- spec/factories/project_repositories.rb 12
-rw-r--r-- spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb 59
-rw-r--r-- spec/lib/gitlab/background_migration/backfill_legacy_project_repositories_spec.rb 45
-rw-r--r-- spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb 94
9 files changed, 423 insertions, 176 deletions
diff --git a/db/post_migrate/20181218192239_backfill_project_repositories_for_legacy_storage_projects.rb b/db/post_migrate/20181218192239_backfill_project_repositories_for_legacy_storage_projects.rb
new file mode 100644
index 00000000000..42f96750789
--- /dev/null
+++ b/db/post_migrate/20181218192239_backfill_project_repositories_for_legacy_storage_projects.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+class BackfillProjectRepositoriesForLegacyStorageProjects < ActiveRecord::Migration[5.0]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ BATCH_SIZE = 1_000
+ DELAY_INTERVAL = 5.minutes
+ MIGRATION = 'BackfillLegacyProjectRepositories'
+
+ disable_ddl_transaction!
+
+ class Project < ActiveRecord::Base
+ include EachBatch
+
+ self.table_name = 'projects'
+ end
+
+ def up
+ queue_background_migration_jobs_by_range_at_intervals(Project, MIGRATION, DELAY_INTERVAL)
+ end
+
+ def down
+ # no-op: rows may have existed before this migration ran, so do not remove anything
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 008bff49a2b..604ed1cd6b0 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20181212104941) do
+ActiveRecord::Schema.define(version: 20181218192239) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
diff --git a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
index 2f76f2f7434..a6194616663 100644
--- a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
+++ b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
@@ -2,132 +2,13 @@
module Gitlab
module BackgroundMigration
- # Class that will create fill the project_repositories table
- # for all projects that are on hashed storage and an entry is
- # is missing in this table.
- class BackfillHashedProjectRepositories
- # Shard model
- class Shard < ActiveRecord::Base
- self.table_name = 'shards'
- end
-
- # Class that will find or create the shard by name.
- # There is only a small set of shards, which would
- # not change quickly, so look them up from memory
- # instead of hitting the DB each time.
- class ShardFinder
- def find_shard_id(name)
- shard_id = shards.fetch(name, nil)
- return shard_id if shard_id.present?
-
- Shard.transaction(requires_new: true) do
- create!(name)
- end
- rescue ActiveRecord::RecordNotUnique
- reload!
- retry
- end
-
- private
-
- def create!(name)
- Shard.create!(name: name).tap { |shard| @shards[name] = shard.id }
- end
-
- def shards
- @shards ||= reload!
- end
-
- def reload!
- @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten]
- end
- end
-
- # ProjectRegistry model
- class ProjectRepository < ActiveRecord::Base
- self.table_name = 'project_repositories'
-
- belongs_to :project, inverse_of: :project_repository
- end
-
- # Project model
- class Project < ActiveRecord::Base
- self.table_name = 'projects'
-
- HASHED_PATH_PREFIX = '@hashed'
-
- HASHED_STORAGE_FEATURES = {
- repository: 1,
- attachments: 2
- }.freeze
-
- has_one :project_repository, inverse_of: :project
-
- class << self
- def on_hashed_storage
- where(Project.arel_table[:storage_version]
- .gteq(HASHED_STORAGE_FEATURES[:repository]))
- end
-
- def without_project_repository
- joins(left_outer_join_project_repository)
- .where(ProjectRepository.arel_table[:project_id].eq(nil))
- end
-
- def left_outer_join_project_repository
- projects_table = Project.arel_table
- repository_table = ProjectRepository.arel_table
-
- projects_table
- .join(repository_table, Arel::Nodes::OuterJoin)
- .on(projects_table[:id].eq(repository_table[:project_id]))
- .join_sources
- end
- end
-
- def hashed_storage?
- self.storage_version && self.storage_version >= 1
- end
-
- def hashed_disk_path
- "#{HASHED_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
- end
-
- def disk_hash
- @disk_hash ||= Digest::SHA2.hexdigest(id.to_s)
- end
- end
-
- def perform(start_id, stop_id)
- Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id))
- end
-
+ # Class that will fill the project_repositories table for projects that
+ # are on hashed storage and for which an entry is missing in this table.
+ class BackfillHashedProjectRepositories < BackfillProjectRepositories
private
- def project_repositories(start_id, stop_id)
+ def projects
Project.on_hashed_storage
- .without_project_repository
- .where(id: start_id..stop_id)
- .map { |project| build_attributes_for_project(project) }
- .compact
- end
-
- def build_attributes_for_project(project)
- return unless project.hashed_storage?
-
- {
- project_id: project.id,
- shard_id: find_shard_id(project.repository_storage),
- disk_path: project.hashed_disk_path
- }
- end
-
- def find_shard_id(repository_storage)
- shard_finder.find_shard_id(repository_storage)
- end
-
- def shard_finder
- @shard_finder ||= ShardFinder.new
end
end
end
diff --git a/lib/gitlab/background_migration/backfill_legacy_project_repositories.rb b/lib/gitlab/background_migration/backfill_legacy_project_repositories.rb
new file mode 100644
index 00000000000..6dc92672929
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_legacy_project_repositories.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Class that will fill the project_repositories table for projects that
+ # are on legacy storage and for which an entry is missing in this table.
+ class BackfillLegacyProjectRepositories < BackfillProjectRepositories
+ private
+
+ def projects
+ Project.with_parent.on_legacy_storage
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/backfill_project_repositories.rb b/lib/gitlab/background_migration/backfill_project_repositories.rb
new file mode 100644
index 00000000000..aaf520d70f6
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_project_repositories.rb
@@ -0,0 +1,219 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Class that will fill the project_repositories table
+ # for projects for which an entry is missing in this table.
+ class BackfillProjectRepositories
+ OrphanedNamespaceError = Class.new(StandardError)
+
+ # Shard model
+ class Shard < ActiveRecord::Base
+ self.table_name = 'shards'
+ end
+
+ # Class that will find or create the shard by name.
+ # There is only a small set of shards, which would
+ # not change quickly, so look them up from memory
+ # instead of hitting the DB each time.
+ class ShardFinder
+ def find_shard_id(name)
+ shard_id = shards.fetch(name, nil)
+ return shard_id if shard_id.present?
+
+ Shard.transaction(requires_new: true) do
+ create!(name)
+ end
+ rescue ActiveRecord::RecordNotUnique
+ reload!
+ retry
+ end
+
+ private
+
+ def create!(name)
+ Shard.create!(name: name).tap { |shard| @shards[name] = shard.id }
+ end
+
+ def shards
+ @shards ||= reload!
+ end
+
+ def reload!
+ @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten]
+ end
+ end
+
+ module Storage
+ # Class that returns the disk path for a project using hashed storage
+ class HashedProject
+ attr_accessor :project
+
+ ROOT_PATH_PREFIX = '@hashed'
+
+ def initialize(project)
+ @project = project
+ end
+
+ def disk_path
+ "#{ROOT_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
+ end
+
+ def disk_hash
+ @disk_hash ||= Digest::SHA2.hexdigest(project.id.to_s)
+ end
+ end
+
+ # Class that returns the disk path for a project using legacy storage
+ class LegacyProject
+ attr_accessor :project
+
+ def initialize(project)
+ @project = project
+ end
+
+ def disk_path
+ project.full_path
+ end
+ end
+ end
+
+ # Concern used by Project and Namespace to determine the full route to the project
+ module Routable
+ extend ActiveSupport::Concern
+
+ def full_path
+ @full_path ||= build_full_path
+ end
+
+ def build_full_path
+ return path unless has_parent?
+
+ raise OrphanedNamespaceError if parent.nil?
+
+ parent.full_path + '/' + path
+ end
+
+ def has_parent?
+ read_attribute(association(:parent).reflection.foreign_key)
+ end
+ end
+
+ # Namespace model.
+ class Namespace < ActiveRecord::Base
+ self.table_name = 'namespaces'
+ self.inheritance_column = nil
+
+ include Routable
+
+ belongs_to :parent, class_name: 'Namespace', inverse_of: 'namespaces'
+
+ has_many :projects, inverse_of: :parent
+ has_many :namespaces, inverse_of: :parent
+ end
+
+ # ProjectRepository model
+ class ProjectRepository < ActiveRecord::Base
+ self.table_name = 'project_repositories'
+
+ belongs_to :project, inverse_of: :project_repository
+ end
+
+ # Project model
+ class Project < ActiveRecord::Base
+ self.table_name = 'projects'
+
+ include Routable
+
+ HASHED_STORAGE_FEATURES = {
+ repository: 1,
+ attachments: 2
+ }.freeze
+
+ scope :with_parent, -> { includes(:parent) }
+
+ belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects'
+
+ has_one :project_repository, inverse_of: :project
+
+ delegate :disk_path, to: :storage
+
+ class << self
+ def on_hashed_storage
+ where(Project.arel_table[:storage_version]
+ .gteq(HASHED_STORAGE_FEATURES[:repository]))
+ end
+
+ def on_legacy_storage
+ where(Project.arel_table[:storage_version].eq(nil)
+ .or(Project.arel_table[:storage_version].eq(0)))
+ end
+
+ def without_project_repository
+ joins(left_outer_join_project_repository)
+ .where(ProjectRepository.arel_table[:project_id].eq(nil))
+ end
+
+ def left_outer_join_project_repository
+ projects_table = Project.arel_table
+ repository_table = ProjectRepository.arel_table
+
+ projects_table
+ .join(repository_table, Arel::Nodes::OuterJoin)
+ .on(projects_table[:id].eq(repository_table[:project_id]))
+ .join_sources
+ end
+ end
+
+ def storage
+ @storage ||=
+ if hashed_storage?
+ Storage::HashedProject.new(self)
+ else
+ Storage::LegacyProject.new(self)
+ end
+ end
+
+ def hashed_storage?
+ self.storage_version &&
+ self.storage_version >= HASHED_STORAGE_FEATURES[:repository]
+ end
+ end
+
+ def perform(start_id, stop_id)
+ Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id))
+ end
+
+ private
+
+ def projects
+ raise NotImplementedError,
+ "#{self.class} does not implement #{__method__}"
+ end
+
+ def project_repositories(start_id, stop_id)
+ projects
+ .without_project_repository
+ .where(id: start_id..stop_id)
+ .map { |project| build_attributes_for_project(project) }
+ .compact
+ end
+
+ def build_attributes_for_project(project)
+ {
+ project_id: project.id,
+ shard_id: find_shard_id(project.repository_storage),
+ disk_path: project.disk_path
+ }
+ end
+
+ def find_shard_id(repository_storage)
+ shard_finder.find_shard_id(repository_storage)
+ end
+
+ def shard_finder
+ @shard_finder ||= ShardFinder.new
+ end
+ end
+ end
+end
diff --git a/spec/factories/project_repositories.rb b/spec/factories/project_repositories.rb
new file mode 100644
index 00000000000..39e8ea2e11e
--- /dev/null
+++ b/spec/factories/project_repositories.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+FactoryBot.define do
+ factory :project_repository do
+ project
+
+ after(:build) do |project_repository, _|
+ project_repository.shard_name = project_repository.project.repository_storage
+ project_repository.disk_path = project_repository.project.disk_path
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
index b6c1edbbf8b..236e63ada45 100644
--- a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
+++ b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
@@ -3,59 +3,14 @@
require 'spec_helper'
describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do
- let(:namespaces) { table(:namespaces) }
- let(:project_repositories) { table(:project_repositories) }
- let(:projects) { table(:projects) }
- let(:shards) { table(:shards) }
- let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
- let(:shard) { shards.create!(name: 'default') }
-
- describe described_class::ShardFinder do
- describe '#find_shard_id' do
- it 'creates a new shard when it does not exist yet' do
- expect { subject.find_shard_id('other') }.to change(shards, :count).by(1)
- end
-
- it 'returns the shard when it exists' do
- shards.create(id: 5, name: 'other')
-
- shard_id = subject.find_shard_id('other')
-
- expect(shard_id).to eq(5)
- end
-
- it 'only queries the database once to retrieve shards' do
- subject.find_shard_id('default')
-
- expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0)
- end
- end
- end
-
- describe described_class::Project do
- describe '.on_hashed_storage' do
- it 'finds projects with repository on hashed storage' do
- projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
- projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
- projects.create!(id: 3, name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
- projects.create!(id: 4, name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: nil)
-
- expect(described_class.on_hashed_storage.pluck(:id)).to match_array([1, 2])
- end
- end
-
- describe '.without_project_repository' do
- it 'finds projects which do not have a projects_repositories entry' do
- projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id)
- projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id)
- project_repositories.create!(project_id: 2, disk_path: '@phony/foo/bar', shard_id: shard.id)
-
- expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(1)
- end
- end
- end
-
describe '#perform' do
+ let(:namespaces) { table(:namespaces) }
+ let(:project_repositories) { table(:project_repositories) }
+ let(:projects) { table(:projects) }
+ let(:shards) { table(:shards) }
+ let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
+ let(:shard) { shards.create!(name: 'default') }
+
it 'creates a project_repository row for projects on hashed storage that need one' do
projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
diff --git a/spec/lib/gitlab/background_migration/backfill_legacy_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_legacy_project_repositories_spec.rb
new file mode 100644
index 00000000000..313f0ccc6b6
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_legacy_project_repositories_spec.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::BackfillLegacyProjectRepositories, :migration, schema: 20181218192239 do
+ describe '#perform' do
+ let(:namespaces) { table(:namespaces) }
+ let(:project_repositories) { table(:project_repositories) }
+ let(:projects) { table(:projects) }
+ let(:shards) { table(:shards) }
+ let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
+ let(:shard) { shards.create!(name: 'default') }
+
+ it 'creates a project_repository row for projects on legacy storage that need one' do
+ projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: nil)
+ projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 0)
+
+ expect { described_class.new.perform(1, projects.last.id) }.to change(project_repositories, :count).by(2)
+ end
+
+ it 'does nothing for projects on legacy storage that have already a project_repository row' do
+ projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
+ project_repositories.create!(project_id: 1, disk_path: 'phony/foo/bar', shard_id: shard.id)
+
+ expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
+ end
+
+ it 'does nothing for projects on hashed storage' do
+ projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+
+ expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
+ end
+
+ it 'inserts rows in a single query' do
+ projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0, repository_storage: shard.name)
+
+ control_count = ActiveRecord::QueryRecorder.new { described_class.new.perform(1, projects.last.id) }
+
+ projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 0, repository_storage: shard.name)
+ projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: 0, repository_storage: shard.name)
+
+ expect { described_class.new.perform(1, projects.last.id) }.not_to exceed_query_limit(control_count)
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb
new file mode 100644
index 00000000000..53c071f0268
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb
@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::BackfillProjectRepositories do
+ let(:group) { create(:group, name: 'foo', path: 'foo') }
+
+ describe described_class::ShardFinder do
+ let(:shard) { create(:shard, name: 'default') }
+
+ describe '#find_shard_id' do
+ it 'creates a new shard when it does not exist yet' do
+ expect { subject.find_shard_id('other') }.to change(Shard, :count).by(1)
+ end
+
+ it 'returns the shard when it exists' do
+ other_shard = create(:shard, name: 'other')
+
+ shard_id = subject.find_shard_id('other')
+
+ expect(shard_id).to eq(other_shard.id)
+ end
+
+ it 'only queries the database once to retrieve shards' do
+ subject.find_shard_id('default')
+
+ expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0)
+ end
+ end
+ end
+
+ describe described_class::Project do
+ let!(:project_hashed_storage_1) { create(:project, name: 'foo', path: 'foo', namespace: group, storage_version: 1) }
+ let!(:project_hashed_storage_2) { create(:project, name: 'bar', path: 'bar', namespace: group, storage_version: 2) }
+ let!(:project_legacy_storage_3) { create(:project, name: 'baz', path: 'baz', namespace: group, storage_version: 0) }
+ let!(:project_legacy_storage_4) { create(:project, name: 'zoo', path: 'zoo', namespace: group, storage_version: nil) }
+
+ describe '.on_hashed_storage' do
+ it 'finds projects with repository on hashed storage' do
+ projects = described_class.on_hashed_storage.pluck(:id)
+
+ expect(projects).to match_array([project_hashed_storage_1.id, project_hashed_storage_2.id])
+ end
+ end
+
+ describe '.on_legacy_storage' do
+ it 'finds projects with repository on legacy storage' do
+ projects = described_class.on_legacy_storage.pluck(:id)
+
+ expect(projects).to match_array([project_legacy_storage_3.id, project_legacy_storage_4.id])
+ end
+ end
+
+ describe '.without_project_repository' do
+ it 'finds projects which do not have a projects_repositories entry' do
+ create(:project_repository, project: project_hashed_storage_1)
+ create(:project_repository, project: project_legacy_storage_3)
+
+ projects = described_class.without_project_repository.pluck(:id)
+
+ expect(projects).to contain_exactly(project_hashed_storage_2.id, project_legacy_storage_4.id)
+ end
+ end
+
+ describe '#disk_path' do
+ context 'for projects on hashed storage' do
+ it 'returns the correct disk_path' do
+ project = described_class.find(project_hashed_storage_1.id)
+
+ expect(project.disk_path).to eq(project_hashed_storage_1.disk_path)
+ end
+ end
+
+ context 'for projects on legacy storage' do
+ it 'returns the correct disk_path' do
+ project = described_class.find(project_legacy_storage_3.id)
+
+ expect(project.disk_path).to eq(project_legacy_storage_3.disk_path)
+ end
+
+ it 'raises OrphanedNamespaceError when any parent namespace does not exist' do
+ subgroup = create(:group, parent: group)
+ project_orphaned_namespace = create(:project, name: 'baz', path: 'baz', namespace: subgroup, storage_version: nil)
+ subgroup.update_column(:parent_id, Namespace.maximum(:id).succ)
+
+ project = described_class.find(project_orphaned_namespace.id)
+
+ expect { project.disk_path }
+ .to raise_error(Gitlab::BackgroundMigration::BackfillProjectRepositories::OrphanedNamespaceError)
+ end
+ end
+ end
+ end
+end