Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb7
-rw-r--r--lib/gitlab/background_migration/backfill_hashed_project_repositories.rb92
-rw-r--r--spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb139
3 files changed, 91 insertions, 147 deletions
diff --git a/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
index b989d9fb43d..7814cdba58a 100644
--- a/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
+++ b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
@@ -1,11 +1,11 @@
# frozen_string_literal: true
-class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0]
+class BackfillHashedProjectRepositories < ActiveRecord::Migration[4.2]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 1_000
- DELAY_INTERVAL = 1.minutes
+ DELAY_INTERVAL = 5.minutes
MIGRATION = 'BackfillHashedProjectRepositories'
disable_ddl_transaction!
@@ -21,7 +21,6 @@ class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0]
end
def down
- # Since there could have been existing rows before the migration
- # do not remove anything
+ # no-op: since there could have been existing rows before the migration do not remove anything
end
end
diff --git a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
index 88696dd1aa6..2f76f2f7434 100644
--- a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
+++ b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
@@ -2,58 +2,60 @@
module Gitlab
module BackgroundMigration
- # Class the will create rows in project_repositories for all
- # projects that are on hashed storage
+ # Class that will create fill the project_repositories table
+ # for all projects that are on hashed storage and an entry is
+ # is missing in this table.
class BackfillHashedProjectRepositories
- # Model for a Shard
+ # Shard model
class Shard < ActiveRecord::Base
self.table_name = 'shards'
-
- def self.by_name(name)
- to_a.detect { |shard| shard.name == name } || create_by(name: name)
- rescue ActiveRecord::RecordNotUnique
- retry
- end
end
# Class that will find or create the shard by name.
- # There is only a small set of shards, which would not change quickly,
- # so look them up from memory instead of hitting the DB each time.
+ # There is only a small set of shards, which would
+ # not change quickly, so look them up from memory
+ # instead of hitting the DB each time.
class ShardFinder
- def find(name)
- shards.detect { |shard| shard.name == name } || create!(name)
+ def find_shard_id(name)
+ shard_id = shards.fetch(name, nil)
+ return shard_id if shard_id.present?
+
+ Shard.transaction(requires_new: true) do
+ create!(name)
+ end
rescue ActiveRecord::RecordNotUnique
- load!
+ reload!
retry
end
private
def create!(name)
- Shard.create!(name: name).tap { |shard| @shards << shard }
+ Shard.create!(name: name).tap { |shard| @shards[name] = shard.id }
end
def shards
- @shards || load!
+ @shards ||= reload!
end
- def load!
- @shards = Shard.all.to_a
+ def reload!
+ @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten]
end
end
- # Model for a ProjectRepository
+ # ProjectRegistry model
class ProjectRepository < ActiveRecord::Base
self.table_name = 'project_repositories'
belongs_to :project, inverse_of: :project_repository
end
- # Model for a Project
+ # Project model
class Project < ActiveRecord::Base
self.table_name = 'projects'
HASHED_PATH_PREFIX = '@hashed'
+
HASHED_STORAGE_FEATURES = {
repository: 1,
attachments: 2
@@ -63,33 +65,26 @@ module Gitlab
class << self
def on_hashed_storage
- where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[:repository]))
+ where(Project.arel_table[:storage_version]
+ .gteq(HASHED_STORAGE_FEATURES[:repository]))
end
def without_project_repository
- cond = ProjectRepository.arel_table[:project_id].eq(nil)
- left_outer_joins(:project_repository).where(cond)
+ joins(left_outer_join_project_repository)
+ .where(ProjectRepository.arel_table[:project_id].eq(nil))
end
- def left_outer_joins(relation)
- return super if Gitlab.rails5?
+ def left_outer_join_project_repository
+ projects_table = Project.arel_table
+ repository_table = ProjectRepository.arel_table
- # TODO Rails 4?
+ projects_table
+ .join(repository_table, Arel::Nodes::OuterJoin)
+ .on(projects_table[:id].eq(repository_table[:project_id]))
+ .join_sources
end
end
- def project_repository_attributes(shard_finder)
- return unless hashed_storage?
-
- {
- project_id: id,
- shard_id: shard_finder.find(repository_storage).id,
- disk_path: hashed_disk_path
- }
- end
-
- private
-
def hashed_storage?
self.storage_version && self.storage_version >= 1
end
@@ -99,7 +94,7 @@ module Gitlab
end
def disk_hash
- @disk_hash ||= Digest::SHA2.hexdigest(id.to_s) if id
+ @disk_hash ||= Digest::SHA2.hexdigest(id.to_s)
end
end
@@ -110,12 +105,27 @@ module Gitlab
private
def project_repositories(start_id, stop_id)
- Project.on_hashed_storage.without_project_repository
+ Project.on_hashed_storage
+ .without_project_repository
.where(id: start_id..stop_id)
- .map { |project| project.project_repository_attributes(shard_finder) }
+ .map { |project| build_attributes_for_project(project) }
.compact
end
+ def build_attributes_for_project(project)
+ return unless project.hashed_storage?
+
+ {
+ project_id: project.id,
+ shard_id: find_shard_id(project.repository_storage),
+ disk_path: project.hashed_disk_path
+ }
+ end
+
+ def find_shard_id(repository_storage)
+ shard_finder.find_shard_id(repository_storage)
+ end
+
def shard_finder
@shard_finder ||= ShardFinder.new
end
diff --git a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
index d2f499ffa64..b6c1edbbf8b 100644
--- a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
+++ b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
@@ -3,59 +3,31 @@
require 'spec_helper'
describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do
- let(:shards) { table(:shards) }
let(:namespaces) { table(:namespaces) }
- let(:projects) { table(:projects) }
let(:project_repositories) { table(:project_repositories) }
+ let(:projects) { table(:projects) }
+ let(:shards) { table(:shards) }
let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
- let(:default_shard) { shards.create!(name: 'default') }
+ let(:shard) { shards.create!(name: 'default') }
describe described_class::ShardFinder do
- describe '#find' do
- subject(:finder) { described_class.new }
-
- it 'creates the shard by name' do
- expect(finder).to receive(:create!).and_call_original
-
- expect(finder.find('default')).to be_present
- end
-
- it 'does not try to create existing shards' do
- shards.create(name: 'default')
-
- expect(finder).not_to receive(:create!)
-
- finder.find('default')
- end
-
- it 'only queries the database once for shards' do
- finder.find('default')
-
- expect do
- finder.find('default')
- end.not_to exceed_query_limit(0)
- end
-
+ describe '#find_shard_id' do
it 'creates a new shard when it does not exist yet' do
- expect do
- finder.find('other')
- end.to change(shards, :count).by(1)
+ expect { subject.find_shard_id('other') }.to change(shards, :count).by(1)
end
- it 'only creates a new shard once' do
- finder.find('other')
+ it 'returns the shard when it exists' do
+ shards.create(id: 5, name: 'other')
- expect do
- finder.find('other')
- end.not_to change(shards, :count)
- end
+ shard_id = subject.find_shard_id('other')
- it 'is not vulnerable to race conditions' do
- finder.find('default')
+ expect(shard_id).to eq(5)
+ end
- other_shard = shards.create(name: 'other')
+ it 'only queries the database once to retrieve shards' do
+ subject.find_shard_id('default')
- expect(finder.find('other').id).to eq(other_shard.id)
+ expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0)
end
end
end
@@ -63,93 +35,56 @@ describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migrat
describe described_class::Project do
describe '.on_hashed_storage' do
it 'finds projects with repository on hashed storage' do
- hashed_projects = [
- projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1),
- projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
- ]
+ projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+ projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
+ projects.create!(id: 3, name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
+ projects.create!(id: 4, name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: nil)
- projects.create!(name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
- projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: nil)
-
- expect(described_class.on_hashed_storage.pluck(:id)).to match_array(hashed_projects.map(&:id))
+ expect(described_class.on_hashed_storage.pluck(:id)).to match_array([1, 2])
end
end
describe '.without_project_repository' do
- it 'finds projects which do not have a projects_repositories row' do
- without_project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id)
- with_project = projects.create!(name: 'bar', path: 'bar', namespace_id: group.id)
- project_repositories.create!(project_id: with_project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id)
+ it 'finds projects which do not have a projects_repositories entry' do
+ projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id)
+ projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id)
+ project_repositories.create!(project_id: 2, disk_path: '@phony/foo/bar', shard_id: shard.id)
- expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(without_project.id)
- end
- end
-
- describe '#project_repository_attributes' do
- let(:shard_finder) { Gitlab::BackgroundMigration::BackfillHashedProjectRepositories::ShardFinder.new }
-
- it 'composes the correct attributes for project_repository' do
- shiny_shard = shards.create!(name: 'shiny')
- project = projects.create!(id: 5, name: 'foo', path: 'foo', namespace_id: group.id, repository_storage: shiny_shard.name, storage_version: 1)
-
- expected_attributes = {
- project_id: project.id,
- shard_id: shiny_shard.id,
- disk_path: '@hashed/ef/2d/ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d'
- }
-
- expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to eq(expected_attributes)
- end
-
- it 'returns nil for a project not on hashed storage' do
- project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
-
- expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to be_nil
+ expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(1)
end
end
end
describe '#perform' do
- def perform!
- described_class.new.perform(1, projects.last.id)
- end
-
- it 'create project_repository row for hashed storage project' do
- projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+ it 'creates a project_repository row for projects on hashed storage that need one' do
+ projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+ projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
- expect do
- perform!
- end.to change(project_repositories, :count).by(1)
+ expect { described_class.new.perform(1, projects.last.id) }.to change(project_repositories, :count).by(2)
end
- it 'does nothing for projects that have already a project_repository' do
- project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
- project_repositories.create!(project_id: project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id)
+ it 'does nothing for projects on hashed storage that have already a project_repository row' do
+ projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+ project_repositories.create!(project_id: 1, disk_path: '@phony/foo/bar', shard_id: shard.id)
- expect do
- perform!
- end.not_to change(project_repositories, :count)
+ expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
end
it 'does nothing for projects on legacy storage' do
projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
- expect do
- perform!
- end.not_to change(project_repositories, :count)
+ expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
end
it 'inserts rows in a single query' do
- projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
+ projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
- control_count = ActiveRecord::QueryRecorder.new do
- perform!
- end
+ control_count = ActiveRecord::QueryRecorder.new { described_class.new.perform(1, projects.last.id) }
- projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
- projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
+ projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
+ projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
- expect { perform! }.not_to exceed_query_limit(control_count)
+ expect { described_class.new.perform(1, projects.last.id) }.not_to exceed_query_limit(control_count)
end
end
end