Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2018-12-08 02:16:44 +0300
committerStan Hu <stanhu@gmail.com>2018-12-08 02:16:44 +0300
commit9655a602ac0d186e10c44f7b6bcdfc0f14ac7b6a (patch)
treee61c2ada5b2ad79e5607bb241df5a376c3238a84
parenta27ba8edfa6f41de135cf6f1573cc5366440f7b5 (diff)
parent8c9e692095afb59111c73781c8ee501cb4cb2459 (diff)
Merge branch 'tc-backfill-hashed-project_repositories' into 'master'
Fill project_repositories for hashed storage projects Closes #48527 See merge request gitlab-org/gitlab-ce!23482
-rw-r--r--changelogs/unreleased/tc-backfill-hashed-project_repositories.yml5
-rw-r--r--db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb26
-rw-r--r--lib/gitlab/background_migration/backfill_hashed_project_repositories.rb134
-rw-r--r--spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb90
4 files changed, 255 insertions, 0 deletions
diff --git a/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml b/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml
new file mode 100644
index 00000000000..90a5c8c4e2c
--- /dev/null
+++ b/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml
@@ -0,0 +1,5 @@
+---
+title: Fill project_repositories for hashed storage projects
+merge_request: 23482
+author:
+type: added
diff --git a/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
new file mode 100644
index 00000000000..7814cdba58a
--- /dev/null
+++ b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+# Post-deployment migration that passes the projects table to
+# `queue_background_migration_jobs_by_range_at_intervals` so that the
+# BackfillHashedProjectRepositories background migration gets scheduled.
+class BackfillHashedProjectRepositories < ActiveRecord::Migration[4.2]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ # NOTE(review): BATCH_SIZE is not referenced anywhere in this class;
+ # presumably the scheduling helper falls back to its own default - confirm.
+ BATCH_SIZE = 1_000
+ # Interval handed to the scheduling helper in #up.
+ DELAY_INTERVAL = 5.minutes
+ # Name of the background migration job class handed to the helper.
+ MIGRATION = 'BackfillHashedProjectRepositories'
+
+ disable_ddl_transaction!
+
+ # Migration-local model for the `projects` table; EachBatch is included
+ # presumably because the scheduling helper iterates the table in batches.
+ class Project < ActiveRecord::Base
+ include EachBatch
+
+ self.table_name = 'projects'
+ end
+
+ # Schedules the background migration jobs for the projects table.
+ def up
+ queue_background_migration_jobs_by_range_at_intervals(Project, MIGRATION, DELAY_INTERVAL)
+ end
+
+ # Intentionally does nothing.
+ def down
+ # no-op: rows may already have existed before this migration ran, so nothing is removed
+ end
+end
diff --git a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
new file mode 100644
index 00000000000..2f76f2f7434
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Class that will fill the project_repositories table
+ # for all projects that are on hashed storage and for which
+ # an entry is missing in this table.
+ class BackfillHashedProjectRepositories
+ # Shard model, bound to the `shards` table. Declared inside the
+ # migration class, so it is independent of any other Shard constant.
+ class Shard < ActiveRecord::Base
+ self.table_name = 'shards'
+ end
+
+      # Resolves a shard name to the id of its row in the shards table,
+      # creating the row when it does not exist yet.
+      #
+      # There is only a small set of shards, which would not change
+      # quickly, so they are looked up from memory instead of hitting
+      # the DB each time.
+      class ShardFinder
+        # Upper bound on how often a lookup is restarted after the insert
+        # collided with an already existing shard of the same name.
+        MAX_RETRIES = 3
+
+        # Returns the id of the shard called +name+, inserting the shard
+        # first when it is not in the in-memory cache.
+        #
+        # Always returns the id itself, also for a freshly created shard.
+        #
+        # Raises ActiveRecord::RecordNotUnique when the insert still
+        # conflicts after the cache has been reloaded MAX_RETRIES times.
+        def find_shard_id(name)
+          attempts = 0
+
+          begin
+            shard_id = shards[name]
+            return shard_id if shard_id.present?
+
+            # requires_new: true asks for a real nested transaction, so a
+            # failed insert only rolls back this inner block.
+            Shard.transaction(requires_new: true) { create!(name) }
+          rescue ActiveRecord::RecordNotUnique
+            attempts += 1
+            raise if attempts > MAX_RETRIES
+
+            # The name exists in the DB but not in the cache: refresh the
+            # cache and look it up again.
+            reload!
+            retry
+          end
+        end
+
+        private
+
+        # Inserts a shard called +name+, stores its id in the cache and
+        # returns that id.
+        def create!(name)
+          Shard.create!(name: name).id.tap { |id| shards[name] = id }
+        end
+
+        # Cache of shard name => shard id, loaded on first use.
+        def shards
+          @shards ||= reload!
+        end
+
+        # Rebuilds the cache from the shards table and returns it.
+        def reload!
+          @shards = Shard.pluck(:name, :id).to_h
+        end
+      end
+
+ # ProjectRepository model, bound to the `project_repositories` table.
+ class ProjectRepository < ActiveRecord::Base
+ self.table_name = 'project_repositories'
+
+ belongs_to :project, inverse_of: :project_repository
+ end
+
+      # Project model, bound to the `projects` table. Provides the scopes
+      # and path helpers this background migration uses.
+      class Project < ActiveRecord::Base
+        self.table_name = 'projects'
+
+        # First segment of every hashed disk path.
+        HASHED_PATH_PREFIX = '@hashed'
+
+        # storage_version thresholds; a project counts as being on hashed
+        # storage once its storage_version reaches the :repository value.
+        HASHED_STORAGE_FEATURES = {
+          repository: 1,
+          attachments: 2
+        }.freeze
+
+        has_one :project_repository, inverse_of: :project
+
+        class << self
+          # Projects whose storage_version is at least the :repository
+          # threshold. Rows with a NULL storage_version do not match.
+          def on_hashed_storage
+            where(Project.arel_table[:storage_version]
+              .gteq(HASHED_STORAGE_FEATURES[:repository]))
+          end
+
+          # Projects that have no matching row in project_repositories.
+          def without_project_repository
+            joins(left_outer_join_project_repository)
+              .where(ProjectRepository.arel_table[:project_id].eq(nil))
+          end
+
+          # Arel join sources for
+          # `projects LEFT OUTER JOIN project_repositories ON projects.id = project_repositories.project_id`.
+          def left_outer_join_project_repository
+            projects_table = Project.arel_table
+            repository_table = ProjectRepository.arel_table
+
+            projects_table
+              .join(repository_table, Arel::Nodes::OuterJoin)
+              .on(projects_table[:id].eq(repository_table[:project_id]))
+              .join_sources
+          end
+        end
+
+        # True when storage_version has reached the :repository threshold,
+        # i.e. the same condition as the on_hashed_storage scope.
+        # A nil storage_version counts as legacy storage and yields false.
+        def hashed_storage?
+          storage_version.to_i >= HASHED_STORAGE_FEATURES[:repository]
+        end
+
+        # Disk path of the form "@hashed/<hash[0..1]>/<hash[2..3]>/<hash>".
+        def hashed_disk_path
+          "#{HASHED_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
+        end
+
+        # SHA2 hex digest of the project id (as a string), memoized.
+        def disk_hash
+          @disk_hash ||= Digest::SHA2.hexdigest(id.to_s)
+        end
+      end
+
+      # Inserts the missing project_repositories rows for the projects
+      # whose id lies between start_id and stop_id (both inclusive).
+      def perform(start_id, stop_id)
+        rows = project_repositories(start_id, stop_id)
+
+        Gitlab::Database.bulk_insert(:project_repositories, rows)
+      end
+
+ private
+
+      # Builds the attribute hashes to insert for every project in the id
+      # range that is on hashed storage and has no project_repositories
+      # row yet. Projects for which no attributes are built are dropped.
+      def project_repositories(start_id, stop_id)
+        candidates = Project
+          .on_hashed_storage
+          .without_project_repository
+          .where(id: start_id..stop_id)
+
+        candidates.map { |candidate| build_attributes_for_project(candidate) }.compact
+      end
+
+      # Returns the project_repositories attributes for +project+, or nil
+      # when the project is not on hashed storage.
+      def build_attributes_for_project(project)
+        return unless project.hashed_storage?
+
+        shard_id = find_shard_id(project.repository_storage)
+
+        { project_id: project.id, shard_id: shard_id, disk_path: project.hashed_disk_path }
+      end
+
+ # Delegates the lookup of the shard for +repository_storage+ to the
+ # memoized ShardFinder.
+ def find_shard_id(repository_storage)
+ shard_finder.find_shard_id(repository_storage)
+ end
+
+ # ShardFinder instance, created on first use and reused afterwards so
+ # that its in-memory state is shared by all lookups of this object.
+ def shard_finder
+ @shard_finder ||= ShardFinder.new
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
new file mode 100644
index 00000000000..b6c1edbbf8b
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+# Specs for the background migration: the ShardFinder cache, the Project
+# scopes, and the rows inserted by #perform.
+describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do
+  let(:namespaces) { table(:namespaces) }
+  let(:project_repositories) { table(:project_repositories) }
+  let(:projects) { table(:projects) }
+  let(:shards) { table(:shards) }
+  let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
+  let(:shard) { shards.create!(name: 'default') }
+
+  describe described_class::ShardFinder do
+    describe '#find_shard_id' do
+      it 'creates a new shard when it does not exist yet' do
+        expect { subject.find_shard_id('other') }.to change(shards, :count).by(1)
+      end
+
+      it 'returns the shard when it exists' do
+        # create! (like every other fixture here) so a failed insert
+        # fails the example instead of being silently ignored.
+        shards.create!(id: 5, name: 'other')
+
+        shard_id = subject.find_shard_id('other')
+
+        expect(shard_id).to eq(5)
+      end
+
+      it 'only queries the database once to retrieve shards' do
+        # The first call fills the cache; the second must not query.
+        subject.find_shard_id('default')
+
+        expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0)
+      end
+    end
+  end
+
+  describe described_class::Project do
+    describe '.on_hashed_storage' do
+      it 'finds projects with repository on hashed storage' do
+        projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+        projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
+        projects.create!(id: 3, name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
+        projects.create!(id: 4, name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: nil)
+
+        expect(described_class.on_hashed_storage.pluck(:id)).to match_array([1, 2])
+      end
+    end
+
+    describe '.without_project_repository' do
+      it 'finds projects which do not have a projects_repositories entry' do
+        projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id)
+        projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id)
+        project_repositories.create!(project_id: 2, disk_path: '@phony/foo/bar', shard_id: shard.id)
+
+        expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(1)
+      end
+    end
+  end
+
+  describe '#perform' do
+    it 'creates a project_repository row for projects on hashed storage that need one' do
+      projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+      projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
+
+      expect { described_class.new.perform(1, projects.last.id) }.to change(project_repositories, :count).by(2)
+    end
+
+    it 'does nothing for projects on hashed storage that have already a project_repository row' do
+      projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+      project_repositories.create!(project_id: 1, disk_path: '@phony/foo/bar', shard_id: shard.id)
+
+      expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
+    end
+
+    it 'does nothing for projects on legacy storage' do
+      projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
+
+      expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
+    end
+
+    it 'inserts rows in a single query' do
+      projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
+
+      # Baseline: number of queries needed for a single project.
+      control_count = ActiveRecord::QueryRecorder.new { described_class.new.perform(1, projects.last.id) }
+
+      projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
+      projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
+
+      # More projects must not need more queries than the baseline.
+      expect { described_class.new.perform(1, projects.last.id) }.not_to exceed_query_limit(control_count)
+    end
+  end
+end