diff options
16 files changed, 147 insertions, 98 deletions
diff --git a/app/services/projects/hashed_storage/migrate_attachments_service.rb b/app/services/projects/hashed_storage/migrate_attachments_service.rb index a1f0302aeb7..7476980a117 100644 --- a/app/services/projects/hashed_storage/migrate_attachments_service.rb +++ b/app/services/projects/hashed_storage/migrate_attachments_service.rb @@ -34,22 +34,22 @@ module Projects private - def move_folder!(old_disk_path, new_disk_path) - unless File.directory?(old_disk_path) - logger.info("Skipped attachments migration from '#{old_disk_path}' to '#{new_disk_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})") - return + def move_folder!(old_path, new_path) + unless File.directory?(old_path) + logger.info("Skipped attachments migration from '#{old_path}' to '#{new_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})") + return true end - if File.exist?(new_disk_path) - logger.error("Cannot migrate attachments from '#{old_disk_path}' to '#{new_disk_path}', target path already exist (PROJECT_ID=#{project.id})") - raise AttachmentMigrationError, "Target path '#{new_disk_path}' already exist" + if File.exist?(new_path) + logger.error("Cannot migrate attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})") + raise AttachmentMigrationError, "Target path '#{new_path}' already exist" end # Create hashed storage base path folder - FileUtils.mkdir_p(File.dirname(new_disk_path)) + FileUtils.mkdir_p(File.dirname(new_path)) - FileUtils.mv(old_disk_path, new_disk_path) - logger.info("Migrated project attachments from '#{old_disk_path}' to '#{new_disk_path}' (PROJECT_ID=#{project.id})") + FileUtils.mv(old_path, new_path) + logger.info("Migrated project attachments from '#{old_path}' to '#{new_path}' (PROJECT_ID=#{project.id})") true end diff --git a/app/services/projects/hashed_storage/migration_service.rb b/app/services/projects/hashed_storage/migration_service.rb index 140c35bdd52..7320f41ebf0 100644 --- a/app/services/projects/hashed_storage/migration_service.rb +++ b/app/services/projects/hashed_storage/migration_service.rb @@ -14,16 +14,26 @@ module Projects def execute # Migrate repository from Legacy to Hashed Storage unless project.hashed_storage?(:repository) - return unless HashedStorage::MigrateRepositoryService.new(project, old_disk_path, logger: logger).execute + return false unless migrate_repository end # Migrate attachments from Legacy to Hashed Storage unless project.hashed_storage?(:attachments) - HashedStorage::MigrateAttachmentsService.new(project, old_disk_path, logger: logger).execute + return false unless migrate_attachments end true end + + private + + def migrate_repository + HashedStorage::MigrateRepositoryService.new(project, old_disk_path, logger: logger).execute + end + + def migrate_attachments + HashedStorage::MigrateAttachmentsService.new(project, old_disk_path, logger: logger).execute + end end end end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 4a306fc7a26..85c123c2704 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -45,6 +45,8 @@ - github_importer:github_import_stage_import_pull_requests - github_importer:github_import_stage_import_repository +- hashed_storage:hashed_storage_migrator + - mail_scheduler:mail_scheduler_issue_due - mail_scheduler:mail_scheduler_notification_service @@ -131,7 +133,6 @@ - repository_fork - repository_import - repository_remove_remote -- storage_migrator - system_hook_push - update_merge_requests - upload_checksum diff --git a/app/workers/hashed_storage/migrator_worker.rb b/app/workers/hashed_storage/migrator_worker.rb new file mode 100644 index 00000000000..49e347d4060 --- /dev/null +++ b/app/workers/hashed_storage/migrator_worker.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module HashedStorage + class MigratorWorker + include ApplicationWorker + + queue_namespace :hashed_storage + + # @param [Integer] start initial ID of the batch + # @param [Integer] finish last ID of the batch + def perform(start, finish) + migrator = Gitlab::HashedStorage::Migrator.new + migrator.bulk_migrate(start: start, finish: finish) + end + end +end diff --git a/app/workers/project_migrate_hashed_storage_worker.rb b/app/workers/project_migrate_hashed_storage_worker.rb index c3cdf102382..1c8f313e6e9 100644 --- a/app/workers/project_migrate_hashed_storage_worker.rb +++ b/app/workers/project_migrate_hashed_storage_worker.rb @@ -4,21 +4,25 @@ class ProjectMigrateHashedStorageWorker include ApplicationWorker LEASE_TIMEOUT = 30.seconds.to_i + LEASE_KEY_SEGMENT = 'project_migrate_hashed_storage_worker'.freeze # rubocop: disable CodeReuse/ActiveRecord def perform(project_id, old_disk_path = nil) - project = Project.find_by(id: project_id) - return if project.nil? || project.pending_delete? - uuid = lease_for(project_id).try_obtain + if uuid - ::Projects::HashedStorage::MigrationService.new(project, old_disk_path || project.full_path, logger: logger).execute + project = Project.find_by(id: project_id) + return if project.nil? || project.pending_delete? + + old_disk_path ||= project.disk_path + + ::Projects::HashedStorage::MigrationService.new(project, old_disk_path, logger: logger).execute else - false + return false end - rescue => ex + + ensure cancel_lease_for(project_id, uuid) if uuid - raise ex end # rubocop: enable CodeReuse/ActiveRecord @@ -29,7 +33,8 @@ class ProjectMigrateHashedStorageWorker private def lease_key(project_id) - "project_migrate_hashed_storage_worker:#{project_id}" + # we share the same lease key for both migration and rollback so they don't run simultaneously + "#{LEASE_KEY_SEGMENT}:#{project_id}" end def cancel_lease_for(project_id, uuid) diff --git a/app/workers/storage_migrator_worker.rb b/app/workers/storage_migrator_worker.rb deleted file mode 100644 index 714c4eed5b2..00000000000 --- a/app/workers/storage_migrator_worker.rb +++ /dev/null @@ -1,16 +0,0 @@ -# frozen_string_literal: true - -class StorageMigratorWorker - include ApplicationWorker - - # @param [Integer] start initial ID of the batch - # @param [Integer] finish last ID of the batch - # @param [String] operation the operation to be performed: ['migrate', 'rollback'] - def perform(start, finish, operation = :migrate) - # when scheduling a job symbols are converted to string, we need to convert back - operation = operation.to_sym if operation - - migrator = Gitlab::HashedStorage::Migrator.new - migrator.bulk_migrate(start: start, finish: finish, operation: operation) - end -end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 5f87d04c697..1e094c03171 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -68,7 +68,7 @@ - [background_migration, 1] - [gcp_cluster, 1] - [project_migrate_hashed_storage, 1] - - [storage_migrator, 1] + - [hashed_storage, 1] - [pages_domain_verification, 1] - [object_storage_upload, 1] - [object_storage, 1] diff --git a/lib/gitlab/hashed_storage/migrator.rb b/lib/gitlab/hashed_storage/migrator.rb index 4794b1978db..bf463077dcc 100644 --- a/lib/gitlab/hashed_storage/migrator.rb +++ b/lib/gitlab/hashed_storage/migrator.rb @@ -13,35 +13,28 @@ module Gitlab # # @param [Integer] start first project id for the range # @param [Integer] finish last project id for the range - # @param [Symbol] operation [:migrate, :rollback] - def bulk_schedule(start:, finish:, operation: :migrate) - StorageMigratorWorker.perform_async(start, finish, operation) + def bulk_schedule(start:, finish:) + ::HashedStorage::MigratorWorker.perform_async(start, finish) end # Start migration of projects from specified range # - # Flagging a project to be migrated is a synchronous action, + # Flagging a project to be migrated is a synchronous action # but the migration runs through async jobs # # @param [Integer] start first project id for the range # @param [Integer] finish last project id for the range - # @param [Symbol] operation [:migrate, :rollback] # rubocop: disable CodeReuse/ActiveRecord - def bulk_migrate(start:, finish:, operation: :migrate) + def bulk_migrate(start:, finish:) projects = build_relation(start, finish) projects.with_route.find_each(batch_size: BATCH_SIZE) do |project| - case operation - when :migrate - migrate(project) - when :rollback - rollback(project) - end + migrate(project) end end # rubocop: enable CodeReuse/ActiveRecord - # Flag a project to be migrated + # Flag a project to be migrated to Hashed Storage # # @param [Project] project that will be migrated def migrate(project) diff --git a/lib/tasks/gitlab/storage.rake b/lib/tasks/gitlab/storage.rake index 8212be90b97..f9ce3e1d338 100644 --- a/lib/tasks/gitlab/storage.rake +++ b/lib/tasks/gitlab/storage.rake @@ -37,7 +37,7 @@ namespace :gitlab do print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}" helper.project_id_batches do |start, finish| - storage_migrator.bulk_schedule(start: start, finish: finish, operation: :migrate) + storage_migrator.bulk_schedule(start: start, finish: finish) print '.' end diff --git a/spec/lib/gitlab/hashed_storage/migrator_spec.rb b/spec/lib/gitlab/hashed_storage/migrator_spec.rb index 6d68cb0744b..3942f168ceb 100644 --- a/spec/lib/gitlab/hashed_storage/migrator_spec.rb +++ b/spec/lib/gitlab/hashed_storage/migrator_spec.rb @@ -4,7 +4,7 @@ describe Gitlab::HashedStorage::Migrator do describe '#bulk_schedule' do it 'schedules job to StorageMigratorWorker' do Sidekiq::Testing.fake! do - expect { subject.bulk_schedule(start: 1, finish: 5) }.to change(StorageMigratorWorker.jobs, :size).by(1) + expect { subject.bulk_schedule(start: 1, finish: 5) }.to change(HashedStorage::MigratorWorker.jobs, :size).by(1) end end end @@ -46,7 +46,7 @@ describe Gitlab::HashedStorage::Migrator do describe '#migrate' do let(:project) { create(:project, :legacy_storage, :empty_repo) } - it 'enqueues job to ProjectMigrateHashedStorageWorker' do + it 'enqueues project migration job' do Sidekiq::Testing.fake! do expect { subject.migrate(project) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1) end @@ -58,7 +58,7 @@ describe Gitlab::HashedStorage::Migrator do expect { subject.migrate(project) }.not_to raise_error end - it 'migrate project' do + it 'migrates project storage' do perform_enqueued_jobs do subject.migrate(project) end @@ -73,5 +73,19 @@ describe Gitlab::HashedStorage::Migrator do expect(project.reload.repository_read_only?).to be_falsey end + + context 'when project is already on hashed storage' do + let(:project) { create(:project, :empty_repo) } + + it 'doesnt enqueue any migration job' do + Sidekiq::Testing.fake! do + expect { subject.migrate(project) }.not_to change(ProjectMigrateHashedStorageWorker.jobs, :size) + end + end + + it 'returns false' do + expect(subject.migrate(project)).to be_falsey + end + end end end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 437f0066450..4b061b5e24f 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -3224,7 +3224,7 @@ describe Project do end context 'legacy storage' do - let(:project) { create(:project, :repository, :legacy_storage) } + set(:project) { create(:project, :repository, :legacy_storage) } let(:gitlab_shell) { Gitlab::Shell.new } let(:project_storage) { project.send(:storage) } @@ -3279,13 +3279,14 @@ describe Project do end describe '#migrate_to_hashed_storage!' do + let(:project) { create(:project, :empty_repo, :legacy_storage) } + it 'returns true' do expect(project.migrate_to_hashed_storage!).to be_truthy end - it 'does not validate project visibility' do - expect(project).not_to receive(:visibility_level_allowed_as_fork) - expect(project).not_to receive(:visibility_level_allowed_by_group) + it 'does not run validation' do + expect(project).not_to receive(:valid?) project.migrate_to_hashed_storage! end @@ -3315,7 +3316,7 @@ describe Project do end context 'hashed storage' do - let(:project) { create(:project, :repository, skip_disk_validation: true) } + set(:project) { create(:project, :repository, skip_disk_validation: true) } let(:gitlab_shell) { Gitlab::Shell.new } let(:hash) { Digest::SHA2.hexdigest(project.id.to_s) } let(:hashed_prefix) { File.join('@hashed', hash[0..1], hash[2..3]) } @@ -3372,6 +3373,8 @@ describe Project do end describe '#migrate_to_hashed_storage!' do + let(:project) { create(:project, :repository, skip_disk_validation: true) } + it 'returns nil' do expect(project.migrate_to_hashed_storage!).to be_nil end @@ -3381,10 +3384,12 @@ describe Project do end context 'when partially migrated' do - it 'returns true' do + it 'enqueues a job' do project = create(:project, storage_version: 1, skip_disk_validation: true) - expect(project.migrate_to_hashed_storage!).to be_truthy + Sidekiq::Testing.fake! do + expect { project.migrate_to_hashed_storage! }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1) + end end end end diff --git a/spec/services/projects/hashed_storage/migrate_attachments_service_spec.rb b/spec/services/projects/hashed_storage/migrate_attachments_service_spec.rb index 28d8a95fe07..bef12df583e 100644 --- a/spec/services/projects/hashed_storage/migrate_attachments_service_spec.rb +++ b/spec/services/projects/hashed_storage/migrate_attachments_service_spec.rb @@ -3,7 +3,7 @@ require 'spec_helper' describe Projects::HashedStorage::MigrateAttachmentsService do subject(:service) { described_class.new(project, project.full_path, logger: nil) } - let(:project) { create(:project, :legacy_storage) } + let(:project) { create(:project, :repository, storage_version: 1, skip_disk_validation: true) } let(:legacy_storage) { Storage::LegacyProject.new(project) } let(:hashed_storage) { Storage::HashedProject.new(project) } @@ -28,6 +28,10 @@ describe Projects::HashedStorage::MigrateAttachmentsService do expect(File.file?(old_disk_path)).to be_falsey expect(File.file?(new_disk_path)).to be_truthy end + + it 'returns true' do + expect(service.execute).to be_truthy + end end context 'when original folder does not exist anymore' do @@ -43,6 +47,12 @@ describe Projects::HashedStorage::MigrateAttachmentsService do expect(File.exist?(base_path(hashed_storage))).to be_falsey expect(File.file?(new_disk_path)).to be_falsey end + + it 'returns true' do + expect(FileUtils).not_to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage)) + + expect(service.execute).to be_truthy + end end context 'when target folder already exists' do @@ -58,6 +68,18 @@ describe Projects::HashedStorage::MigrateAttachmentsService do end end + context '#old_disk_path' do + it 'returns old disk_path for project' do + expect(service.old_disk_path).to eq(project.full_path) + end + end + + context '#new_disk_path' do + it 'returns new disk_path for project' do + expect(service.new_disk_path).to eq(project.disk_path) + end + end + def base_path(storage) File.join(FileUploader.root, storage.disk_path) end diff --git a/spec/services/projects/hashed_storage/migrate_repository_service_spec.rb b/spec/services/projects/hashed_storage/migrate_repository_service_spec.rb index b720f37ffdb..0772dc4b85b 100644 --- a/spec/services/projects/hashed_storage/migrate_repository_service_spec.rb +++ b/spec/services/projects/hashed_storage/migrate_repository_service_spec.rb @@ -8,9 +8,12 @@ describe Projects::HashedStorage::MigrateRepositoryService do let(:legacy_storage) { Storage::LegacyProject.new(project) } let(:hashed_storage) { Storage::HashedProject.new(project) } - subject(:service) { described_class.new(project, project.full_path) } + subject(:service) { described_class.new(project, project.disk_path) } describe '#execute' do + let(:old_disk_path) { legacy_storage.disk_path } + let(:new_disk_path) { hashed_storage.disk_path } + before do allow(service).to receive(:gitlab_shell) { gitlab_shell } end @@ -33,8 +36,8 @@ describe Projects::HashedStorage::MigrateRepositoryService do it 'renames project and wiki repositories' do service.execute - expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.git")).to be_truthy - expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.wiki.git")).to be_truthy + expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.git")).to be_truthy + expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.wiki.git")).to be_truthy end it 'updates project to be hashed and not read-only' do @@ -45,8 +48,8 @@ describe Projects::HashedStorage::MigrateRepositoryService do end it 'move operation is called for both repositories' do - expect_move_repository(project.disk_path, hashed_storage.disk_path) - expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki") + expect_move_repository(old_disk_path, new_disk_path) + expect_move_repository("#{old_disk_path}.wiki", "#{new_disk_path}.wiki") service.execute end @@ -62,32 +65,27 @@ describe Projects::HashedStorage::MigrateRepositoryService do context 'when one move fails' do it 'rollsback repositories to original name' do - from_name = project.disk_path - to_name = hashed_storage.disk_path allow(service).to receive(:move_repository).and_call_original - allow(service).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only + allow(service).to receive(:move_repository).with(old_disk_path, new_disk_path).once { false } # will disable first move only expect(service).to receive(:rollback_folder_move).and_call_original service.execute - expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.git")).to be_falsey - expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.wiki.git")).to be_falsey + expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.git")).to be_falsey + expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.wiki.git")).to be_falsey expect(project.repository_read_only?).to be_falsey end context 'when rollback fails' do - let(:from_name) { legacy_storage.disk_path } - let(:to_name) { hashed_storage.disk_path } - before do hashed_storage.ensure_storage_path_exists - gitlab_shell.mv_repository(project.repository_storage, from_name, to_name) + gitlab_shell.mv_repository(project.repository_storage, old_disk_path, new_disk_path) end - it 'does not try to move nil repository over hashed' do - expect(gitlab_shell).not_to receive(:mv_repository).with(project.repository_storage, from_name, to_name) - expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki") + it 'does not try to move nil repository over existing' do + expect(gitlab_shell).not_to receive(:mv_repository).with(project.repository_storage, old_disk_path, new_disk_path) + expect_move_repository("#{old_disk_path}.wiki", "#{new_disk_path}.wiki") service.execute end diff --git a/spec/tasks/gitlab/storage_rake_spec.rb b/spec/tasks/gitlab/storage_rake_spec.rb index 3b8d5ad7015..6b50670c3c0 100644 --- a/spec/tasks/gitlab/storage_rake_spec.rb +++ b/spec/tasks/gitlab/storage_rake_spec.rb @@ -58,7 +58,7 @@ describe 'rake gitlab:storage:*' do context '0 legacy projects' do it 'does nothing' do - expect(StorageMigratorWorker).not_to receive(:perform_async) + expect(::HashedStorage::MigratorWorker).not_to receive(:perform_async) run_rake_task(task) end @@ -72,9 +72,9 @@ describe 'rake gitlab:storage:*' do stub_env('BATCH' => 1) end - it 'enqueues one StorageMigratorWorker per project' do + it 'enqueues one HashedStorage::MigratorWorker per project' do projects.each do |project| - expect(StorageMigratorWorker).to receive(:perform_async).with(project.id, project.id, :migrate) + expect(::HashedStorage::MigratorWorker).to receive(:perform_async).with(project.id, project.id) end run_rake_task(task) @@ -86,10 +86,10 @@ describe 'rake gitlab:storage:*' do stub_env('BATCH' => 2) end - it 'enqueues one StorageMigratorWorker per 2 projects' do + it 'enqueues one HashedStorage::MigratorWorker per 2 projects' do projects.map(&:id).sort.each_slice(2) do |first, last| last ||= first - expect(StorageMigratorWorker).to receive(:perform_async).with(first, last, :migrate) + expect(::HashedStorage::MigratorWorker).to receive(:perform_async).with(first, last) end run_rake_task(task) diff --git a/spec/workers/storage_migrator_worker_spec.rb b/spec/workers/hashed_storage/migrator_worker_spec.rb index 63f3c1d1c59..a85f820a3eb 100644 --- a/spec/workers/storage_migrator_worker_spec.rb +++ b/spec/workers/hashed_storage/migrator_worker_spec.rb @@ -1,13 +1,13 @@ require 'spec_helper' -describe StorageMigratorWorker do +describe HashedStorage::MigratorWorker do subject(:worker) { described_class.new } let(:projects) { create_list(:project, 2, :legacy_storage, :empty_repo) } let(:ids) { projects.map(&:id) } describe '#perform' do it 'delegates to MigratorService' do - expect_any_instance_of(Gitlab::HashedStorage::Migrator).to receive(:bulk_migrate).with(start: 5, finish: 10, operation: :migrate) + expect_any_instance_of(Gitlab::HashedStorage::Migrator).to receive(:bulk_migrate).with(start: 5, finish: 10) worker.perform(5, 10) end diff --git a/spec/workers/project_migrate_hashed_storage_worker_spec.rb b/spec/workers/project_migrate_hashed_storage_worker_spec.rb index 18470923316..333eb6a0569 100644 --- a/spec/workers/project_migrate_hashed_storage_worker_spec.rb +++ b/spec/workers/project_migrate_hashed_storage_worker_spec.rb @@ -4,12 +4,13 @@ describe ProjectMigrateHashedStorageWorker, :clean_gitlab_redis_shared_state do include ExclusiveLeaseHelpers describe '#perform' do - let(:project) { create(:project, :empty_repo) } + let(:project) { create(:project, :empty_repo, :legacy_storage) } let(:lease_key) { "project_migrate_hashed_storage_worker:#{project.id}" } - let(:lease_timeout) { ProjectMigrateHashedStorageWorker::LEASE_TIMEOUT } + let(:lease_timeout) { described_class::LEASE_TIMEOUT } + let(:migration_service) { ::Projects::HashedStorage::MigrationService } it 'skips when project no longer exists' do - expect(::Projects::HashedStorage::MigrationService).not_to receive(:new) + expect(migration_service).not_to receive(:new) subject.perform(-1) end @@ -17,29 +18,29 @@ describe ProjectMigrateHashedStorageWorker, :clean_gitlab_redis_shared_state do it 'skips when project is pending delete' do pending_delete_project = create(:project, :empty_repo, pending_delete: true) - expect(::Projects::HashedStorage::MigrationService).not_to receive(:new) + expect(migration_service).not_to receive(:new) subject.perform(pending_delete_project.id) end - it 'delegates removal to service class when have exclusive lease' do + it 'delegates migration to service class when we have exclusive lease' do stub_exclusive_lease(lease_key, 'uuid', timeout: lease_timeout) - migration_service = spy + service_spy = spy - allow(::Projects::HashedStorage::MigrationService) + allow(migration_service) .to receive(:new).with(project, project.full_path, logger: subject.logger) - .and_return(migration_service) + .and_return(service_spy) subject.perform(project.id) - expect(migration_service).to have_received(:execute) + expect(service_spy).to have_received(:execute) end - it 'skips when dont have lease when dont have exclusive lease' do + it 'skips when it cant acquire the exclusive lease' do stub_exclusive_lease_taken(lease_key, timeout: lease_timeout) - expect(::Projects::HashedStorage::MigrationService).not_to receive(:new) + expect(migration_service).not_to receive(:new) subject.perform(project.id) end |