From 859a6fb938bb9ee2a317c46dfa4fcc1af49608f0 Mon Sep 17 00:00:00 2001 From: GitLab Bot Date: Thu, 18 Feb 2021 10:34:06 +0000 Subject: Add latest changes from gitlab-org/gitlab@13-9-stable-ee --- app/services/pages/delete_service.rb | 8 ++- .../pages/migrate_from_legacy_storage_service.rb | 79 ++++++++++++++++++++++ ...migrate_legacy_storage_to_deployment_service.rb | 5 +- app/services/pages/zip_directory_service.rb | 53 +++++++-------- 4 files changed, 114 insertions(+), 31 deletions(-) create mode 100644 app/services/pages/migrate_from_legacy_storage_service.rb (limited to 'app/services/pages') diff --git a/app/services/pages/delete_service.rb b/app/services/pages/delete_service.rb index fc5d01a93a1..3dc9254718e 100644 --- a/app/services/pages/delete_service.rb +++ b/app/services/pages/delete_service.rb @@ -3,7 +3,13 @@ module Pages class DeleteService < BaseService def execute - PagesRemoveWorker.perform_async(project.id) + project.mark_pages_as_not_deployed # prevents domain from updating config when deleted + project.pages_domains.delete_all + + DestroyPagesDeploymentsWorker.perform_async(project.id) + + # TODO: remove this call https://gitlab.com/gitlab-org/gitlab/-/issues/320775 + PagesRemoveWorker.perform_async(project.id) if Feature.enabled?(:pages_update_legacy_storage, default_enabled: true) end end end diff --git a/app/services/pages/migrate_from_legacy_storage_service.rb b/app/services/pages/migrate_from_legacy_storage_service.rb new file mode 100644 index 00000000000..9b36b3f11b4 --- /dev/null +++ b/app/services/pages/migrate_from_legacy_storage_service.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +module Pages + class MigrateFromLegacyStorageService + def initialize(logger, migration_threads:, batch_size:, ignore_invalid_entries:) + @logger = logger + @migration_threads = migration_threads + @batch_size = batch_size + @ignore_invalid_entries = ignore_invalid_entries + + @migrated = 0 + @errored = 0 + @counters_lock = Mutex.new + end + + def execute + @queue = SizedQueue.new(1) + + threads = start_migration_threads + + ProjectPagesMetadatum.only_on_legacy_storage.each_batch(of: @batch_size) do |batch| + @queue.push(batch) + end + + @queue.close + + @logger.info("Waiting for threads to finish...") + threads.each(&:join) + + { migrated: @migrated, errored: @errored } + end + + def start_migration_threads + Array.new(@migration_threads) do + Thread.new do + while batch = @queue.pop + Rails.application.executor.wrap do + process_batch(batch) + end + end + end + end + end + + def process_batch(batch) + batch.with_project_route_and_deployment.each do |metadatum| + project = metadatum.project + + migrate_project(project) + end + + @logger.info("#{@migrated} projects are migrated successfully, #{@errored} projects failed to be migrated") + rescue => e + # This method should never raise exception otherwise all threads might be killed + # and this will result in queue starving (and deadlock) + Gitlab::ErrorTracking.track_exception(e) + @logger.error("failed processing a batch: #{e.message}") + end + + def migrate_project(project) + result = nil + time = Benchmark.realtime do + result = ::Pages::MigrateLegacyStorageToDeploymentService.new(project, ignore_invalid_entries: @ignore_invalid_entries).execute + end + + if result[:status] == :success + @logger.info("project_id: #{project.id} #{project.pages_path} has been migrated in #{time.round(2)} seconds") + @counters_lock.synchronize { @migrated += 1 } + else + @logger.error("project_id: #{project.id} #{project.pages_path} failed to be migrated in #{time.round(2)} seconds: #{result[:message]}") + @counters_lock.synchronize { @errored += 1 } + end + rescue => e + @counters_lock.synchronize { @errored += 1 } + @logger.error("project_id: #{project&.id} #{project&.pages_path} failed to be migrated: #{e.message}") + Gitlab::ErrorTracking.track_exception(e, project_id: project&.id) + end + end +end diff --git a/app/services/pages/migrate_legacy_storage_to_deployment_service.rb b/app/services/pages/migrate_legacy_storage_to_deployment_service.rb index dac994b2ccc..63410b9fe4a 100644 --- a/app/services/pages/migrate_legacy_storage_to_deployment_service.rb +++ b/app/services/pages/migrate_legacy_storage_to_deployment_service.rb @@ -9,8 +9,9 @@ module Pages attr_reader :project - def initialize(project) + def initialize(project, ignore_invalid_entries: false) @project = project + @ignore_invalid_entries = ignore_invalid_entries end def execute @@ -26,7 +27,7 @@ module Pages private def execute_unsafe - zip_result = ::Pages::ZipDirectoryService.new(project.pages_path).execute + zip_result = ::Pages::ZipDirectoryService.new(project.pages_path, ignore_invalid_entries: @ignore_invalid_entries).execute if zip_result[:status] == :error if !project.pages_metadatum&.reload&.pages_deployment && diff --git a/app/services/pages/zip_directory_service.rb b/app/services/pages/zip_directory_service.rb index ba7a8571e88..ae08d40ee37 100644 --- a/app/services/pages/zip_directory_service.rb +++ b/app/services/pages/zip_directory_service.rb @@ -10,12 +10,17 @@ module Pages PUBLIC_DIR = 'public' - def initialize(input_dir) + attr_reader :public_dir, :real_dir + + def initialize(input_dir, ignore_invalid_entries: false) @input_dir = input_dir + @ignore_invalid_entries = ignore_invalid_entries end def execute - return error("Can not find valid public dir in #{@input_dir}") unless valid_path?(public_dir) + unless resolve_public_dir + return error("Can not find valid public dir in #{@input_dir}") + end output_file = File.join(real_dir, "@migrated.zip") # '@' to avoid any name collision with groups or projects @@ -35,24 +40,36 @@ module Pages private + def resolve_public_dir + @real_dir = File.realpath(@input_dir) + @public_dir = File.join(real_dir, PUBLIC_DIR) + + valid_path?(public_dir) + rescue Errno::ENOENT + false + end + def write_entry(zipfile, zipfile_path) disk_file_path = File.join(real_dir, zipfile_path) unless valid_path?(disk_file_path) # archive with invalid entry will just have this entry missing - raise InvalidEntryError + raise InvalidEntryError, "#{disk_file_path} is invalid, input_dir: #{@input_dir}" end - case File.lstat(disk_file_path).ftype + ftype = File.lstat(disk_file_path).ftype + case ftype when 'directory' recursively_zip_directory(zipfile, disk_file_path, zipfile_path) when 'file', 'link' zipfile.add(zipfile_path, disk_file_path) else - raise InvalidEntryError + raise InvalidEntryError, "#{disk_file_path} has invalid ftype: #{ftype}, input_dir: #{@input_dir}" end - rescue InvalidEntryError => e + rescue Errno::ENOENT, Errno::ELOOP, InvalidEntryError => e Gitlab::ErrorTracking.track_exception(e, input_dir: @input_dir, disk_file_path: disk_file_path) + + raise e unless @ignore_invalid_entries end def recursively_zip_directory(zipfile, disk_file_path, zipfile_path) @@ -70,31 +87,11 @@ module Pages end end - # that should never happen, but we want to be safer - # in theory without this we would allow to use symlinks - # to pack any directory on disk - # it isn't possible because SafeZip doesn't extract such archives + # SafeZip was introduced only recently, + # so we have invalid entries on disk def valid_path?(disk_file_path) realpath = File.realpath(disk_file_path) - realpath == public_dir || realpath.start_with?(public_dir + "/") - # happens if target of symlink isn't there - rescue => e - Gitlab::ErrorTracking.track_exception(e, input_dir: real_dir, disk_file_path: disk_file_path) - - false - end - - def real_dir - strong_memoize(:real_dir) do - File.realpath(@input_dir) rescue nil - end - end - - def public_dir - strong_memoize(:public_dir) do - File.join(real_dir, PUBLIC_DIR) rescue nil - end end end end -- cgit v1.2.3