diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2022-03-18 23:02:30 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2022-03-18 23:02:30 +0300 |
commit | 41fe97390ceddf945f3d967b8fdb3de4c66b7dea (patch) | |
tree | 9c8d89a8624828992f06d892cd2f43818ff5dcc8 /lib/gitlab/import_export | |
parent | 0804d2dc31052fb45a1efecedc8e06ce9bc32862 (diff) |
Add latest changes from gitlab-org/gitlab@14-9-stable-ee (tag: v14.9.0-rc42)
Diffstat (limited to 'lib/gitlab/import_export')
-rw-r--r-- | lib/gitlab/import_export/base/relation_factory.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/import_export/base/relation_object_saver.rb | 109 | ||||
-rw-r--r-- | lib/gitlab/import_export/command_line_util.rb | 29 | ||||
-rw-r--r-- | lib/gitlab/import_export/file_importer.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/import_export/group/object_builder.rb | 9 | ||||
-rw-r--r-- | lib/gitlab/import_export/group/relation_tree_restorer.rb | 24 | ||||
-rw-r--r-- | lib/gitlab/import_export/json/streaming_serializer.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/import_export/project/import_export.yml | 1 |
8 files changed, 163 insertions, 25 deletions
diff --git a/lib/gitlab/import_export/base/relation_factory.rb b/lib/gitlab/import_export/base/relation_factory.rb index 8a8c74c302d..53dd6f8cd55 100644 --- a/lib/gitlab/import_export/base/relation_factory.rb +++ b/lib/gitlab/import_export/base/relation_factory.rb @@ -300,7 +300,7 @@ module Gitlab return cache[table_name] if cache.has_key?(table_name) index_exists = - ActiveRecord::Base.connection.index_exists?( + relation_class.connection.index_exists?( relation_class.table_name, importable_foreign_key, unique: true) diff --git a/lib/gitlab/import_export/base/relation_object_saver.rb b/lib/gitlab/import_export/base/relation_object_saver.rb new file mode 100644 index 00000000000..d0fae2cbb95 --- /dev/null +++ b/lib/gitlab/import_export/base/relation_object_saver.rb @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +# RelationObjectSaver allows for an alternative approach to persisting +# objects during Project/Group Import which persists object's +# nested collection subrelations separately, in batches. +# +# Instead of the regular `relation_object.save!` that opens one db +# transaction for the object itself and all of its subrelations we +# separate collection subrelations from the object and save them +# in batches in smaller more frequent db transactions. +module Gitlab + module ImportExport + module Base + class RelationObjectSaver + include Gitlab::Utils::StrongMemoize + + BATCH_SIZE = 100 + MIN_RECORDS_SIZE = 5 + + # @param relation_object [Object] Object of a project/group, e.g. an issue + # @param relation_key [String] Name of the object association to group/project, e.g. 
:issues + # @param relation_definition [Hash] Object subrelations as defined in import_export.yml + # @param importable [Project|Group] Project or group where relation object is getting saved to + # + # @example + # Gitlab::ImportExport::Base::RelationObjectSaver.new( + # relation_key: 'merge_requests', + # relation_object: #<MergeRequest id: root/mrs!1, notes: [#<Note id: nil, note: 'test', ...>, #<Note id: nil, noteL 'another note'>]>, + # relation_definition: {"metrics"=>{}, "award_emoji"=>{}, "notes"=>{"author"=>{}, ... }} + # importable: @importable + # ).execute + def initialize(relation_object:, relation_key:, relation_definition:, importable:) + @relation_object = relation_object + @relation_key = relation_key + @relation_definition = relation_definition + @importable = importable + @invalid_subrelations = [] + end + + def execute + move_subrelations + + relation_object.save! + + save_subrelations + ensure + log_invalid_subrelations + end + + private + + attr_reader :relation_object, :relation_key, :relation_definition, + :importable, :collection_subrelations, :invalid_subrelations + + # rubocop:disable GitlabSecurity/PublicSend + def save_subrelations + collection_subrelations.each_pair do |relation_name, records| + records.each_slice(BATCH_SIZE) do |batch| + valid_records, invalid_records = batch.partition { |record| record.valid? } + + invalid_subrelations << invalid_records + relation_object.public_send(relation_name) << valid_records + end + end + end + + def move_subrelations + strong_memoize(:collection_subrelations) do + relation_definition.each_key.each_with_object({}) do |definition, collection_subrelations| + subrelation = relation_object.public_send(definition) + association = relation_object.class.reflect_on_association(definition) + + if association&.collection? 
&& subrelation.size > MIN_RECORDS_SIZE + collection_subrelations[definition] = subrelation.records + + subrelation.clear + end + end + end + end + # rubocop:enable GitlabSecurity/PublicSend + + def log_invalid_subrelations + invalid_subrelations.flatten.each do |record| + Gitlab::Import::Logger.info( + message: '[Project/Group Import] Invalid subrelation', + importable_column_name => importable.id, + relation_key: relation_key, + error_messages: record.errors.full_messages.to_sentence + ) + + ImportFailure.create( + source: 'RelationObjectSaver#save!', + relation_key: relation_key, + exception_class: 'RecordInvalid', + exception_message: record.errors.full_messages.to_sentence, + correlation_id_value: Labkit::Correlation::CorrelationId.current_or_new_id, + importable_column_name => importable.id + ) + end + end + + def importable_column_name + @column_name ||= importable.class.reflect_on_association(:import_failures).foreign_key.to_sym + end + end + end + end +end diff --git a/lib/gitlab/import_export/command_line_util.rb b/lib/gitlab/import_export/command_line_util.rb index e520cade517..2b0467d8779 100644 --- a/lib/gitlab/import_export/command_line_util.rb +++ b/lib/gitlab/import_export/command_line_util.rb @@ -6,6 +6,8 @@ module Gitlab UNTAR_MASK = 'u+rwX,go+rX,go-w' DEFAULT_DIR_MODE = 0700 + FileOversizedError = Class.new(StandardError) + def tar_czf(archive:, dir:) tar_with_options(archive: archive, dir: dir, options: 'czf') end @@ -51,19 +53,34 @@ module Gitlab private - def download_or_copy_upload(uploader, upload_path) + def download_or_copy_upload(uploader, upload_path, size_limit: nil) if uploader.upload.local? 
copy_files(uploader.path, upload_path) else - download(uploader.url, upload_path) + download(uploader.url, upload_path, size_limit: size_limit) end end - def download(url, upload_path) - File.open(upload_path, 'w') do |file| - # Download (stream) file from the uploader's location - IO.copy_stream(URI.parse(url).open, file) + def download(url, upload_path, size_limit: nil) + File.open(upload_path, 'wb') do |file| + current_size = 0 + + Gitlab::HTTP.get(url, stream_body: true, allow_object_storage: true) do |fragment| + if [301, 302, 307].include?(fragment.code) + Gitlab::Import::Logger.warn(message: "received redirect fragment", fragment_code: fragment.code) + elsif fragment.code == 200 + current_size += fragment.bytesize + + raise FileOversizedError if size_limit.present? && current_size > size_limit + + file.write(fragment) + else + raise Gitlab::ImportExport::Error, "unsupported response downloading fragment #{fragment.code}" + end + end end + rescue FileOversizedError + nil end def tar_with_options(archive:, dir:, options:) diff --git a/lib/gitlab/import_export/file_importer.rb b/lib/gitlab/import_export/file_importer.rb index 5274fcec43e..829b3771518 100644 --- a/lib/gitlab/import_export/file_importer.rb +++ b/lib/gitlab/import_export/file_importer.rb @@ -72,9 +72,17 @@ module Gitlab import_export_upload = @importable.import_export_upload if import_export_upload.remote_import_url.present? 
- download(import_export_upload.remote_import_url, @archive_file) + download( + import_export_upload.remote_import_url, + @archive_file, + size_limit: ::Import::GitlabProjects::RemoteFileValidator::FILE_SIZE_LIMIT + ) else - download_or_copy_upload(import_export_upload.import_file, @archive_file) + download_or_copy_upload( + import_export_upload.import_file, + @archive_file, + size_limit: ::Import::GitlabProjects::RemoteFileValidator::FILE_SIZE_LIMIT + ) end end diff --git a/lib/gitlab/import_export/group/object_builder.rb b/lib/gitlab/import_export/group/object_builder.rb index 43cc7a78a61..e26f37c3347 100644 --- a/lib/gitlab/import_export/group/object_builder.rb +++ b/lib/gitlab/import_export/group/object_builder.rb @@ -13,21 +13,12 @@ module Gitlab super @group = @attributes['group'] - - update_description end private attr_reader :group - # Convert description empty string to nil - # due to existing object being saved with description: nil - # Which makes object lookup to fail since nil != '' - def update_description - attributes['description'] = nil if attributes['description'] == '' - end - def where_clauses [ where_clause_base, diff --git a/lib/gitlab/import_export/group/relation_tree_restorer.rb b/lib/gitlab/import_export/group/relation_tree_restorer.rb index c2cbd2fdf47..b44874f598c 100644 --- a/lib/gitlab/import_export/group/relation_tree_restorer.rb +++ b/lib/gitlab/import_export/group/relation_tree_restorer.rb @@ -29,7 +29,7 @@ module Gitlab end def restore - ActiveRecord::Base.uncached do + Gitlab::Database.all_uncached do ActiveRecord::Base.no_touching do update_params! @@ -79,10 +79,7 @@ module Gitlab relation_object.assign_attributes(importable_class_sym => @importable) - import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do - relation_object.save! 
- log_relation_creation(@importable, relation_key, relation_object) - end + save_relation_object(relation_object, relation_key, relation_definition, relation_index) rescue StandardError => e import_failure_service.log_import_failure( source: 'process_relation_item!', @@ -91,6 +88,23 @@ module Gitlab exception: e) end + def save_relation_object(relation_object, relation_key, relation_definition, relation_index) + if Feature.enabled?(:import_relation_object_persistence, default_enabled: :yaml) && relation_object.new_record? + Gitlab::ImportExport::Base::RelationObjectSaver.new( + relation_object: relation_object, + relation_key: relation_key, + relation_definition: relation_definition, + importable: @importable + ).execute + else + import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do + relation_object.save! + end + end + + log_relation_creation(@importable, relation_key, relation_object) + end + def import_failure_service @import_failure_service ||= ImportFailureService.new(@importable) end diff --git a/lib/gitlab/import_export/json/streaming_serializer.rb b/lib/gitlab/import_export/json/streaming_serializer.rb index d893c8dfaa3..55b8c1d4531 100644 --- a/lib/gitlab/import_export/json/streaming_serializer.rb +++ b/lib/gitlab/import_export/json/streaming_serializer.rb @@ -166,8 +166,6 @@ module Gitlab end def read_from_replica_if_available(&block) - return yield unless ::Feature.enabled?(:load_balancing_for_export_workers, type: :development, default_enabled: :yaml) - ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries(&block) end end diff --git a/lib/gitlab/import_export/project/import_export.yml b/lib/gitlab/import_export/project/import_export.yml index 059f6bd42e3..fc05cc1a79c 100644 --- a/lib/gitlab/import_export/project/import_export.yml +++ b/lib/gitlab/import_export/project/import_export.yml @@ -370,6 +370,7 @@ included_attributes: - :name - :email events: + 
- :project_id - :target_type - :action - :author_id |