diff options
Diffstat (limited to 'lib/bulk_imports/common/extractors/ndjson_extractor.rb')
-rw-r--r-- | lib/bulk_imports/common/extractors/ndjson_extractor.rb | 68 |
1 files changed, 68 insertions, 0 deletions
# frozen_string_literal: true

module BulkImports
  module Common
    module Extractors
      # Extracts the records of a single relation from a gzipped NDJSON export.
      #
      # For a given pipeline context it runs, in order, a file download service,
      # a file decompression service and an NDJSON reader, all operating on a
      # temporary directory owned by this extractor instance.
      #
      # The temporary directory is created in the constructor and is NOT removed
      # automatically; callers must invoke {#remove_tmp_dir} when they are done.
      class NdjsonExtractor
        include Gitlab::ImportExport::CommandLineUtil
        include Gitlab::Utils::StrongMemoize

        # Template for the relative download URL. The placeholders are filled in
        # by #relative_resource_url.
        EXPORT_DOWNLOAD_URL_PATH = "/%{resource}/%{full_path}/export_relations/download?relation=%{relation}"

        # @param relation [String, Symbol] name of the relation to extract; it is
        #   used both as the `relation` query parameter and as the base name of
        #   the downloaded file (`<relation>.ndjson.gz`).
        def initialize(relation:)
          @relation = relation
          # Dir.mktmpdir without a block leaves cleanup to the caller,
          # see #remove_tmp_dir.
          @tmp_dir = Dir.mktmpdir
        end

        # Downloads, decompresses and reads the relation for the given context.
        #
        # @param context [Object] pipeline context; must respond to
        #   `configuration`, `portable` and `entity`.
        # @return [BulkImports::Pipeline::ExtractedData] wrapper around whatever
        #   the NDJSON reader returns for this relation.
        def extract(context)
          download_service(tmp_dir, context).execute
          decompression_service(tmp_dir).execute
          relations = ndjson_reader(tmp_dir).consume_relation('', relation)

          BulkImports::Pipeline::ExtractedData.new(data: relations)
        end

        # Removes the temporary directory created in the constructor.
        #
        # Safe to call more than once: FileUtils.remove_entry raises
        # Errno::ENOENT for a missing path, so the call is skipped when the
        # directory no longer exists.
        #
        # @return [void]
        def remove_tmp_dir
          FileUtils.remove_entry(tmp_dir) if Dir.exist?(tmp_dir)
        end

        private

        attr_reader :relation, :tmp_dir

        # @return [String] name of the compressed export file for the relation.
        def filename
          @filename ||= "#{relation}.ndjson.gz"
        end

        # Memoized with `||=`: the service is built from the arguments of the
        # FIRST call only; later calls return the same instance regardless of
        # the arguments passed.
        def download_service(tmp_dir, context)
          @download_service ||= BulkImports::FileDownloadService.new(
            configuration: context.configuration,
            relative_url: relative_resource_url(context),
            dir: tmp_dir,
            filename: filename
          )
        end

        # Memoized with `||=`; see the note on #download_service.
        def decompression_service(tmp_dir)
          @decompression_service ||= BulkImports::FileDecompressionService.new(
            dir: tmp_dir,
            filename: filename
          )
        end

        # Memoized with `||=`; see the note on #download_service.
        def ndjson_reader(tmp_dir)
          @ndjson_reader ||= Gitlab::ImportExport::Json::NdjsonReader.new(tmp_dir)
        end

        # Builds the relative URL from which the relation export is downloaded.
        #
        # The resource segment is the lower-cased, pluralized class name of
        # `context.portable` (presumably e.g. a `Group` portable yields
        # "groups" - confirm against callers). The value is memoized, so only
        # the first context passed in is ever used.
        #
        # @return [String]
        def relative_resource_url(context)
          strong_memoize(:relative_resource_url) do
            resource = context.portable.class.name.downcase.pluralize
            encoded_full_path = context.entity.encoded_source_full_path

            EXPORT_DOWNLOAD_URL_PATH % { resource: resource, full_path: encoded_full_path, relation: relation }
          end
        end
      end
    end
  end
end