From 0eeadb2dd2cf20ab1c12f1b0e86fa1227e044b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Thu, 5 Sep 2019 15:51:27 +0200 Subject: Normalize import_export structure This brings a significant refactor to how we handle `import_export.yml`, merge it with EE and how we handle that for reader and saver. This is meant to simplify the code, and remove a ton of conditions to handle different models of the structure. This is also meant to prepare the structure to extend it much easier, like adding `preload:` or additional object types when needed. This does not change the behavior of import/export, rather unifies and simplifies the current implementation. --- lib/gitlab/import_export/attributes_finder.rb | 54 ++++--- lib/gitlab/import_export/config.rb | 81 ++++------- lib/gitlab/import_export/import_export.yml | 170 +++++++++++----------- lib/gitlab/import_export/json_hash_builder.rb | 117 --------------- lib/gitlab/import_export/project_tree_restorer.rb | 76 +++++----- lib/gitlab/import_export/project_tree_saver.rb | 21 +-- lib/gitlab/import_export/reader.rb | 30 +--- 7 files changed, 198 insertions(+), 351 deletions(-) delete mode 100644 lib/gitlab/import_export/json_hash_builder.rb (limited to 'lib') diff --git a/lib/gitlab/import_export/attributes_finder.rb b/lib/gitlab/import_export/attributes_finder.rb index 42cd94add79..13883ca7f3d 100644 --- a/lib/gitlab/import_export/attributes_finder.rb +++ b/lib/gitlab/import_export/attributes_finder.rb @@ -3,35 +3,19 @@ module Gitlab module ImportExport class AttributesFinder - def initialize(included_attributes:, excluded_attributes:, methods:) - @included_attributes = included_attributes || {} - @excluded_attributes = excluded_attributes || {} - @methods = methods || {} + def initialize(config:) + @tree = config[:tree] || {} + @included_attributes = config[:included_attributes] || {} + @excluded_attributes = config[:excluded_attributes] || {} + @methods = config[:methods] || {} end - def find(model_object) - parsed_hash = find_attributes_only(model_object) - parsed_hash.empty? ? model_object : { model_object => parsed_hash } + def find_root(model_key) + find(model_key, @tree[model_key]) end - def parse(model_object) - parsed_hash = find_attributes_only(model_object) - yield parsed_hash unless parsed_hash.empty? - end - - def find_included(value) - key = key_from_hash(value) - @included_attributes[key].nil? ? {} : { only: @included_attributes[key] } - end - - def find_excluded(value) - key = key_from_hash(value) - @excluded_attributes[key].nil? ? {} : { except: @excluded_attributes[key] } - end - - def find_method(value) - key = key_from_hash(value) - @methods[key].nil? ? {} : { methods: @methods[key] } + def find_relations_tree(model_key) + @tree[model_key] end def find_excluded_keys(klass_name) @@ -40,12 +24,24 @@ module Gitlab private - def find_attributes_only(value) - find_included(value).merge(find_excluded(value)).merge(find_method(value)) + def find(model_key, model_tree) + { + only: @included_attributes[model_key], + except: @excluded_attributes[model_key], + methods: @methods[model_key], + include: resolve_model_tree(model_tree) + }.compact + end + + def resolve_model_tree(model_tree) + return unless model_tree + + model_tree + .map(&method(:resolve_model)) end - def key_from_hash(value) - value.is_a?(Hash) ? value.first.first : value + def resolve_model(model_key, model_tree) + { model_key => find(model_key, model_tree) } end end end diff --git a/lib/gitlab/import_export/config.rb b/lib/gitlab/import_export/config.rb index f6cd4eb5e0c..6f4919ead4e 100644 --- a/lib/gitlab/import_export/config.rb +++ b/lib/gitlab/import_export/config.rb @@ -3,70 +3,49 @@ module Gitlab module ImportExport class Config + def initialize + @hash = parse_yaml + @hash.deep_symbolize_keys! + @ee_hash = @hash.delete(:ee) || {} + + @hash[:tree] = normalize_tree(@hash[:tree]) + @ee_hash[:tree] = normalize_tree(@ee_hash[:tree] || {}) + end + # Returns a Hash of the YAML file, including EE specific data if EE is # used. def to_h - hash = parse_yaml - ee_hash = hash['ee'] - - if merge? && ee_hash - ee_hash.each do |key, value| - if key == 'project_tree' - merge_project_tree(value, hash[key]) - else - merge_attributes_list(value, hash[key]) - end - end + if merge_ee? + deep_merge(@hash, @ee_hash) + else + @hash end - - # We don't want to expose this section after this point, as it is no - # longer needed. - hash.delete('ee') - - hash end - # Merges a project relationships tree into the target tree. - # - # @param [Array] source_values - # @param [Array] target_values - def merge_project_tree(source_values, target_values) - source_values.each do |value| - if value.is_a?(Hash) - # Examples: - # - # { 'project_tree' => [{ 'labels' => [...] }] } - # { 'notes' => [:author, { 'events' => [:push_event_payload] }] } - value.each do |key, val| - target = target_values - .find { |h| h.is_a?(Hash) && h[key] } + private - if target - merge_project_tree(val, target[key]) - else - target_values << { key => val.dup } - end - end - else - # Example: :priorities, :author, etc - target_values << value - end + def deep_merge(hash_a, hash_b) + hash_a.deep_merge(hash_b) do |_, this_val, other_val| + this_val.to_a + other_val.to_a end end - # Merges a Hash containing a flat list of attributes, such as the entries - # in a `excluded_attributes` section. - # - # @param [Hash] source_values - # @param [Hash] target_values - def merge_attributes_list(source_values, target_values) - source_values.each do |key, values| - target_values[key] ||= [] - target_values[key].concat(values) + def normalize_tree(item) + case item + when Array + item.reduce({}) do |hash, subitem| + hash.merge!(normalize_tree(subitem)) + end + when Hash + item.transform_values(&method(:normalize_tree)) + when Symbol + { item => {} } + else + raise ArgumentError, "#{item} needs to be Array, Hash, Symbol or NilClass" end end - def merge? + def merge_ee? Gitlab.ee? end diff --git a/lib/gitlab/import_export/import_export.yml b/lib/gitlab/import_export/import_export.yml index 40acb24d191..06c94beead8 100644 --- a/lib/gitlab/import_export/import_export.yml +++ b/lib/gitlab/import_export/import_export.yml @@ -3,89 +3,92 @@ # This list _must_ only contain relationships that are available to both CE and # EE. EE specific relationships must be defined in the `ee` section further # down below. -project_tree: - - labels: - - :priorities - - milestones: - - events: - - :push_event_payload - - issues: - - events: - - :push_event_payload - - :timelogs - - notes: - - :author - - events: - - :push_event_payload - - label_links: - - label: - - :priorities - - milestone: - - events: - - :push_event_payload - - resource_label_events: - - label: - - :priorities - - :issue_assignees - - snippets: - - :award_emoji - - notes: - - :author - - releases: - - :links - - project_members: - - :user - - merge_requests: - - :metrics - - notes: - - :author +tree: + project: + - labels: + - :priorities + - milestones: - events: - :push_event_payload - - :suggestions - - merge_request_diff: - - :merge_request_diff_commits - - :merge_request_diff_files - - events: - - :push_event_payload - - :timelogs - - label_links: - - label: - - :priorities - - milestone: + - issues: - events: - :push_event_payload - - resource_label_events: - - label: - - :priorities - - ci_pipelines: - - notes: - - :author + - :timelogs + - notes: + - :author + - events: + - :push_event_payload + - label_links: + - label: + - :priorities + - milestone: + - events: + - :push_event_payload + - resource_label_events: + - label: + - :priorities + - :issue_assignees + - snippets: + - :award_emoji + - notes: + - :author + - releases: + - :links + - project_members: + - :user + - merge_requests: + - :metrics + - notes: + - :author + - events: + - :push_event_payload + - :suggestions + - merge_request_diff: + - :merge_request_diff_commits + - :merge_request_diff_files - events: - :push_event_payload - - stages: - - :statuses - - :external_pull_request - - :external_pull_requests - - :auto_devops - - :triggers - - :pipeline_schedules - - :services - - protected_branches: - - :merge_access_levels - - :push_access_levels - - protected_tags: - - :create_access_levels - - :project_feature - - :custom_attributes - - :prometheus_metrics - - :project_badges - - :ci_cd_settings - - :error_tracking_setting - - :metrics_setting - - boards: - - lists: - - label: - - :priorities + - :timelogs + - label_links: + - label: + - :priorities + - milestone: + - events: + - :push_event_payload + - resource_label_events: + - label: + - :priorities + - ci_pipelines: + - notes: + - :author + - events: + - :push_event_payload + - stages: + - :statuses + - :external_pull_request + - :external_pull_requests + - :auto_devops + - :triggers + - :pipeline_schedules + - :services + - protected_branches: + - :merge_access_levels + - :push_access_levels + - protected_tags: + - :create_access_levels + - :project_feature + - :custom_attributes + - :prometheus_metrics + - :project_badges + - :ci_cd_settings + - :error_tracking_setting + - :metrics_setting + - boards: + - lists: + - label: + - :priorities + group_members: + - :user # Only include the following attributes for the models specified. included_attributes: @@ -225,12 +228,15 @@ methods: - :type lists: - :list_type + ci_pipelines: + - :notes # EE specific relationships and settings to include. All of this will be merged # into the previous structures if EE is used. ee: - project_tree: - - protected_branches: - - :unprotect_access_levels - - protected_environments: - - :deploy_access_levels + tree: + project: + protected_branches: + - :unprotect_access_levels + protected_environments: + - :deploy_access_levels diff --git a/lib/gitlab/import_export/json_hash_builder.rb b/lib/gitlab/import_export/json_hash_builder.rb deleted file mode 100644 index a92e3862361..00000000000 --- a/lib/gitlab/import_export/json_hash_builder.rb +++ /dev/null @@ -1,117 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module ImportExport - # Generates a hash that conforms with http://apidock.com/rails/Hash/to_json - # and its peculiar options. - class JsonHashBuilder - def self.build(model_objects, attributes_finder) - new(model_objects, attributes_finder).build - end - - def initialize(model_objects, attributes_finder) - @model_objects = model_objects - @attributes_finder = attributes_finder - end - - def build - process_model_objects(@model_objects) - end - - private - - # Called when the model is actually a hash containing other relations (more models) - # Returns the config in the right format for calling +to_json+ - # - # +model_object_hash+ - A model relationship such as: - # {:merge_requests=>[:merge_request_diff, :notes]} - def process_model_objects(model_object_hash) - json_config_hash = {} - current_key = model_object_hash.first.first - - model_object_hash.values.flatten.each do |model_object| - @attributes_finder.parse(current_key) { |hash| json_config_hash[current_key] ||= hash } - handle_model_object(current_key, model_object, json_config_hash) - end - - json_config_hash - end - - # Creates or adds to an existing hash an individual model or list - # - # +current_key+ main model that will be a key in the hash - # +model_object+ model or list of models to include in the hash - # +json_config_hash+ the original hash containing the root model - def handle_model_object(current_key, model_object, json_config_hash) - model_or_sub_model = model_object.is_a?(Hash) ? process_model_objects(model_object) : model_object - - if json_config_hash[current_key] - add_model_value(current_key, model_or_sub_model, json_config_hash) - else - create_model_value(current_key, model_or_sub_model, json_config_hash) - end - end - - # Constructs a new hash that will hold the configuration for that particular object - # It may include exceptions or other attribute detail configuration, parsed by +@attributes_finder+ - # - # +current_key+ main model that will be a key in the hash - # +value+ existing model to be included in the hash - # +json_config_hash+ the original hash containing the root model - def create_model_value(current_key, value, json_config_hash) - json_config_hash[current_key] = parse_hash(value) || { include: value } - end - - # Calls attributes finder to parse the hash and add any attributes to it - # - # +value+ existing model to be included in the hash - # +parsed_hash+ the original hash - def parse_hash(value) - return if already_contains_methods?(value) - - @attributes_finder.parse(value) do |hash| - { include: hash_or_merge(value, hash) } - end - end - - def already_contains_methods?(value) - value.is_a?(Hash) && value.values.detect { |val| val[:methods]} - end - - # Adds new model configuration to an existing hash with key +current_key+ - # It may include exceptions or other attribute detail configuration, parsed by +@attributes_finder+ - # - # +current_key+ main model that will be a key in the hash - # +value+ existing model to be included in the hash - # +json_config_hash+ the original hash containing the root model - def add_model_value(current_key, value, json_config_hash) - @attributes_finder.parse(value) do |hash| - value = { value => hash } unless value.is_a?(Hash) - end - - add_to_array(current_key, json_config_hash, value) - end - - # Adds new model configuration to an existing hash with key +current_key+ - # it creates a new array if it was previously a single value - # - # +current_key+ main model that will be a key in the hash - # +value+ existing model to be included in the hash - # +json_config_hash+ the original hash containing the root model - def add_to_array(current_key, json_config_hash, value) - old_values = json_config_hash[current_key][:include] - - json_config_hash[current_key][:include] = ([old_values] + [value]).compact.flatten - end - - # Construct a new hash or merge with an existing one a model configuration - # This is to fulfil +to_json+ requirements. - # - # +hash+ hash containing configuration generated mainly from +@attributes_finder+ - # +value+ existing model to be included in the hash - def hash_or_merge(value, hash) - value.is_a?(Hash) ? value.merge(hash) : { value => hash } - end - end - end -end diff --git a/lib/gitlab/import_export/project_tree_restorer.rb b/lib/gitlab/import_export/project_tree_restorer.rb index 685cf53f27f..2dd18616cd6 100644 --- a/lib/gitlab/import_export/project_tree_restorer.rb +++ b/lib/gitlab/import_export/project_tree_restorer.rb @@ -58,11 +58,13 @@ module Gitlab # the configuration yaml file too. # Finally, it updates each attribute in the newly imported project. def create_relations - default_relation_list.each do |relation| - if relation.is_a?(Hash) - create_sub_relations(relation, @tree_hash) - elsif @tree_hash[relation.to_s].present? - save_relation_hash(@tree_hash[relation.to_s], relation) + project_relations_without_project_members.each do |relation_key, relation_definition| + relation_key_s = relation_key.to_s + + if relation_definition.present? + create_sub_relations(relation_key_s, relation_definition, @tree_hash) + elsif @tree_hash[relation_key_s].present? + save_relation_hash(relation_key_s, @tree_hash[relation_key_s]) end end @@ -71,7 +73,7 @@ module Gitlab @saved end - def save_relation_hash(relation_hash_batch, relation_key) + def save_relation_hash(relation_key, relation_hash_batch) relation_hash = create_relation(relation_key, relation_hash_batch) remove_group_models(relation_hash) if relation_hash.is_a?(Array) @@ -91,10 +93,13 @@ module Gitlab end end - def default_relation_list - reader.tree.reject do |model| - model.is_a?(Hash) && model[:project_members] - end + def project_relations_without_project_members + # We remove `project_members` as they are deserialized separately + project_relations.except(:project_members) + end + + def project_relations + reader.attributes_finder.find_relations_tree(:project) end def restore_project @@ -150,8 +155,7 @@ module Gitlab # issue, finds any subrelations such as notes, creates them and assign them back to the hash # # Recursively calls this method if the sub-relation is a hash containing more sub-relations - def create_sub_relations(relation, tree_hash, save: true) - relation_key = relation.keys.first.to_s + def create_sub_relations(relation_key, relation_definition, tree_hash, save: true) return if tree_hash[relation_key].blank? tree_array = [tree_hash[relation_key]].flatten @@ -171,13 +175,13 @@ module Gitlab # But we can't have it in the upper level or GC won't get rid of the AR objects # after we save the batch. Project.transaction do - process_sub_relation(relation, relation_item) + process_sub_relation(relation_key, relation_definition, relation_item) # For every subrelation that hangs from Project, save the associated records altogether # This effectively batches all records per subrelation item, only keeping those in memory # We have to keep in mind that more batch granularity << Memory, but >> Slowness if save - save_relation_hash([relation_item], relation_key) + save_relation_hash(relation_key, [relation_item]) tree_hash[relation_key].delete(relation_item) end end @@ -186,37 +190,35 @@ module Gitlab tree_hash.delete(relation_key) if save end - def process_sub_relation(relation, relation_item) - relation.values.flatten.each do |sub_relation| + def process_sub_relation(relation_key, relation_definition, relation_item) + relation_definition.each do |sub_relation_key, sub_relation_definition| # We just use author to get the user ID, do not attempt to create an instance. - next if sub_relation == :author + next if sub_relation_key == :author - create_sub_relations(sub_relation, relation_item, save: false) if sub_relation.is_a?(Hash) + sub_relation_key_s = sub_relation_key.to_s - relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) - relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? - end - end + # create dependent relations if present + if sub_relation_definition.present? + create_sub_relations(sub_relation_key_s, sub_relation_definition, relation_item, save: false) + end - def assign_relation_hash(relation_item, sub_relation) - if sub_relation.is_a?(Hash) - relation_hash = relation_item[sub_relation.keys.first.to_s] - sub_relation = sub_relation.keys.first - else - relation_hash = relation_item[sub_relation.to_s] + # transform relation hash to actual object + sub_relation_hash = relation_item[sub_relation_key_s] + if sub_relation_hash.present? + relation_item[sub_relation_key_s] = create_relation(sub_relation_key, sub_relation_hash) + end end - - [relation_hash, sub_relation] end - def create_relation(relation, relation_hash_list) + def create_relation(relation_key, relation_hash_list) relation_array = [relation_hash_list].flatten.map do |relation_hash| - Gitlab::ImportExport::RelationFactory.create(relation_sym: relation.to_sym, - relation_hash: relation_hash, - members_mapper: members_mapper, - user: @user, - project: @restored_project, - excluded_keys: excluded_keys_for_relation(relation)) + Gitlab::ImportExport::RelationFactory.create( + relation_sym: relation_key.to_sym, + relation_hash: relation_hash, + members_mapper: members_mapper, + user: @user, + project: @restored_project, + excluded_keys: excluded_keys_for_relation(relation_key)) end.compact relation_hash_list.is_a?(Array) ? relation_array : relation_array.first diff --git a/lib/gitlab/import_export/project_tree_saver.rb b/lib/gitlab/import_export/project_tree_saver.rb index 2255635acdf..f1b3db6b208 100644 --- a/lib/gitlab/import_export/project_tree_saver.rb +++ b/lib/gitlab/import_export/project_tree_saver.rb @@ -18,7 +18,10 @@ module Gitlab def save mkdir_p(@shared.export_path) - File.write(full_path, project_json_tree) + project_tree = serialize_project_tree + fix_project_tree(project_tree) + File.write(full_path, project_tree.to_json) + true rescue => e @shared.error(e) @@ -27,27 +30,25 @@ module Gitlab private - def project_json_tree + def fix_project_tree(project_tree) if @params[:description].present? - project_json['description'] = @params[:description] + project_tree['description'] = @params[:description] end - project_json['project_members'] += group_members_json - - RelationRenameService.add_new_associations(project_json) + project_tree['project_members'] += group_members_array - project_json.to_json + RelationRenameService.add_new_associations(project_tree) end - def project_json - @project_json ||= @project.as_json(reader.project_tree) + def serialize_project_tree + @project.as_json(reader.project_tree) end def reader @reader ||= Gitlab::ImportExport::Reader.new(shared: @shared) end - def group_members_json + def group_members_array group_members.as_json(reader.group_members_tree).each do |group_member| group_member['source_type'] = 'Project' # Make group members project members of the future import end diff --git a/lib/gitlab/import_export/reader.rb b/lib/gitlab/import_export/reader.rb index 8bdf6ca491d..9e81c6a3d07 100644 --- a/lib/gitlab/import_export/reader.rb +++ b/lib/gitlab/import_export/reader.rb @@ -7,42 +7,22 @@ module Gitlab def initialize(shared:) @shared = shared - config_hash = ImportExport::Config.new.to_h.deep_symbolize_keys - @tree = config_hash[:project_tree] - @attributes_finder = Gitlab::ImportExport::AttributesFinder.new(included_attributes: config_hash[:included_attributes], - excluded_attributes: config_hash[:excluded_attributes], - methods: config_hash[:methods]) + + @attributes_finder = Gitlab::ImportExport::AttributesFinder.new( + config: ImportExport::Config.new.to_h) end # Outputs a hash in the format described here: http://api.rubyonrails.org/classes/ActiveModel/Serializers/JSON.html # for outputting a project in JSON format, including its relations and sub relations. def project_tree - attributes = @attributes_finder.find(:project) - project_attributes = attributes.is_a?(Hash) ? attributes[:project] : {} - - project_attributes.merge(include: build_hash(@tree)) + attributes_finder.find_root(:project) rescue => e @shared.error(e) false end def group_members_tree - @attributes_finder.find_included(:project_members).merge(include: @attributes_finder.find(:user)) - end - - private - - # Builds a hash in the format described here: http://api.rubyonrails.org/classes/ActiveModel/Serializers/JSON.html - # - # +model_list+ - List of models as a relation tree to be included in the generated JSON, from the _import_export.yml_ file - def build_hash(model_list) - model_list.map do |model_objects| - if model_objects.is_a?(Hash) - Gitlab::ImportExport::JsonHashBuilder.build(model_objects, @attributes_finder) - else - @attributes_finder.find(model_objects) - end - end + attributes_finder.find_root(:group_members) end end end -- cgit v1.2.3