diff options
author | Kamil TrzciĆski <ayufan@ayufan.eu> | 2019-09-09 18:40:49 +0300 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2019-09-09 18:40:49 +0300 |
commit | 0e56c1e7cb3e1bbf3e81ab9907a26d385e28022c (patch) | |
tree | 4022cd2fe891d64eb34ceb5537467737a4054538 /spec/lib | |
parent | 383f363589ac405cce07d3b54e796f9c949d2ffb (diff) |
Improve performance and memory usage of project export
ActiveModel::Serialization is simple in that it recursively calls
`as_json` on each object to serialize everything. However, for a model
like a Project, this can generate a query for every single association,
which can add up to tens of thousands of queries and lead to memory
bloat.
To improve this, we can do several things:
1. We use `tree:` and `preload:` to automatically generate
a list of all preloads that could be used to serialize
objects in bulk.
2. We observe that a single project has many issues, merge requests,
etc. Instead of serializing everything at once, which could lead to
database timeouts and high memory usage, we take each top-level
association and serialize the data in batches.
For example, we serialize the first 100 issues and preload all of
their associated events, notes, etc. before moving onto the next
batch. When we're done, we serialize merge requests in the same way.
We repeat this pattern for the remaining associations specified in
import_export.yml.
Diffstat (limited to 'spec/lib')
4 files changed, 395 insertions, 17 deletions
diff --git a/spec/lib/gitlab/import_export/attributes_finder_spec.rb b/spec/lib/gitlab/import_export/attributes_finder_spec.rb index 208b60844e3..3cbc1375d6e 100644 --- a/spec/lib/gitlab/import_export/attributes_finder_spec.rb +++ b/spec/lib/gitlab/import_export/attributes_finder_spec.rb @@ -20,20 +20,41 @@ describe Gitlab::ImportExport::AttributesFinder do except: [:iid], include: [ { merge_request_diff: { - include: [] + include: [], + preload: { source_project: nil } } }, { merge_request_test: { include: [] } } ], - only: [:id] + only: [:id], + preload: { + merge_request_diff: { source_project: nil }, + merge_request_test: nil + } } }, { commit_statuses: { - include: [{ commit: { include: [] } }] + include: [{ commit: { include: [] } }], + preload: { commit: nil } } }, { project_members: { include: [{ user: { include: [], - only: [:email] } }] + only: [:email] } }], + preload: { user: nil } } } - ] + ], + preload: { + commit_statuses: { + commit: nil + }, + issues: nil, + labels: nil, + merge_requests: { + merge_request_diff: { source_project: nil }, + merge_request_test: nil + }, + project_members: { + user: nil + } + } } end @@ -50,7 +71,8 @@ describe Gitlab::ImportExport::AttributesFinder do setup_yaml(tree: { project: [:issues] }) is_expected.to match( - include: [{ issues: { include: [] } }] + include: [{ issues: { include: [] } }], + preload: { issues: nil } ) end @@ -58,7 +80,8 @@ describe Gitlab::ImportExport::AttributesFinder do setup_yaml(tree: { project: [:project_feature] }) is_expected.to match( - include: [{ project_feature: { include: [] } }] + include: [{ project_feature: { include: [] } }], + preload: { project_feature: nil } ) end @@ -67,7 +90,8 @@ describe Gitlab::ImportExport::AttributesFinder do is_expected.to match( include: [{ issues: { include: [] } }, - { snippets: { include: [] } }] + { snippets: { include: [] } }], + preload: { issues: nil, snippets: nil } ) end @@ -75,7 +99,9 @@ describe Gitlab::ImportExport::AttributesFinder do setup_yaml(tree: { project: [issues: [:notes]] }) is_expected.to match( - include: [{ issues: { include: [{ notes: { include: [] } }] } }] + include: [{ issues: { include: [{ notes: { include: [] } }], + preload: { notes: nil } } }], + preload: { issues: { notes: nil } } ) end @@ -85,7 +111,9 @@ describe Gitlab::ImportExport::AttributesFinder do is_expected.to match( include: [{ merge_requests: { include: [{ notes: { include: [] } }, - { merge_request_diff: { include: [] } }] } }] + { merge_request_diff: { include: [] } }], + preload: { merge_request_diff: nil, notes: nil } } }], + preload: { merge_requests: { merge_request_diff: nil, notes: nil } } ) end @@ -94,8 +122,11 @@ describe Gitlab::ImportExport::AttributesFinder do is_expected.to match( include: [{ merge_requests: { - include: [{ notes: { include: [{ author: { include: [] } }] } }] - } }] + include: [{ notes: { include: [{ author: { include: [] } }], + preload: { author: nil } } }], + preload: { notes: { author: nil } } + } }], + preload: { merge_requests: { notes: { author: nil } } } ) end @@ -105,7 +136,8 @@ describe Gitlab::ImportExport::AttributesFinder do is_expected.to match( include: [{ issues: { include: [], - only: [:name, :description] } }] + only: [:name, :description] } }], + preload: { issues: nil } ) end @@ -115,7 +147,8 @@ describe Gitlab::ImportExport::AttributesFinder do is_expected.to match( include: [{ issues: { except: [:name], - include: [] } }] + include: [] } }], + preload: { issues: nil } ) end @@ -127,7 +160,8 @@ describe Gitlab::ImportExport::AttributesFinder do is_expected.to match( include: [{ issues: { except: [:name], include: [], - only: [:description] } }] + only: [:description] } }], + preload: { issues: nil } ) end @@ -137,7 +171,8 @@ describe Gitlab::ImportExport::AttributesFinder do is_expected.to match( include: [{ issues: { include: [], - methods: [:name] } }] + methods: [:name] } }], + preload: { issues: nil } ) end diff --git a/spec/lib/gitlab/import_export/config_spec.rb b/spec/lib/gitlab/import_export/config_spec.rb index e53db37def4..f09a29b84db 100644 --- a/spec/lib/gitlab/import_export/config_spec.rb +++ b/spec/lib/gitlab/import_export/config_spec.rb @@ -25,7 +25,7 @@ describe Gitlab::ImportExport::Config do expect { subject }.not_to raise_error expect(subject).to be_a(Hash) expect(subject.keys).to contain_exactly( - :tree, :excluded_attributes, :included_attributes, :methods) + :tree, :excluded_attributes, :included_attributes, :methods, :preloads) end end end @@ -55,6 +55,10 @@ describe Gitlab::ImportExport::Config do events: - :action + preloads: + statuses: + project: + ee: tree: project: @@ -71,6 +75,9 @@ describe Gitlab::ImportExport::Config do - :type_ee events_ee: - :action_ee + preloads: + statuses: + bridge_ee: EOF end @@ -111,6 +118,11 @@ describe Gitlab::ImportExport::Config do methods: { labels: [:type], events: [:action] + }, + preloads: { + statuses: { + project: nil + } } } ) @@ -150,6 +162,12 @@ describe Gitlab::ImportExport::Config do labels: [:type, :type_ee], events: [:action], events_ee: [:action_ee] + }, + preloads: { + statuses: { + project: nil, + bridge_ee: nil + } } } ) diff --git a/spec/lib/gitlab/import_export/fast_hash_serializer_spec.rb b/spec/lib/gitlab/import_export/fast_hash_serializer_spec.rb new file mode 100644 index 00000000000..d23b27c9d8e --- /dev/null +++ b/spec/lib/gitlab/import_export/fast_hash_serializer_spec.rb @@ -0,0 +1,272 @@ +require 'spec_helper' + +describe Gitlab::ImportExport::FastHashSerializer do + subject { described_class.new(project, tree).execute } + + let!(:project) { setup_project } + let(:user) { create(:user) } + let(:shared) { project.import_export_shared } + let(:reader) { Gitlab::ImportExport::Reader.new(shared: shared) } + let(:tree) { reader.project_tree } + + before do + project.add_maintainer(user) + allow_any_instance_of(MergeRequest).to receive(:source_branch_sha).and_return('ABCD') + allow_any_instance_of(MergeRequest).to receive(:target_branch_sha).and_return('DCBA') + end + + it 'saves the correct hash' do + is_expected.to include({ 'description' => 'description', 'visibility_level' => 20 }) + end + + it 'has approvals_before_merge set' do + expect(subject['approvals_before_merge']).to eq(1) + end + + it 'has milestones' do + expect(subject['milestones']).not_to be_empty + end + + it 'has merge requests' do + expect(subject['merge_requests']).not_to be_empty + end + + it 'has merge request\'s milestones' do + expect(subject['merge_requests'].first['milestone']).not_to be_empty + end + + it 'has merge request\'s source branch SHA' do + expect(subject['merge_requests'].first['source_branch_sha']).to eq('ABCD') + end + + it 'has merge request\'s target branch SHA' do + expect(subject['merge_requests'].first['target_branch_sha']).to eq('DCBA') + end + + it 'has events' do + expect(subject['merge_requests'].first['milestone']['events']).not_to be_empty + end + + it 'has snippets' do + expect(subject['snippets']).not_to be_empty + end + + it 'has snippet notes' do + expect(subject['snippets'].first['notes']).not_to be_empty + end + + it 'has releases' do + expect(subject['releases']).not_to be_empty + end + + it 'has no author on releases' do + expect(subject['releases'].first['author']).to be_nil + end + + it 'has the author ID on releases' do + expect(subject['releases'].first['author_id']).not_to be_nil + end + + it 'has issues' do + expect(subject['issues']).not_to be_empty + end + + it 'has issue comments' do + notes = subject['issues'].first['notes'] + + expect(notes).not_to be_empty + expect(notes.first['type']).to eq('DiscussionNote') + end + + it 'has issue assignees' do + expect(subject['issues'].first['issue_assignees']).not_to be_empty + end + + it 'has author on issue comments' do + expect(subject['issues'].first['notes'].first['author']).not_to be_empty + end + + it 'has project members' do + expect(subject['project_members']).not_to be_empty + end + + it 'has merge requests diffs' do + expect(subject['merge_requests'].first['merge_request_diff']).not_to be_empty + end + + it 'has merge request diff files' do + expect(subject['merge_requests'].first['merge_request_diff']['merge_request_diff_files']).not_to be_empty + end + + it 'has merge request diff commits' do + expect(subject['merge_requests'].first['merge_request_diff']['merge_request_diff_commits']).not_to be_empty + end + + it 'has merge requests comments' do + expect(subject['merge_requests'].first['notes']).not_to be_empty + end + + it 'has author on merge requests comments' do + expect(subject['merge_requests'].first['notes'].first['author']).not_to be_empty + end + + it 'has pipeline stages' do + expect(subject.dig('ci_pipelines', 0, 'stages')).not_to be_empty + end + + it 'has pipeline statuses' do + expect(subject.dig('ci_pipelines', 0, 'stages', 0, 'statuses')).not_to be_empty + end + + it 'has pipeline builds' do + builds_count = subject + .dig('ci_pipelines', 0, 'stages', 0, 'statuses') + .count { |hash| hash['type'] == 'Ci::Build' } + + expect(builds_count).to eq(1) + end + + it 'has no when YML attributes but only the DB column' do + allow_any_instance_of(Ci::Pipeline) + .to receive(:ci_yaml_file) + .and_return(File.read(Rails.root.join('spec/support/gitlab_stubs/gitlab_ci.yml'))) + + expect_any_instance_of(Gitlab::Ci::YamlProcessor).not_to receive(:build_attributes) + + subject + end + + it 'has pipeline commits' do + expect(subject['ci_pipelines']).not_to be_empty + end + + it 'has ci pipeline notes' do + expect(subject['ci_pipelines'].first['notes']).not_to be_empty + end + + it 'has labels with no associations' do + expect(subject['labels']).not_to be_empty + end + + it 'has labels associated to records' do + expect(subject['issues'].first['label_links'].first['label']).not_to be_empty + end + + it 'has project and group labels' do + label_types = subject['issues'].first['label_links'].map { |link| link['label']['type'] } + + expect(label_types).to match_array(%w(ProjectLabel GroupLabel)) + end + + it 'has priorities associated to labels' do + priorities = subject['issues'].first['label_links'].flat_map { |link| link['label']['priorities'] } + + expect(priorities).not_to be_empty + end + + it 'has issue resource label events' do + expect(subject['issues'].first['resource_label_events']).not_to be_empty + end + + it 'has merge request resource label events' do + expect(subject['merge_requests'].first['resource_label_events']).not_to be_empty + end + + it 'saves the correct service type' do + expect(subject['services'].first['type']).to eq('CustomIssueTrackerService') + end + + it 'saves the properties for a service' do + expect(subject['services'].first['properties']).to eq('one' => 'value') + end + + it 'has project feature' do + project_feature = subject['project_feature'] + expect(project_feature).not_to be_empty + expect(project_feature["issues_access_level"]).to eq(ProjectFeature::DISABLED) + expect(project_feature["wiki_access_level"]).to eq(ProjectFeature::ENABLED) + expect(project_feature["builds_access_level"]).to eq(ProjectFeature::PRIVATE) + end + + it 'has custom attributes' do + expect(subject['custom_attributes'].count).to eq(2) + end + + it 'has badges' do + expect(subject['project_badges'].count).to eq(2) + end + + it 'does not complain about non UTF-8 characters in MR diff files' do + ActiveRecord::Base.connection.execute("UPDATE merge_request_diff_files SET diff = '---\n- :diff: !binary |-\n LS0tIC9kZXYvbnVsbAorKysgYi9pbWFnZXMvbnVjb3IucGRmCkBAIC0wLDAg\n KzEsMTY3OSBAQAorJVBERi0xLjUNJeLjz9MNCisxIDAgb2JqDTw8L01ldGFk\n YXR'") + + expect(subject['merge_requests'].first['merge_request_diff']).not_to be_empty + end + + context 'project attributes' do + it 'does not contain the runners token' do + expect(subject).not_to include("runners_token" => 'token') + end + end + + it 'has a board and a list' do + expect(subject['boards'].first['lists']).not_to be_empty + end + + def setup_project + issue = create(:issue, assignees: [user]) + snippet = create(:project_snippet) + release = create(:release) + group = create(:group) + + project = create(:project, + :public, + :repository, + :issues_disabled, + :wiki_enabled, + :builds_private, + description: 'description', + issues: [issue], + snippets: [snippet], + releases: [release], + group: group, + approvals_before_merge: 1 + ) + project_label = create(:label, project: project) + group_label = create(:group_label, group: group) + create(:label_link, label: project_label, target: issue) + create(:label_link, label: group_label, target: issue) + create(:label_priority, label: group_label, priority: 1) + milestone = create(:milestone, project: project) + merge_request = create(:merge_request, source_project: project, milestone: milestone) + + ci_build = create(:ci_build, project: project, when: nil) + ci_build.pipeline.update(project: project) + create(:commit_status, project: project, pipeline: ci_build.pipeline) + + create(:milestone, project: project) + create(:discussion_note, noteable: issue, project: project) + create(:note, noteable: merge_request, project: project) + create(:note, noteable: snippet, project: project) + create(:note_on_commit, + author: user, + project: project, + commit_id: ci_build.pipeline.sha) + + create(:resource_label_event, label: project_label, issue: issue) + create(:resource_label_event, label: group_label, merge_request: merge_request) + + create(:event, :created, target: milestone, project: project, author: user) + create(:service, project: project, type: 'CustomIssueTrackerService', category: 'issue_tracker', properties: { one: 'value' }) + + create(:project_custom_attribute, project: project) + create(:project_custom_attribute, project: project) + + create(:project_badge, project: project) + create(:project_badge, project: project) + + board = create(:board, project: project, name: 'TestBoard') + create(:list, board: board, position: 0, label: project_label) + + project + end +end diff --git a/spec/lib/gitlab/import_export/project_tree_saver_spec.rb b/spec/lib/gitlab/import_export/project_tree_saver_spec.rb index fefbed93316..ff46e062a5d 100644 --- a/spec/lib/gitlab/import_export/project_tree_saver_spec.rb +++ b/spec/lib/gitlab/import_export/project_tree_saver_spec.rb @@ -23,12 +23,65 @@ describe Gitlab::ImportExport::ProjectTreeSaver do expect(project_tree_saver.save).to be true end + context ':export_fast_serialize feature flag checks' do + before do + expect(Gitlab::ImportExport::Reader).to receive(:new).with(shared: shared).and_return(reader) + expect(reader).to receive(:project_tree).and_return(project_tree) + end + + let(:serializer) { instance_double('Gitlab::ImportExport::FastHashSerializer') } + let(:reader) { instance_double('Gitlab::ImportExport::Reader') } + let(:project_tree) do + { + include: [{ issues: { include: [] } }], + preload: { issues: nil } + } + end + + context 'when :export_fast_serialize feature is enabled' do + before do + stub_feature_flags(export_fast_serialize: true) + end + + it 'uses FastHashSerializer' do + expect(Gitlab::ImportExport::FastHashSerializer) + .to receive(:new) + .with(project, project_tree) + .and_return(serializer) + + expect(serializer).to receive(:execute) + + project_tree_saver.save + end + end + + context 'when :export_fast_serialize feature is disabled' do + before do + stub_feature_flags(export_fast_serialize: false) + end + + it 'is serialized via built-in `as_json`' do + expect(project).to receive(:as_json).with(project_tree) + + project_tree_saver.save + end + end + end + + # It is mostly duplicated in + # `spec/lib/gitlab/import_export/fast_hash_serializer_spec.rb` + # except: + # context 'with description override' do + # context 'group members' do + # ^ These are specific for the ProjectTreeSaver context 'JSON' do let(:saved_project_json) do project_tree_saver.save project_json(project_tree_saver.full_path) end + # It is not duplicated in + # `spec/lib/gitlab/import_export/fast_hash_serializer_spec.rb` context 'with description override' do let(:params) { { description: 'Foo Bar' } } let(:project_tree_saver) { described_class.new(project: project, current_user: user, shared: shared, params: params) } |