diff options
Diffstat (limited to 'lib/gitlab/github_import')
9 files changed, 232 insertions, 69 deletions
diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb index 80f8f8bfbe2..28a39128ec9 100644 --- a/lib/gitlab/github_import/bulk_importing.rb +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -30,7 +30,7 @@ module Gitlab # Bulk inserts the given rows into the database. def bulk_insert(model, rows, batch_size: 100) rows.each_slice(batch_size) do |slice| - Gitlab::Database.main.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert + ApplicationRecord.legacy_bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert log_and_increment_counter(slice.size, :imported) end diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb index 4cfc920e2e3..0aa0896aa57 100644 --- a/lib/gitlab/github_import/importer/diff_note_importer.rb +++ b/lib/gitlab/github_import/importer/diff_note_importer.rb @@ -4,41 +4,64 @@ module Gitlab module GithubImport module Importer class DiffNoteImporter - attr_reader :note, :project, :client, :user_finder - - # note - An instance of `Gitlab::GithubImport::Representation::DiffNote`. - # project - An instance of `Project`. - # client - An instance of `Gitlab::GithubImport::Client`. + # note - An instance of `Gitlab::GithubImport::Representation::DiffNote` + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` def initialize(note, project, client) @note = note @project = project @client = client - @user_finder = GithubImport::UserFinder.new(project, client) end def execute - return unless (mr_id = find_merge_request_id) + return if merge_request_id.blank? - author_id, author_found = user_finder.author_id_for(note) + note.project = project + note.merge_request = merge_request - note_body = MarkdownText.format(note.note, note.author, author_found) + build_author_attributes - attributes = { - discussion_id: Discussion.discussion_id(note), - noteable_type: 'MergeRequest', - noteable_id: mr_id, - project_id: project.id, - author_id: author_id, - note: note_body, - system: false, - commit_id: note.original_commit_id, - line_code: note.line_code, - type: 'LegacyDiffNote', - created_at: note.created_at, - updated_at: note.updated_at, - st_diff: note.diff_hash.to_yaml - } + # Diff notes with suggestions are imported with DiffNote, which is + # slower to import than LegacyDiffNote. Importing DiffNote is slower + # because it cannot use the BulkImporting strategy, which skips + # callbacks and validations. For this reason, notes that don't have + # suggestions are still imported with LegacyDiffNote + if import_with_diff_note? + import_with_diff_note + else + import_with_legacy_diff_note + end + rescue ActiveRecord::InvalidForeignKey => e + # It's possible the project and the issue have been deleted since + # scheduling this job. In this case we'll just skip creating the note + Logger.info( + message: e.message, + github_identifiers: note.github_identifiers + ) + end + private + + attr_reader :note, :project, :client, :author_id, :author_found + + def import_with_diff_note? + note.contains_suggestion? && use_diff_note_with_suggestions_enabled? + end + + def use_diff_note_with_suggestions_enabled? + Feature.enabled?( + :github_importer_use_diff_note_with_suggestions, + default_enabled: :yaml + ) + end + + def build_author_attributes + @author_id, @author_found = user_finder.author_id_for(note) + end + + # rubocop:disable Gitlab/BulkInsert + def import_with_legacy_diff_note + log_diff_note_creation('LegacyDiffNote') # It's possible that during an import we'll insert tens of thousands # of diff notes. If we were to use the Note/LegacyDiffNote model here # we'd also have to run additional queries for both validations and @@ -47,15 +70,70 @@ module Gitlab # To work around this we're using bulk_insert with a single row. This # allows us to efficiently insert data (even if it's just 1 row) # without having to use all sorts of hacks to disable callbacks. - Gitlab::Database.main.bulk_insert(LegacyDiffNote.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert - rescue ActiveRecord::InvalidForeignKey - # It's possible the project and the issue have been deleted since - # scheduling this job. In this case we'll just skip creating the note. + ApplicationRecord.legacy_bulk_insert(LegacyDiffNote.table_name, [{ + noteable_type: note.noteable_type, + system: false, + type: 'LegacyDiffNote', + discussion_id: note.discussion_id, + noteable_id: merge_request_id, + project_id: project.id, + author_id: author_id, + note: note_body, + commit_id: note.original_commit_id, + line_code: note.line_code, + created_at: note.created_at, + updated_at: note.updated_at, + st_diff: note.diff_hash.to_yaml + }]) + end + # rubocop:enabled Gitlab/BulkInsert + + def import_with_diff_note + log_diff_note_creation('DiffNote') + + ::Import::Github::Notes::CreateService.new(project, author, { + noteable_type: note.noteable_type, + system: false, + type: 'DiffNote', + noteable_id: merge_request_id, + project_id: project.id, + note: note_body, + discussion_id: note.discussion_id, + commit_id: note.original_commit_id, + created_at: note.created_at, + updated_at: note.updated_at, + position: note.diff_position + }).execute + end + + def note_body + @note_body ||= MarkdownText.format(note.note, note.author, author_found) + end + + def author + @author ||= User.find(author_id) + end + + def merge_request + @merge_request ||= MergeRequest.find(merge_request_id) end # Returns the ID of the merge request this note belongs to. - def find_merge_request_id - GithubImport::IssuableFinder.new(project, note).database_id + def merge_request_id + @merge_request_id ||= GithubImport::IssuableFinder.new(project, note).database_id + end + + def user_finder + @user_finder ||= GithubImport::UserFinder.new(project, client) + end + + def log_diff_note_creation(model) + Logger.info( + project_id: project.id, + importer: self.class.name, + github_identifiers: note.github_identifiers, + model: model + ) end end end diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb index f8665676ccf..7f46615f17e 100644 --- a/lib/gitlab/github_import/importer/issue_importer.rb +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -75,7 +75,7 @@ module Gitlab end end - Gitlab::Database.main.bulk_insert(IssueAssignee.table_name, assignees) # rubocop:disable Gitlab/BulkInsert + ApplicationRecord.legacy_bulk_insert(IssueAssignee.table_name, assignees) # rubocop:disable Gitlab/BulkInsert end end end diff --git a/lib/gitlab/github_import/importer/label_links_importer.rb b/lib/gitlab/github_import/importer/label_links_importer.rb index b608bb48e38..5e248c7cfc5 100644 --- a/lib/gitlab/github_import/importer/label_links_importer.rb +++ b/lib/gitlab/github_import/importer/label_links_importer.rb @@ -40,7 +40,7 @@ module Gitlab } end - Gitlab::Database.main.bulk_insert(LabelLink.table_name, rows) # rubocop:disable Gitlab/BulkInsert + ApplicationRecord.legacy_bulk_insert(LabelLink.table_name, rows) # rubocop:disable Gitlab/BulkInsert end def find_target_id diff --git a/lib/gitlab/github_import/importer/note_importer.rb b/lib/gitlab/github_import/importer/note_importer.rb index 1fd42a69fac..2cc3a82dd9b 100644 --- a/lib/gitlab/github_import/importer/note_importer.rb +++ b/lib/gitlab/github_import/importer/note_importer.rb @@ -37,7 +37,7 @@ module Gitlab # We're using bulk_insert here so we can bypass any validations and # callbacks. Running these would result in a lot of unnecessary SQL # queries being executed when importing large projects. - Gitlab::Database.main.bulk_insert(Note.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert + ApplicationRecord.legacy_bulk_insert(Note.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert rescue ActiveRecord::InvalidForeignKey # It's possible the project and the issue have been deleted since # scheduling this job. In this case we'll just skip creating the note. diff --git a/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb b/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb index 287e0ea7f7f..c56b391cbec 100644 --- a/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb @@ -31,9 +31,7 @@ module Gitlab end def each_object_to_import - project.merge_requests.with_state(:merged).find_each do |merge_request| - next if already_imported?(merge_request) - + merge_requests_to_import.find_each do |merge_request| Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) pull_request = client.pull_request(project.import_source, merge_request.iid) @@ -42,6 +40,17 @@ module Gitlab mark_as_imported(merge_request) end end + + private + + # Returns only the merge requests that still have merged_by to be imported. + def merge_requests_to_import + project.merge_requests.id_not_in(already_imported_objects).with_state(:merged) + end + + def already_imported_objects + Gitlab::Cache::Import::Caching.values_from_set(already_imported_cache_key) + end end end end diff --git a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb index bd65eb5899c..5e55d09fe3d 100644 --- a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb @@ -86,7 +86,7 @@ module Gitlab # Returns only the merge requests that still have reviews to be imported. def merge_requests_to_import - project.merge_requests.where.not(id: already_imported_merge_requests) # rubocop: disable CodeReuse/ActiveRecord + project.merge_requests.id_not_in(already_imported_merge_requests) end def already_imported_merge_requests diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb index a3dcd2e380c..fecff0644c2 100644 --- a/lib/gitlab/github_import/representation/diff_note.rb +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -7,13 +7,14 @@ module Gitlab include ToHash include ExposeAttribute - attr_reader :attributes - - expose_attribute :noteable_type, :noteable_id, :commit_id, :file_path, - :diff_hunk, :author, :note, :created_at, :updated_at, - :original_commit_id, :note_id - + NOTEABLE_TYPE = 'MergeRequest' NOTEABLE_ID_REGEX = %r{/pull/(?<iid>\d+)}i.freeze + DISCUSSION_CACHE_KEY = 'github-importer/discussion-id-map/%{project_id}/%{noteable_id}/%{original_note_id}' + + expose_attribute :noteable_id, :commit_id, :file_path, + :diff_hunk, :author, :created_at, :updated_at, + :original_commit_id, :note_id, :end_line, :start_line, + :side, :in_reply_to_id # Builds a diff note from a GitHub API response. # @@ -30,7 +31,6 @@ module Gitlab user = Representation::User.from_api_response(note.user) if note.user hash = { - noteable_type: 'MergeRequest', noteable_id: matches[:iid].to_i, file_path: note.path, commit_id: note.commit_id, @@ -42,7 +42,9 @@ module Gitlab updated_at: note.updated_at, note_id: note.id, end_line: note.line, - start_line: note.start_line + start_line: note.start_line, + side: note.side, + in_reply_to_id: note.in_reply_to_id } new(hash) @@ -56,21 +58,41 @@ module Gitlab new(hash) end + attr_accessor :merge_request, :project + # attributes - A Hash containing the raw note details. The keys of this # Hash must be Symbols. def initialize(attributes) @attributes = attributes + + @note_formatter = DiffNotes::SuggestionFormatter.new( + note: attributes[:note], + start_line: attributes[:start_line], + end_line: attributes[:end_line] + ) + end + + def noteable_type + NOTEABLE_TYPE + end + + def contains_suggestion? + @note_formatter.contains_suggestion? + end + + def note + @note_formatter.formatted_note end def line_code diff_line = Gitlab::Diff::Parser.new.parse(diff_hunk.lines).to_a.last - Gitlab::Git - .diff_line_code(file_path, diff_line.new_pos, diff_line.old_pos) + Gitlab::Git.diff_line_code(file_path, diff_line.new_pos, diff_line.old_pos) end # Returns a Hash that can be used to populate `notes.st_diff`, removing # the need for requesting Git data for every diff note. + # Used when importing with LegacyDiffNote def diff_hash { diff: diff_hunk, @@ -85,12 +107,15 @@ module Gitlab } end - def note - @note ||= DiffNotes::SuggestionFormatter.formatted_note_for( - note: attributes[:note], - start_line: attributes[:start_line], - end_line: attributes[:end_line] - ) + # Used when importing with DiffNote + def diff_position + position_params = { + diff_refs: merge_request.diff_refs, + old_path: file_path, + new_path: file_path + } + + Gitlab::Diff::Position.new(position_params.merge(diff_line_params)) end def github_identifiers @@ -100,6 +125,53 @@ module Gitlab noteable_type: noteable_type } end + + def discussion_id + if in_reply_to_id.present? + current_discussion_id + else + Discussion.discussion_id( + Struct + .new(:noteable_id, :noteable_type) + .new(merge_request.id, NOTEABLE_TYPE) + ).tap do |discussion_id| + cache_discussion_id(discussion_id) + end + end + end + + private + + # Required by ExposeAttribute + attr_reader :attributes + + def diff_line_params + if addition? + { new_line: end_line, old_line: nil } + else + { new_line: nil, old_line: end_line } + end + end + + def addition? + side == 'RIGHT' + end + + def cache_discussion_id(discussion_id) + Gitlab::Cache::Import::Caching.write(discussion_id_cache_key(note_id), discussion_id) + end + + def current_discussion_id + Gitlab::Cache::Import::Caching.read(discussion_id_cache_key(in_reply_to_id)) + end + + def discussion_id_cache_key(id) + DISCUSSION_CACHE_KEY % { + project_id: project.id, + noteable_id: merge_request.id, + original_note_id: id + } + end end end end diff --git a/lib/gitlab/github_import/representation/diff_notes/suggestion_formatter.rb b/lib/gitlab/github_import/representation/diff_notes/suggestion_formatter.rb index 4e5855ee4cd..38b15c4b5bb 100644 --- a/lib/gitlab/github_import/representation/diff_notes/suggestion_formatter.rb +++ b/lib/gitlab/github_import/representation/diff_notes/suggestion_formatter.rb @@ -10,30 +10,38 @@ module Gitlab module Representation module DiffNotes class SuggestionFormatter + include Gitlab::Utils::StrongMemoize + # A github suggestion: # - the ```suggestion tag must be the first text of the line # - it might have up to 3 spaces before the ```suggestion tag # - extra text on the ```suggestion tag line will be ignored GITHUB_SUGGESTION = /^\ {,3}(?<suggestion>```suggestion\b).*(?<eol>\R)/.freeze - def self.formatted_note_for(...) - new(...).formatted_note - end - def initialize(note:, start_line: nil, end_line: nil) @note = note @start_line = start_line @end_line = end_line end + # Returns a tuple with: + # - a boolean indicating if the note has suggestions + # - the note with the suggestion formatted for Gitlab def formatted_note - if contains_suggestion? - note.gsub( - GITHUB_SUGGESTION, - "\\k<suggestion>:#{suggestion_range}\\k<eol>" - ) - else - note + @formatted_note ||= + if contains_suggestion? + note.gsub( + GITHUB_SUGGESTION, + "\\k<suggestion>:#{suggestion_range}\\k<eol>" + ) + else + note + end + end + + def contains_suggestion? + strong_memoize(:contain_suggestion) do + note.to_s.match?(GITHUB_SUGGESTION) end end @@ -41,10 +49,6 @@ module Gitlab attr_reader :note, :start_line, :end_line - def contains_suggestion? - note.to_s.match?(GITHUB_SUGGESTION) - end - # Github always saves the comment on the _last_ line of the range. # Therefore, the diff hunk will always be related to lines before # the comment itself. |