diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-05-17 19:05:49 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-05-17 19:05:49 +0300 |
commit | 43a25d93ebdabea52f99b05e15b06250cd8f07d7 (patch) | |
tree | dceebdc68925362117480a5d672bcff122fb625b /lib/gitlab/github_import | |
parent | 20c84b99005abd1c82101dfeff264ac50d2df211 (diff) |
Add latest changes from gitlab-org/gitlab@16-0-stable-ee (tag: v16.0.0-rc42)
Diffstat (limited to 'lib/gitlab/github_import')
42 files changed, 880 insertions, 520 deletions
diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb index 0c91eff1d10..d16f4d7587b 100644 --- a/lib/gitlab/github_import/bulk_importing.rb +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -27,8 +27,13 @@ module Gitlab build_record = model.new(attrs) if build_record.invalid? - log_error(object[:id], build_record.errors.full_messages) - errors << build_record.errors + github_identifiers = github_identifiers(object) + + log_error(github_identifiers, build_record.errors.full_messages) + errors << { + validation_errors: build_record.errors, + github_identifiers: github_identifiers + } next end @@ -53,17 +58,18 @@ module Gitlab raise NotImplementedError end - def bulk_insert_failures(validation_errors) - rows = validation_errors.map do |error| + def bulk_insert_failures(errors) + rows = errors.map do |error| correlation_id_value = Labkit::Correlation::CorrelationId.current_or_new_id { source: self.class.name, exception_class: 'ActiveRecord::RecordInvalid', - exception_message: error.full_messages.first.truncate(255), + exception_message: error[:validation_errors].full_messages.first.truncate(255), correlation_id_value: correlation_id_value, retry_count: nil, - created_at: Time.zone.now + created_at: Time.zone.now, + external_identifiers: error[:github_identifiers] } end @@ -88,15 +94,19 @@ module Gitlab ) end - def log_error(object_id, messages) + def log_error(github_identifiers, messages) Gitlab::Import::Logger.error( import_type: :github, project_id: project.id, importer: self.class.name, message: messages, - github_identifier: object_id + github_identifiers: github_identifiers ) end + + def github_identifiers(object) + raise NotImplementedError + end end end end diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 1c9ca9f43a8..886563a6f69 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -112,6 +112,10 @@ module Gitlab 
each_object(:branches, *args) end + def collaborators(*args) + each_object(:collaborators, *args) + end + def branch_protection(repo_name, branch_name) with_rate_limit { octokit.branch_protection(repo_name, branch_name).to_h } end diff --git a/lib/gitlab/github_import/clients/proxy.rb b/lib/gitlab/github_import/clients/proxy.rb index b12df404640..27030f5382a 100644 --- a/lib/gitlab/github_import/clients/proxy.rb +++ b/lib/gitlab/github_import/clients/proxy.rb @@ -6,6 +6,10 @@ module Gitlab class Proxy attr_reader :client + delegate :each_object, :user, :octokit, to: :client + + REPOS_COUNT_CACHE_KEY = 'github-importer/provider-repo-count/%{type}/%{user_id}' + def initialize(access_token, client_options) @client = pick_client(access_token, client_options) end @@ -13,24 +17,26 @@ module Gitlab def repos(search_text, options) return { repos: filtered(client.repos, search_text) } if use_legacy? - if use_graphql? - fetch_repos_via_graphql(search_text, options) - else - fetch_repos_via_rest(search_text, options) - end + fetch_repos_via_graphql(search_text, options) end - private + def count_repos_by(relation_type, user_id) + return if use_legacy? + + key = format(REPOS_COUNT_CACHE_KEY, type: relation_type, user_id: user_id) - def fetch_repos_via_rest(search_text, options) - { repos: client.search_repos_by_name(search_text, options)[:items] } + ::Gitlab::Cache::Import::Caching.read_integer(key, timeout: 5.minutes) || + fetch_and_cache_repos_count_via_graphql(relation_type, key) end + private + def fetch_repos_via_graphql(search_text, options) response = client.search_repos_by_name_graphql(search_text, options) { repos: response.dig(:data, :search, :nodes), - page_info: response.dig(:data, :search, :pageInfo) + page_info: response.dig(:data, :search, :pageInfo), + count: response.dig(:data, :search, :repositoryCount) } end @@ -50,8 +56,11 @@ module Gitlab Feature.disabled?(:remove_legacy_github_client) end - def use_graphql? 
- Feature.enabled?(:github_client_fetch_repos_via_graphql) + def fetch_and_cache_repos_count_via_graphql(relation_type, key) + response = client.count_repos_by_relation_type_graphql(relation_type: relation_type) + count = response.dig(:data, :search, :repositoryCount) + + ::Gitlab::Cache::Import::Caching.write(key, count, timeout: 5.minutes) end end end diff --git a/lib/gitlab/github_import/clients/search_repos.rb b/lib/gitlab/github_import/clients/search_repos.rb index b72e5ac7751..a2ef6ca24eb 100644 --- a/lib/gitlab/github_import/clients/search_repos.rb +++ b/lib/gitlab/github_import/clients/search_repos.rb @@ -5,24 +5,24 @@ module Gitlab module Clients module SearchRepos def search_repos_by_name_graphql(name, options = {}) - with_retry do - octokit.post( - '/graphql', - { query: graphql_search_repos_body(name, options) }.to_json - ).to_h - end + graphql_request(graphql_search_repos_body(name, options)) + end + + def count_repos_by_relation_type_graphql(options) + graphql_request(count_by_relation_type_query(options)) end - def search_repos_by_name(name, options = {}) - search_query = search_repos_query(name, options) + private + def graphql_request(query) with_retry do - octokit.search_repositories(search_query, options).to_h + octokit.post( + '/graphql', + { query: query }.to_json + ).to_h end end - private - def graphql_search_repos_body(name, options) query = search_repos_query(name, options) query = "query: \"#{query}\"" @@ -45,14 +45,15 @@ module Gitlab endCursor hasNextPage hasPreviousPage - } + }, + repositoryCount } } TEXT end def search_repos_query(string, options = {}) - base = "#{string} in:name is:public,private" + base = "#{string} in:name is:public,private fork:true" case options[:relation_type] when 'organization' then organization_repos_query(base, options) @@ -64,7 +65,11 @@ module Gitlab end def organization_repos_query(search_string, options) - "#{search_string} org:#{options[:organization_login]}" + if options[:organization_login].present? 
+ "#{search_string} org:#{options[:organization_login]}" + else + organizations_subquery + end end def collaborated_repos_query(search_string) @@ -95,6 +100,18 @@ module Gitlab .map { |org| "org:#{org[:login]}" } .join(' ') end + + def count_by_relation_type_query(options) + query = search_repos_query(nil, options) + query = "query: \"#{query}\"" + <<-TEXT + { + search(type: REPOSITORY, #{query}) { + repositoryCount + } + } + TEXT + end end end end diff --git a/lib/gitlab/github_import/importer/attachments/issues_importer.rb b/lib/gitlab/github_import/importer/attachments/issues_importer.rb index 090bfb4a098..c8f0b59fd18 100644 --- a/lib/gitlab/github_import/importer/attachments/issues_importer.rb +++ b/lib/gitlab/github_import/importer/attachments/issues_importer.rb @@ -24,7 +24,7 @@ module Gitlab private def collection - project.issues.select(:id, :description) + project.issues.select(:id, :description, :iid) end def ordering_column diff --git a/lib/gitlab/github_import/importer/attachments/merge_requests_importer.rb b/lib/gitlab/github_import/importer/attachments/merge_requests_importer.rb index f41071b1785..cd3a327a846 100644 --- a/lib/gitlab/github_import/importer/attachments/merge_requests_importer.rb +++ b/lib/gitlab/github_import/importer/attachments/merge_requests_importer.rb @@ -24,7 +24,7 @@ module Gitlab private def collection - project.merge_requests.select(:id, :description) + project.merge_requests.select(:id, :description, :iid) end def ordering_column diff --git a/lib/gitlab/github_import/importer/attachments/releases_importer.rb b/lib/gitlab/github_import/importer/attachments/releases_importer.rb index feaa69eff71..7d6dbeb901e 100644 --- a/lib/gitlab/github_import/importer/attachments/releases_importer.rb +++ b/lib/gitlab/github_import/importer/attachments/releases_importer.rb @@ -24,7 +24,7 @@ module Gitlab private def collection - project.releases.select(:id, :description) + project.releases.select(:id, :description, :tag) end end end diff --git 
a/lib/gitlab/github_import/importer/collaborator_importer.rb b/lib/gitlab/github_import/importer/collaborator_importer.rb new file mode 100644 index 00000000000..9a90ea5a4ed --- /dev/null +++ b/lib/gitlab/github_import/importer/collaborator_importer.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class CollaboratorImporter + attr_reader :collaborator, :project, :client, :members_finder + + # collaborator - An instance of `Gitlab::GithubImport::Representation::Collaborator` + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(collaborator, project, client) + @collaborator = collaborator + @project = project + @client = client + @members_finder = ::MembersFinder.new(project, project.creator) + end + + def execute + user_finder = GithubImport::UserFinder.new(project, client) + user_id = user_finder.user_id_for(collaborator) + return if user_id.nil? + + membership = existing_user_membership(user_id) + access_level = map_access_level + return if membership && membership[:access_level] >= map_access_level + + create_membership!(user_id, access_level) + end + + private + + def existing_user_membership(user_id) + members_finder.execute.find_by_user_id(user_id) + end + + def map_access_level + access_level = + case collaborator[:role_name] + when 'read' then Gitlab::Access::GUEST + when 'triage' then Gitlab::Access::REPORTER + when 'write' then Gitlab::Access::DEVELOPER + when 'maintain' then Gitlab::Access::MAINTAINER + when 'admin' then Gitlab::Access::OWNER + end + return access_level if access_level + + raise( + ::Gitlab::GithubImport::ObjectImporter::NotRetriableError, + "Unknown GitHub role: #{collaborator[:role_name]}" + ) + end + + def create_membership!(user_id, access_level) + ::ProjectMember.create!( + source: project, + access_level: access_level, + user_id: user_id, + member_namespace_id: project.project_namespace_id, + created_by_id: 
project.creator_id + ) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/collaborators_importer.rb b/lib/gitlab/github_import/importer/collaborators_importer.rb new file mode 100644 index 00000000000..7b18d3dba2a --- /dev/null +++ b/lib/gitlab/github_import/importer/collaborators_importer.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class CollaboratorsImporter + include ParallelScheduling + + # The method that will be called for traversing through all the objects to + # import, yielding them to the supplied block. + def each_object_to_import + repo = project.import_source + + direct_collaborators = client.collaborators(repo, affiliation: 'direct') + outside_collaborators = client.collaborators(repo, affiliation: 'outside') + collaborators_to_import = direct_collaborators.to_a - outside_collaborators.to_a + + collaborators_to_import.each do |collaborator| + next if already_imported?(collaborator) + + yield collaborator + + Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) + mark_as_imported(collaborator) + end + end + + def importer_class + CollaboratorImporter + end + + def representation_class + Representation::Collaborator + end + + def sidekiq_worker_class + ImportCollaboratorWorker + end + + def object_type + :collaborator + end + + def collection_method + :collaborators + end + + def id_for_already_imported_cache(collaborator) + collaborator[:id] + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/events/cross_referenced.rb b/lib/gitlab/github_import/importer/events/cross_referenced.rb index b56ae186d3c..4fe371e5900 100644 --- a/lib/gitlab/github_import/importer/events/cross_referenced.rb +++ b/lib/gitlab/github_import/importer/events/cross_referenced.rb @@ -55,6 +55,7 @@ module Gitlab record = record_class.new(id: db_id, iid: iid) record.project = project + record.namespace = project.project_namespace if 
record.respond_to?(:namespace) record.readonly! record end diff --git a/lib/gitlab/github_import/importer/label_links_importer.rb b/lib/gitlab/github_import/importer/label_links_importer.rb index 52c87dda347..a20fec4b2ba 100644 --- a/lib/gitlab/github_import/importer/label_links_importer.rb +++ b/lib/gitlab/github_import/importer/label_links_importer.rb @@ -25,6 +25,8 @@ module Gitlab items = [] target_id = find_target_id + return if target_id.blank? + issue.label_names.each do |label_name| # Although unlikely it's technically possible for an issue to be # given a label that was created and assigned after we imported all diff --git a/lib/gitlab/github_import/importer/labels_importer.rb b/lib/gitlab/github_import/importer/labels_importer.rb index d5d1cd28b7c..4554b932520 100644 --- a/lib/gitlab/github_import/importer/labels_importer.rb +++ b/lib/gitlab/github_import/importer/labels_importer.rb @@ -53,9 +53,18 @@ module Gitlab :label end + private + def model Label end + + def github_identifiers(label) + { + title: label[:name], + object_type: object_type + } + end end end end diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb index 560fbdc66e3..cd6d450f15b 100644 --- a/lib/gitlab/github_import/importer/milestones_importer.rb +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -57,9 +57,19 @@ module Gitlab :milestone end + private + def model Milestone end + + def github_identifiers(milestone) + { + iid: milestone[:number], + title: milestone[:title], + object_type: object_type + } + end end end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 9901c9e76f5..266ee2938ba 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -6,7 +6,7 @@ module Gitlab class NoteAttachmentsImporter 
attr_reader :note_text, :project - # note_text - An instance of `NoteText`. + # note_text - An instance of `Gitlab::GithubImport::Representation::NoteText`. # project - An instance of `Project`. # client - An instance of `Gitlab::GithubImport::Client`. def initialize(note_text, project, _client = nil) @@ -19,7 +19,7 @@ module Gitlab return if attachments.blank? new_text = attachments.reduce(note_text.text) do |text, attachment| - new_url = download_attachment(attachment) + new_url = gitlab_attachment_link(attachment) text.gsub(attachment.url, new_url) end @@ -28,6 +28,28 @@ module Gitlab private + def gitlab_attachment_link(attachment) + project_import_source = project.import_source + + if attachment.part_of_project_blob?(project_import_source) + convert_project_content_link(attachment.url, project_import_source) + elsif attachment.media? || attachment.doc_belongs_to_project?(project_import_source) + download_attachment(attachment) + else # url to other GitHub project + attachment.url + end + end + + # From: https://github.com/login/test-import-attachments-source/blob/main/example.md + # To: https://gitlab.com/login/test-import-attachments-target/-/blob/main/example.md + def convert_project_content_link(attachment_url, import_source) + path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url, '') + path_without_import_source = path_without_domain.gsub(import_source, '').delete_prefix('/') + path_with_blob_prefix = "/-#{path_without_import_source}" + + ::Gitlab::Routing.url_helpers.project_url(project) + path_with_blob_prefix + end + # in: an instance of Gitlab::GithubImport::Markdown::Attachment # out: gitlab attachment markdown url def download_attachment(attachment) diff --git a/lib/gitlab/github_import/importer/pull_request_merged_by_importer.rb b/lib/gitlab/github_import/importer/pull_request_merged_by_importer.rb deleted file mode 100644 index f05aa26a449..00000000000 --- 
a/lib/gitlab/github_import/importer/pull_request_merged_by_importer.rb +++ /dev/null @@ -1,73 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module GithubImport - module Importer - class PullRequestMergedByImporter - # pull_request - An instance of - # `Gitlab::GithubImport::Representation::PullRequest` - # project - An instance of `Project` - # client - An instance of `Gitlab::GithubImport::Client` - def initialize(pull_request, project, client) - @pull_request = pull_request - @project = project - @client = client - end - - def execute - user_finder = GithubImport::UserFinder.new(project, client) - - gitlab_user_id = begin - user_finder.user_id_for(pull_request.merged_by) - rescue ::Octokit::NotFound - nil - end - - metrics_upsert(gitlab_user_id) - - add_note! - end - - private - - attr_reader :project, :pull_request, :client - - def metrics_upsert(gitlab_user_id) - MergeRequest::Metrics.upsert({ - target_project_id: project.id, - merge_request_id: merge_request.id, - merged_by_id: gitlab_user_id, - merged_at: pull_request.merged_at, - created_at: timestamp, - updated_at: timestamp - }, unique_by: :merge_request_id) - end - - def add_note! 
- merge_request.notes.create!( - importing: true, - note: missing_author_note, - author_id: project.creator_id, - project: project, - created_at: pull_request.merged_at - ) - end - - def merge_request - @merge_request ||= project.merge_requests.find_by_iid(pull_request.iid) - end - - def timestamp - @timestamp ||= Time.new.utc - end - - def missing_author_note - s_("GitHubImporter|*Merged by: %{author} at %{timestamp}*") % { - author: pull_request.merged_by&.login || 'ghost', - timestamp: pull_request.merged_at - } - end - end - end - end -end diff --git a/lib/gitlab/github_import/importer/pull_request_review_importer.rb b/lib/gitlab/github_import/importer/pull_request_review_importer.rb deleted file mode 100644 index b1e259fe940..00000000000 --- a/lib/gitlab/github_import/importer/pull_request_review_importer.rb +++ /dev/null @@ -1,145 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module GithubImport - module Importer - class PullRequestReviewImporter - # review - An instance of `Gitlab::GithubImport::Representation::PullRequestReview` - # project - An instance of `Project` - # client - An instance of `Gitlab::GithubImport::Client` - def initialize(review, project, client) - @review = review - @project = project - @client = client - @merge_request = project.merge_requests.find_by_id(review.merge_request_id) - end - - def execute - user_finder = GithubImport::UserFinder.new(project, client) - - gitlab_user_id = begin - user_finder.user_id_for(review.author) - rescue ::Octokit::NotFound - nil - end - - if gitlab_user_id - add_review_note!(gitlab_user_id) - add_approval!(gitlab_user_id) - add_reviewer!(gitlab_user_id) - else - add_complementary_review_note!(project.creator_id) - end - end - - private - - attr_reader :review, :merge_request, :project, :client - - def add_review_note!(author_id) - return if review.note.empty? - - add_note!(author_id, review_note_content) - end - - def add_complementary_review_note!(author_id) - return if review.note.empty? 
&& !review.approval? - - note_body = MarkdownText.format( - review_note_content, - review.author - ) - - add_note!(author_id, note_body) - end - - def review_note_content - header = "**Review:** #{review.review_type.humanize}" - - if review.note.present? - "#{header}\n\n#{review.note}" - else - header - end - end - - def add_note!(author_id, note) - note = Note.new(note_attributes(author_id, note)) - - note.save! - end - - def note_attributes(author_id, note, extra = {}) - { - importing: true, - noteable_id: merge_request.id, - noteable_type: 'MergeRequest', - project_id: project.id, - author_id: author_id, - note: note, - system: false, - created_at: submitted_at, - updated_at: submitted_at - }.merge(extra) - end - - def add_approval!(user_id) - return unless review.review_type == 'APPROVED' - - approval_attribues = { - merge_request_id: merge_request.id, - user_id: user_id, - created_at: submitted_at, - updated_at: submitted_at - } - - result = ::Approval.insert( - approval_attribues, - returning: [:id], - unique_by: [:user_id, :merge_request_id] - ) - - if result.rows.present? - add_approval_system_note!(user_id) - end - end - - def add_reviewer!(user_id) - return if review_re_requested?(user_id) - - ::MergeRequestReviewer.create!( - merge_request_id: merge_request.id, - user_id: user_id, - state: ::MergeRequestReviewer.states['reviewed'], - created_at: submitted_at - ) - rescue ActiveRecord::RecordNotUnique - # multiple reviews from single person could make a SQL concurrency issue here - nil - end - - # rubocop:disable CodeReuse/ActiveRecord - def review_re_requested?(user_id) - # records that were imported on previous stage with "unreviewed" status - MergeRequestReviewer.where(merge_request_id: merge_request.id, user_id: user_id).exists? 
- end - # rubocop:enable CodeReuse/ActiveRecord - - def add_approval_system_note!(user_id) - attributes = note_attributes( - user_id, - 'approved this merge request', - system: true, - system_note_metadata: SystemNoteMetadata.new(action: 'approved') - ) - - Note.create!(attributes) - end - - def submitted_at - @submitted_at ||= (review.submitted_at || merge_request.updated_at) - end - end - end - end -end diff --git a/lib/gitlab/github_import/importer/pull_requests/all_merged_by_importer.rb b/lib/gitlab/github_import/importer/pull_requests/all_merged_by_importer.rb new file mode 100644 index 00000000000..9aa55fd3eae --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_requests/all_merged_by_importer.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + module PullRequests + class AllMergedByImporter + include ParallelScheduling + + def importer_class + MergedByImporter + end + + def representation_class + Gitlab::GithubImport::Representation::PullRequest + end + + def sidekiq_worker_class + Gitlab::GithubImport::PullRequests::ImportMergedByWorker + end + + def collection_method + :pull_requests_merged_by + end + + def object_type + :pull_request_merged_by + end + + def id_for_already_imported_cache(merge_request) + merge_request.id + end + + def each_object_to_import + merge_requests_to_import.find_each do |merge_request| + Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) + + pull_request = client.pull_request(project.import_source, merge_request.iid) + yield(pull_request) + + mark_as_imported(merge_request) + end + end + + private + + # Returns only the merge requests that still have merged_by to be imported. 
+ def merge_requests_to_import + project.merge_requests.id_not_in(already_imported_objects).with_state(:merged) + end + + def already_imported_objects + Gitlab::Cache::Import::Caching.values_from_set(already_imported_cache_key) + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_requests/merged_by_importer.rb b/lib/gitlab/github_import/importer/pull_requests/merged_by_importer.rb new file mode 100644 index 00000000000..19880716832 --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_requests/merged_by_importer.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + module PullRequests + class MergedByImporter + # pull_request - An instance of + # `Gitlab::GithubImport::Representation::PullRequest` + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(pull_request, project, client) + @pull_request = pull_request + @project = project + @client = client + end + + def execute + user_finder = GithubImport::UserFinder.new(project, client) + + gitlab_user_id = user_finder.user_id_for(pull_request.merged_by) + + metrics_upsert(gitlab_user_id) + + add_note! + end + + private + + attr_reader :project, :pull_request, :client + + def metrics_upsert(gitlab_user_id) + MergeRequest::Metrics.upsert({ + target_project_id: project.id, + merge_request_id: merge_request.id, + merged_by_id: gitlab_user_id, + merged_at: pull_request.merged_at, + created_at: timestamp, + updated_at: timestamp + }, unique_by: :merge_request_id) + end + + def add_note! 
+ merge_request.notes.create!( + importing: true, + note: missing_author_note, + author_id: project.creator_id, + project: project, + created_at: pull_request.merged_at + ) + end + + def merge_request + @merge_request ||= project.merge_requests.find_by_iid(pull_request.iid) + end + + def timestamp + @timestamp ||= Time.new.utc + end + + def missing_author_note + format(s_("GitHubImporter|*Merged by: %{author} at %{timestamp}*"), + author: pull_request.merged_by&.login || 'ghost', + timestamp: pull_request.merged_at + ) + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb new file mode 100644 index 00000000000..b250a42a53c --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb @@ -0,0 +1,141 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + module PullRequests + class ReviewImporter + # review - An instance of `Gitlab::GithubImport::Representation::PullRequestReview` + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(review, project, client) + @review = review + @project = project + @client = client + @merge_request = project.merge_requests.find_by_id(review.merge_request_id) + end + + def execute + user_finder = GithubImport::UserFinder.new(project, client) + + gitlab_user_id = user_finder.user_id_for(review.author) + + if gitlab_user_id + add_review_note!(gitlab_user_id) + add_approval!(gitlab_user_id) + add_reviewer!(gitlab_user_id) + else + add_complementary_review_note!(project.creator_id) + end + end + + private + + attr_reader :review, :merge_request, :project, :client + + def add_review_note!(author_id) + return if review.note.empty? + + add_note!(author_id, review_note_content) + end + + def add_complementary_review_note!(author_id) + return if review.note.empty? && !review.approval? 
+ + note_body = MarkdownText.format( + review_note_content, + review.author + ) + + add_note!(author_id, note_body) + end + + def review_note_content + header = "**Review:** #{review.review_type.humanize}" + + if review.note.present? + "#{header}\n\n#{review.note}" + else + header + end + end + + def add_note!(author_id, note) + note = Note.new(note_attributes(author_id, note)) + + note.save! + end + + def note_attributes(author_id, note, extra = {}) + { + importing: true, + noteable_id: merge_request.id, + noteable_type: 'MergeRequest', + project_id: project.id, + author_id: author_id, + note: note, + system: false, + created_at: submitted_at, + updated_at: submitted_at + }.merge(extra) + end + + def add_approval!(user_id) + return unless review.review_type == 'APPROVED' + + approval_attribues = { + merge_request_id: merge_request.id, + user_id: user_id, + created_at: submitted_at, + updated_at: submitted_at + } + + result = ::Approval.insert( + approval_attribues, + returning: [:id], + unique_by: [:user_id, :merge_request_id] + ) + + add_approval_system_note!(user_id) if result.rows.present? + end + + def add_reviewer!(user_id) + return if review_re_requested?(user_id) + + ::MergeRequestReviewer.create!( + merge_request_id: merge_request.id, + user_id: user_id, + state: ::MergeRequestReviewer.states['reviewed'], + created_at: submitted_at + ) + rescue ActiveRecord::RecordNotUnique + # multiple reviews from single person could make a SQL concurrency issue here + nil + end + + # rubocop:disable CodeReuse/ActiveRecord + def review_re_requested?(user_id) + # records that were imported on previous stage with "unreviewed" status + MergeRequestReviewer.where(merge_request_id: merge_request.id, user_id: user_id).exists? 
+ end + # rubocop:enable CodeReuse/ActiveRecord + + def add_approval_system_note!(user_id) + attributes = note_attributes( + user_id, + 'approved this merge request', + system: true, + system_note_metadata: SystemNoteMetadata.new(action: 'approved') + ) + + Note.create!(attributes) + end + + def submitted_at + @submitted_at ||= (review.submitted_at || merge_request.updated_at) + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_requests/review_request_importer.rb b/lib/gitlab/github_import/importer/pull_requests/review_request_importer.rb index bb51d856d9b..f51c610f24b 100644 --- a/lib/gitlab/github_import/importer/pull_requests/review_request_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests/review_request_importer.rb @@ -8,7 +8,6 @@ module Gitlab def initialize(review_request, project, client) @review_request = review_request @user_finder = UserFinder.new(project, client) - @issue_finder = IssuableFinder.new(project, client) end def execute @@ -20,7 +19,7 @@ module Gitlab attr_reader :review_request, :user_finder def build_reviewers - reviewer_ids = review_request.users.map { |user| user_finder.user_id_for(user) }.compact + reviewer_ids = review_request.users.filter_map { |user| user_finder.user_id_for(user) } reviewer_ids.map do |reviewer_id| MergeRequestReviewer.new( diff --git a/lib/gitlab/github_import/importer/pull_requests/review_requests_importer.rb b/lib/gitlab/github_import/importer/pull_requests/review_requests_importer.rb index c5d8da3be1c..0a92aee801d 100644 --- a/lib/gitlab/github_import/importer/pull_requests/review_requests_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests/review_requests_importer.rb @@ -18,6 +18,7 @@ module Gitlab review_requests = client.pull_request_review_requests(repo, merge_request.iid) review_requests[:merge_request_id] = merge_request.id + review_requests[:merge_request_iid] = merge_request.iid yield review_requests mark_merge_request_imported(merge_request) 
diff --git a/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb new file mode 100644 index 00000000000..347423b0e21 --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb @@ -0,0 +1,114 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + module PullRequests + class ReviewsImporter + include ParallelScheduling + + def initialize(...) + super + + @merge_requests_already_imported_cache_key = + "github-importer/merge_request/already-imported/#{project.id}" + end + + def importer_class + ReviewImporter + end + + def representation_class + Gitlab::GithubImport::Representation::PullRequestReview + end + + def sidekiq_worker_class + Gitlab::GithubImport::PullRequests::ImportReviewWorker + end + + def collection_method + :pull_request_reviews + end + + def object_type + :pull_request_review + end + + def id_for_already_imported_cache(review) + review[:id] + end + + # The worker can be interrupted, by rate limit for instance, + # in different situations. To avoid requesting already imported data, + # if the worker is interrupted: + # - before importing all reviews of a merge request + # The reviews page is cached with the `PageCounter`, by merge request. 
+ # - before importing all merge requests reviews + # Merge requests that had all the reviews imported are cached with + # `mark_merge_request_reviews_imported` + def each_object_to_import(&_block) + each_review_page do |page, merge_request| + page.objects.each do |review| + review = review.to_h + + next if already_imported?(review) + + Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) + + review[:merge_request_id] = merge_request.id + review[:merge_request_iid] = merge_request.iid + yield(review) + + mark_as_imported(review) + end + end + end + + private + + attr_reader :merge_requests_already_imported_cache_key + + def each_review_page + merge_requests_to_import.find_each do |merge_request| + # The page counter needs to be scoped by merge request to avoid skipping + # pages of reviews from already imported merge requests. + page_counter = PageCounter.new(project, page_counter_id(merge_request)) + repo = project.import_source + options = collection_options.merge(page: page_counter.current) + + client.each_page(collection_method, repo, merge_request.iid, options) do |page| + next unless page_counter.set(page.number) + + yield(page, merge_request) + end + + # Avoid unnecessary Redis cache keys after the work is done. + page_counter.expire! + mark_merge_request_reviews_imported(merge_request) + end + end + + # Returns only the merge requests that still have reviews to be imported. 
+ def merge_requests_to_import + project.merge_requests.id_not_in(already_imported_merge_requests) + end + + def already_imported_merge_requests + Gitlab::Cache::Import::Caching.values_from_set(merge_requests_already_imported_cache_key) + end + + def page_counter_id(merge_request) + "merge_request/#{merge_request.id}/#{collection_method}" + end + + def mark_merge_request_reviews_imported(merge_request) + Gitlab::Cache::Import::Caching.set_add( + merge_requests_already_imported_cache_key, + merge_request.id + ) + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb b/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb deleted file mode 100644 index c56b391cbec..00000000000 --- a/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module GithubImport - module Importer - class PullRequestsMergedByImporter - include ParallelScheduling - - def importer_class - PullRequestMergedByImporter - end - - def representation_class - Gitlab::GithubImport::Representation::PullRequest - end - - def sidekiq_worker_class - ImportPullRequestMergedByWorker - end - - def collection_method - :pull_requests_merged_by - end - - def object_type - :pull_request_merged_by - end - - def id_for_already_imported_cache(merge_request) - merge_request.id - end - - def each_object_to_import - merge_requests_to_import.find_each do |merge_request| - Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) - - pull_request = client.pull_request(project.import_source, merge_request.iid) - yield(pull_request) - - mark_as_imported(merge_request) - end - end - - private - - # Returns only the merge requests that still have merged_by to be imported. 
- def merge_requests_to_import - project.merge_requests.id_not_in(already_imported_objects).with_state(:merged) - end - - def already_imported_objects - Gitlab::Cache::Import::Caching.values_from_set(already_imported_cache_key) - end - end - end - end -end diff --git a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb deleted file mode 100644 index 543c29a21a0..00000000000 --- a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb +++ /dev/null @@ -1,111 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module GithubImport - module Importer - class PullRequestsReviewsImporter - include ParallelScheduling - - def initialize(...) - super - - @merge_requests_already_imported_cache_key = - "github-importer/merge_request/already-imported/#{project.id}" - end - - def importer_class - PullRequestReviewImporter - end - - def representation_class - Gitlab::GithubImport::Representation::PullRequestReview - end - - def sidekiq_worker_class - ImportPullRequestReviewWorker - end - - def collection_method - :pull_request_reviews - end - - def object_type - :pull_request_review - end - - def id_for_already_imported_cache(review) - review[:id] - end - - # The worker can be interrupted, by rate limit for instance, - # in different situations. To avoid requesting already imported data, - # if the worker is interrupted: - # - before importing all reviews of a merge request - # The reviews page is cached with the `PageCounter`, by merge request. 
- # - before importing all merge requests reviews - # Merge requests that had all the reviews imported are cached with - # `mark_merge_request_reviews_imported` - def each_object_to_import(&block) - each_review_page do |page, merge_request| - page.objects.each do |review| - review = review.to_h - - next if already_imported?(review) - - Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) - - review[:merge_request_id] = merge_request.id - yield(review) - - mark_as_imported(review) - end - end - end - - private - - attr_reader :merge_requests_already_imported_cache_key - - def each_review_page - merge_requests_to_import.find_each do |merge_request| - # The page counter needs to be scoped by merge request to avoid skipping - # pages of reviews from already imported merge requests. - page_counter = PageCounter.new(project, page_counter_id(merge_request)) - repo = project.import_source - options = collection_options.merge(page: page_counter.current) - - client.each_page(collection_method, repo, merge_request.iid, options) do |page| - next unless page_counter.set(page.number) - - yield(page, merge_request) - end - - # Avoid unnecessary Redis cache keys after the work is done. - page_counter.expire! - mark_merge_request_reviews_imported(merge_request) - end - end - - # Returns only the merge requests that still have reviews to be imported. 
- def merge_requests_to_import - project.merge_requests.id_not_in(already_imported_merge_requests) - end - - def already_imported_merge_requests - Gitlab::Cache::Import::Caching.values_from_set(merge_requests_already_imported_cache_key) - end - - def page_counter_id(merge_request) - "merge_request/#{merge_request.id}/#{collection_method}" - end - - def mark_merge_request_reviews_imported(merge_request) - Gitlab::Cache::Import::Caching.set_add( - merge_requests_already_imported_cache_key, - merge_request.id - ) - end - end - end - end -end diff --git a/lib/gitlab/github_import/importer/releases_importer.rb b/lib/gitlab/github_import/importer/releases_importer.rb index 62d579fda08..2f210dafd0c 100644 --- a/lib/gitlab/github_import/importer/releases_importer.rb +++ b/lib/gitlab/github_import/importer/releases_importer.rb @@ -73,6 +73,13 @@ module Gitlab def model Release end + + def github_identifiers(release) + { + tag: release[:tag_name], + object_type: object_type + } + end end end end diff --git a/lib/gitlab/github_import/importer/repository_importer.rb b/lib/gitlab/github_import/importer/repository_importer.rb index d7fe01e90f8..2654812b64a 100644 --- a/lib/gitlab/github_import/importer/repository_importer.rb +++ b/lib/gitlab/github_import/importer/repository_importer.rb @@ -66,13 +66,10 @@ module Gitlab true rescue ::Gitlab::Git::CommandError => e - if e.message !~ /repository not exported/ - project.create_wiki + return true if e.message.include?('repository not exported') - raise e - else - true - end + project.create_wiki + raise e end def wiki_url @@ -89,10 +86,8 @@ module Gitlab client_repository[:default_branch] end - def client_repository - strong_memoize(:client_repository) do - client.repository(project.import_source) - end + strong_memoize_attr def client_repository + client.repository(project.import_source) end end end diff --git a/lib/gitlab/github_import/job_delay_calculator.rb b/lib/gitlab/github_import/job_delay_calculator.rb new file mode 100644 
index 00000000000..52b211c92d6 --- /dev/null +++ b/lib/gitlab/github_import/job_delay_calculator.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # Used to calculate delay to spread sidekiq jobs on fetching records during import + # and upon job reschedule when the rate limit is reached + module JobDelayCalculator + # Default batch settings for parallel import (can be redefined in Importer/Worker classes) + def parallel_import_batch + { size: 1000, delay: 1.minute } + end + + private + + def calculate_job_delay(job_index) + multiplier = (job_index / parallel_import_batch[:size]) + + (multiplier * parallel_import_batch[:delay]) + 1.second + end + end + end +end diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index 1c814e34a39..e270cfba619 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -79,6 +79,22 @@ module Gitlab @url = url end + def part_of_project_blob?(import_source) + url.start_with?( + "#{::Gitlab::GithubImport::MarkdownText.github_url}/#{import_source}/blob" + ) + end + + def doc_belongs_to_project?(import_source) + url.start_with?( + "#{::Gitlab::GithubImport::MarkdownText.github_url}/#{import_source}/files" + ) + end + + def media? 
+ url.start_with?(::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN) + end + def inspect "<#{self.class.name}: { name: #{name}, url: #{url} }>" end diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb index 4b54a77983d..cfc1ec526b0 100644 --- a/lib/gitlab/github_import/parallel_scheduling.rb +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -3,6 +3,8 @@ module Gitlab module GithubImport module ParallelScheduling + include JobDelayCalculator + attr_reader :project, :client, :page_counter, :already_imported_cache_key, :job_waiter_cache_key, :job_waiter_remaining_cache_key @@ -85,14 +87,10 @@ module Gitlab def parallel_import raise 'Batch settings must be defined for parallel import' if parallel_import_batch.blank? - if Feature.enabled?(:improved_spread_parallel_import) - improved_spread_parallel_import - else - spread_parallel_import - end + spread_parallel_import end - def improved_spread_parallel_import + def spread_parallel_import enqueued_job_counter = 0 each_object_to_import do |object| @@ -108,33 +106,6 @@ module Gitlab job_waiter end - def spread_parallel_import - waiter = JobWaiter.new - - import_arguments = [] - - each_object_to_import do |object| - repr = object_representation(object) - - import_arguments << [project.id, repr.to_hash, waiter.key] - - waiter.jobs_remaining += 1 - end - - # rubocop:disable Scalability/BulkPerformWithContext - Gitlab::ApplicationContext.with_context(project: project) do - sidekiq_worker_class.bulk_perform_in( - 1.second, - import_arguments, - batch_size: parallel_import_batch[:size], - batch_delay: parallel_import_batch[:delay] - ) - end - # rubocop:enable Scalability/BulkPerformWithContext - - waiter - end - # The method that will be called for traversing through all the objects to # import, yielding them to the supplied block. 
def each_object_to_import @@ -228,11 +199,6 @@ module Gitlab raise NotImplementedError end - # Default batch settings for parallel import (can be redefined in Importer classes) - def parallel_import_batch - { size: 1000, delay: 1.minute } - end - def abort_on_failure false end @@ -274,12 +240,6 @@ module Gitlab JobWaiter.new(jobs_remaining, key) end end - - def calculate_job_delay(job_index) - multiplier = (job_index / parallel_import_batch[:size]) - - (multiplier * parallel_import_batch[:delay]) + 1.second - end end end end diff --git a/lib/gitlab/github_import/project_relation_type.rb b/lib/gitlab/github_import/project_relation_type.rb new file mode 100644 index 00000000000..a6e598172ee --- /dev/null +++ b/lib/gitlab/github_import/project_relation_type.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class ProjectRelationType + CACHE_ORGS_EXPIRES_IN = 5.minutes + CACHE_USER_EXPIRES_IN = 1.hour + + def initialize(client) + @client = client + end + + def for(import_source) + namespace = import_source.split('/')[0] + if user?(namespace) + 'owned' + elsif organization?(namespace) + 'organization' + else + 'collaborated' + end + end + + private + + attr_reader :client + + def user?(namespace) + github_user_login == namespace + end + + def organization?(namespace) + github_org_logins.include? 
namespace + end + + def github_user_login + ::Rails.cache.fetch(cache_key('user_login'), expire_in: CACHE_USER_EXPIRES_IN) do + client.user(nil)[:login] + end + end + + def github_org_logins + ::Rails.cache.fetch(cache_key('organization_logins'), expires_in: CACHE_ORGS_EXPIRES_IN) do + logins = [] + client.each_object(:organizations) { |org| logins.push(org[:login]) } + logins + end + end + + def cache_key(subject) + ['github_import', Gitlab::CryptoHelper.sha256(client.octokit.access_token), subject].join('/') + end + end + end +end diff --git a/lib/gitlab/github_import/representation/collaborator.rb b/lib/gitlab/github_import/representation/collaborator.rb new file mode 100644 index 00000000000..fb58a572151 --- /dev/null +++ b/lib/gitlab/github_import/representation/collaborator.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class Collaborator + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :id, :login, :role_name + + # Builds a user from a GitHub API response. + # + # collaborator - An instance of `Hash` containing the user & role details. + def self.from_api_response(collaborator, _additional_data = {}) + new( + id: collaborator[:id], + login: collaborator[:login], + role_name: collaborator[:role_name] + ) + end + + # Builds a user using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + new(Representation.symbolize_hash(raw_hash)) + end + + # attributes - A Hash containing the user details. The keys of this + # Hash (and any nested hashes) must be symbols. 
+ def initialize(attributes) + @attributes = attributes + end + + def github_identifiers + { + id: id, + login: login + } + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb index 9259d0295d5..0408b34bb02 100644 --- a/lib/gitlab/github_import/representation/diff_note.rb +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -120,7 +120,7 @@ module Gitlab def github_identifiers { note_id: note_id, - noteable_id: noteable_id, + noteable_iid: noteable_id, noteable_type: noteable_type } end diff --git a/lib/gitlab/github_import/representation/issue.rb b/lib/gitlab/github_import/representation/issue.rb index e878aeaf3b9..95a7c5ebf4b 100644 --- a/lib/gitlab/github_import/representation/issue.rb +++ b/lib/gitlab/github_import/representation/issue.rb @@ -79,7 +79,8 @@ module Gitlab def github_identifiers { iid: iid, - issuable_type: issuable_type + issuable_type: issuable_type, + title: title } end end diff --git a/lib/gitlab/github_import/representation/issue_event.rb b/lib/gitlab/github_import/representation/issue_event.rb index 39a23c016ce..068d5cf9482 100644 --- a/lib/gitlab/github_import/representation/issue_event.rb +++ b/lib/gitlab/github_import/representation/issue_event.rb @@ -20,7 +20,11 @@ module Gitlab end def github_identifiers - { id: id } + { + id: id, + issuable_iid: issuable_id, + event: event + } end def issuable_type diff --git a/lib/gitlab/github_import/representation/lfs_object.rb b/lib/gitlab/github_import/representation/lfs_object.rb index cd614db2161..716e77bf401 100644 --- a/lib/gitlab/github_import/representation/lfs_object.rb +++ b/lib/gitlab/github_import/representation/lfs_object.rb @@ -33,7 +33,8 @@ module Gitlab def github_identifiers { - oid: oid + oid: oid, + size: size } end end diff --git a/lib/gitlab/github_import/representation/note.rb b/lib/gitlab/github_import/representation/note.rb index 14379e8a4e9..7a8bdfb1c64 100644 --- 
a/lib/gitlab/github_import/representation/note.rb +++ b/lib/gitlab/github_import/representation/note.rb @@ -76,7 +76,7 @@ module Gitlab def github_identifiers { note_id: note_id, - noteable_id: noteable_id, + noteable_iid: noteable_id, noteable_type: noteable_type } end diff --git a/lib/gitlab/github_import/representation/note_text.rb b/lib/gitlab/github_import/representation/note_text.rb index 505d7d805d3..70dd242303a 100644 --- a/lib/gitlab/github_import/representation/note_text.rb +++ b/lib/gitlab/github_import/representation/note_text.rb @@ -16,35 +16,35 @@ module Gitlab attr_reader :attributes - expose_attribute :record_db_id, :record_type, :text - - class << self - # Builds a note text representation from DB record of Note or Release. - # - # record - An instance of `Note`, `Release`, `Issue`, `MergeRequest` model - def from_db_record(record) - check_record_class!(record) - - record_type = record.class.name - # only column for note is different along MODELS_ALLOWLIST - text = record.is_a?(::Note) ? record.note : record.description - new( - record_db_id: record.id, - record_type: record_type, - text: text - ) - end + expose_attribute :record_db_id, :record_type, :text, :iid, :tag, :noteable_type - def from_json_hash(raw_hash) - new Representation.symbolize_hash(raw_hash) - end + # Builds a note text representation from DB record of Note or Release. + # + # record - An instance of `Note`, `Release`, `Issue`, `MergeRequest` model + def self.from_db_record(record) + check_record_class!(record) - private + record_type = record.class.name + # only column for note is different along MODELS_ALLOWLIST + text = record.is_a?(::Note) ? 
record.note : record.description + new( + record_db_id: record.id, + record_type: record_type, + text: text, + iid: record.try(:iid), + tag: record.try(:tag), + noteable_type: record.try(:noteable_type) + ) + end - def check_record_class!(record) - raise ModelNotSupported, record.class.name if MODELS_ALLOWLIST.exclude?(record.class) - end + def self.from_json_hash(raw_hash) + new Representation.symbolize_hash(raw_hash) + end + + def self.check_record_class!(record) + raise ModelNotSupported, record.class.name if MODELS_ALLOWLIST.exclude?(record.class) end + private_class_method :check_record_class! # attributes - A Hash containing the event details. The keys of this # Hash (and any nested hashes) must be symbols. @@ -53,7 +53,22 @@ module Gitlab end def github_identifiers - { db_id: record_db_id } + { + db_id: record_db_id + }.merge(record_type_specific_attribute) + end + + private + + def record_type_specific_attribute + case record_type + when ::Release.name + { tag: tag } + when ::Issue.name, ::MergeRequest.name + { noteable_iid: iid } + when ::Note.name + { noteable_type: noteable_type } + end end end end diff --git a/lib/gitlab/github_import/representation/pull_request.rb b/lib/gitlab/github_import/representation/pull_request.rb index 4b8ae1f8eab..f26fa953773 100644 --- a/lib/gitlab/github_import/representation/pull_request.rb +++ b/lib/gitlab/github_import/representation/pull_request.rb @@ -111,7 +111,8 @@ module Gitlab def github_identifiers { iid: iid, - issuable_type: issuable_type + issuable_type: issuable_type, + title: title } end end diff --git a/lib/gitlab/github_import/representation/pull_request_review.rb b/lib/gitlab/github_import/representation/pull_request_review.rb index 8fb57ae89a4..0c6e281cd6d 100644 --- a/lib/gitlab/github_import/representation/pull_request_review.rb +++ b/lib/gitlab/github_import/representation/pull_request_review.rb @@ -9,7 +9,7 @@ module Gitlab attr_reader :attributes - expose_attribute :author, :note, :review_type, 
:submitted_at, :merge_request_id, :review_id + expose_attribute :author, :note, :review_type, :submitted_at, :merge_request_id, :merge_request_iid, :review_id # Builds a PullRequestReview from a GitHub API response. # @@ -19,6 +19,7 @@ module Gitlab new( merge_request_id: review[:merge_request_id], + merge_request_iid: review[:merge_request_iid], author: user, note: review[:body], review_type: review[:state], @@ -49,8 +50,8 @@ module Gitlab def github_identifiers { - review_id: review_id, - merge_request_id: merge_request_id + merge_request_iid: merge_request_iid, + review_id: review_id } end end diff --git a/lib/gitlab/github_import/representation/pull_requests/review_requests.rb b/lib/gitlab/github_import/representation/pull_requests/review_requests.rb index 692004c4460..a6ec1d3178b 100644 --- a/lib/gitlab/github_import/representation/pull_requests/review_requests.rb +++ b/lib/gitlab/github_import/representation/pull_requests/review_requests.rb @@ -10,7 +10,7 @@ module Gitlab attr_reader :attributes - expose_attribute :merge_request_id, :users + expose_attribute :merge_request_id, :merge_request_iid, :users class << self # Builds a list of requested reviewers from a GitHub API response. 
@@ -24,6 +24,7 @@ module Gitlab new( merge_request_id: review_requests[:merge_request_id], + merge_request_iid: review_requests[:merge_request_iid], users: users ) end @@ -37,7 +38,10 @@ module Gitlab end def github_identifiers - { merge_request_id: merge_request_id } + { + merge_request_iid: merge_request_iid, + requested_reviewers: users.pluck(:login) # rubocop: disable CodeReuse/ActiveRecord + } end end end diff --git a/lib/gitlab/github_import/settings.rb b/lib/gitlab/github_import/settings.rb index 77288b9fb98..0b883de8ed0 100644 --- a/lib/gitlab/github_import/settings.rb +++ b/lib/gitlab/github_import/settings.rb @@ -6,6 +6,7 @@ module Gitlab OPTIONAL_STAGES = { single_endpoint_issue_events_import: { label: 'Import issue and pull request events', + selected: false, details: <<-TEXT.split("\n").map(&:strip).join(' ') For example, opened or closed, renamed, and labeled or unlabeled. Time required to import these events depends on how many issues or pull requests your project has. @@ -13,17 +14,27 @@ module Gitlab }, single_endpoint_notes_import: { label: 'Use alternative comments import method', + selected: false, details: <<-TEXT.split("\n").map(&:strip).join(' ') The default method can skip some comments in large projects because of limitations of the GitHub API. TEXT }, attachments_import: { - label: 'Import Markdown attachments', + label: 'Import Markdown attachments (links)', + selected: false, details: <<-TEXT.split("\n").map(&:strip).join(' ') - Import Markdown attachments from repository comments, release posts, issue descriptions, + Import Markdown attachments (links) from repository comments, release posts, issue descriptions, and pull request descriptions. These can include images, text, or binary attachments. If not imported, links in Markdown to attachments break after you remove the attachments from GitHub. 
TEXT + }, + collaborators_import: { + label: 'Import collaborators', + selected: true, + details: <<-TEXT.split("\n").map(&:strip).join(' ') + Import direct repository collaborators who are not outside collaborators. + Imported collaborators who aren't members of the group you imported the project into consume seats on your GitLab instance. + TEXT } }.freeze @@ -32,6 +43,7 @@ module Gitlab { name: stage_name.to_s, label: s_(format("GitHubImport|%{text}", text: data[:label])), + selected: data[:selected], details: s_(format("GitHubImport|%{text}", text: data[:details])) } end diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb index b8751def08f..dd71edbd205 100644 --- a/lib/gitlab/github_import/user_finder.rb +++ b/lib/gitlab/github_import/user_finder.rb @@ -28,6 +28,9 @@ module Gitlab EMAIL_FOR_USERNAME_CACHE_KEY = 'github-import/user-finder/email-for-username/%s' + # The base cache key to use for caching inexistence of GitHub usernames. + INEXISTENCE_OF_GITHUB_USERNAME_CACHE_KEY = 'github-import/user-finder/inexistence-of-username/%s' + # project - An instance of `Project` # client - An instance of `Gitlab::GithubImport::Client` def initialize(project, client) @@ -113,12 +116,15 @@ module Gitlab cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username email = Gitlab::Cache::Import::Caching.read(cache_key) - unless email + if email.blank? 
&& !github_username_inexists?(username) user = client.user(username) email = Gitlab::Cache::Import::Caching.write(cache_key, user[:email], timeout: timeout(user[:email])) if user end email + rescue ::Octokit::NotFound + cache_github_username_inexistence(username) + nil end def cached_id_for_github_id(id) @@ -190,6 +196,18 @@ module Gitlab Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT end end + + def github_username_inexists?(username) + cache_key = INEXISTENCE_OF_GITHUB_USERNAME_CACHE_KEY % username + + Gitlab::Cache::Import::Caching.read(cache_key) == 'true' + end + + def cache_github_username_inexistence(username) + cache_key = INEXISTENCE_OF_GITHUB_USERNAME_CACHE_KEY % username + + Gitlab::Cache::Import::Caching.write(cache_key, true) + end end end end |