diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2022-09-20 02:18:09 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2022-09-20 02:18:09 +0300 |
commit | 6ed4ec3e0b1340f96b7c043ef51d1b33bbe85fde (patch) | |
tree | dc4d20fe6064752c0bd323187252c77e0a89144b /lib/gitlab/github_import | |
parent | 9868dae7fc0655bd7ce4a6887d4e6d487690eeed (diff) |
Add latest changes from gitlab-org/gitlab@15-4-stable-eev15.4.0-rc42
Diffstat (limited to 'lib/gitlab/github_import')
27 files changed, 575 insertions, 59 deletions
diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb new file mode 100644 index 00000000000..b71d5f753f2 --- /dev/null +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class AttachmentsDownloader + include ::Gitlab::ImportExport::CommandLineUtil + include ::BulkImports::FileDownloads::FilenameFetch + include ::BulkImports::FileDownloads::Validations + + DownloadError = Class.new(StandardError) + + FILENAME_SIZE_LIMIT = 255 # chars before the extension + DEFAULT_FILE_SIZE_LIMIT = 25.megabytes + TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze + + attr_reader :file_url, :filename, :file_size_limit + + def initialize(file_url, file_size_limit: DEFAULT_FILE_SIZE_LIMIT) + @file_url = file_url + @file_size_limit = file_size_limit + + filename = URI(file_url).path.split('/').last + @filename = ensure_filename_size(filename) + end + + def perform + validate_content_length + validate_filepath + + file = download + validate_symlink + file + end + + def delete + FileUtils.rm_rf File.dirname(filepath) + end + + private + + def raise_error(message) + raise DownloadError, message + end + + def response_headers + @response_headers ||= + Gitlab::HTTP.perform_request(Net::HTTP::Head, file_url, {}).headers + end + + def download + file = File.open(filepath, 'wb') + Gitlab::HTTP.perform_request(Net::HTTP::Get, file_url, stream_body: true) { |batch| file.write(batch) } + file + end + + def filepath + @filepath ||= begin + dir = File.join(TMP_DIR, SecureRandom.uuid) + mkdir_p dir + File.join(dir, filename) + end + end + end + end +end diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 11a41149274..6cff15a204f 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -76,11 +76,15 @@ module Gitlab each_object(:pull_request_reviews, repo_name, iid) end + def repos(options = {}) + octokit.repos(nil, options).map(&:to_h) + end + # Returns the details of a GitHub repository. # # name - The path (in the form `owner/repository`) of the repository. def repository(name) - with_rate_limit { octokit.repo(name) } + with_rate_limit { octokit.repo(name).to_h } end def pull_request(repo_name, iid) @@ -99,6 +103,14 @@ module Gitlab each_object(:releases, *args) end + def branches(*args) + each_object(:branches, *args) + end + + def branch_protection(repo_name, branch_name) + with_rate_limit { octokit.branch_protection(repo_name, branch_name) } + end + # Fetches data from the GitHub API and yields a Page object for every page # of data, without loading all of them into memory. # @@ -167,7 +179,7 @@ module Gitlab end def search_repos_by_name(name, options = {}) - with_retry { octokit.search_repositories(search_query(str: name, type: :name), options) } + with_retry { octokit.search_repositories(search_query(str: name, type: :name), options).to_h } end def search_query(str:, type:, include_collaborations: true, include_orgs: true) diff --git a/lib/gitlab/github_import/importer/events/base_importer.rb b/lib/gitlab/github_import/importer/events/base_importer.rb index 9ab1d916d33..8218acf2bfb 100644 --- a/lib/gitlab/github_import/importer/events/base_importer.rb +++ b/lib/gitlab/github_import/importer/events/base_importer.rb @@ -29,6 +29,19 @@ module Gitlab def issuable_db_id(object) IssuableFinder.new(project, object).database_id end + + def issuable_type(issue_event) + merge_request_event?(issue_event) ? MergeRequest.name : Issue.name + end + + def merge_request_event?(issue_event) + issue_event.issuable_type == MergeRequest.name + end + + def resource_event_belongs_to(issue_event) + belongs_to_key = merge_request_event?(issue_event) ? :merge_request_id : :issue_id + { belongs_to_key => issuable_db_id(issue_event) } + end end end end diff --git a/lib/gitlab/github_import/importer/events/changed_assignee.rb b/lib/gitlab/github_import/importer/events/changed_assignee.rb index c8f6335e4a8..b75d41f40de 100644 --- a/lib/gitlab/github_import/importer/events/changed_assignee.rb +++ b/lib/gitlab/github_import/importer/events/changed_assignee.rb @@ -7,22 +7,22 @@ module Gitlab class ChangedAssignee < BaseImporter def execute(issue_event) assignee_id = author_id(issue_event, author_key: :assignee) - assigner_id = author_id(issue_event, author_key: :assigner) + author_id = author_id(issue_event, author_key: :actor) - note_body = parse_body(issue_event, assigner_id, assignee_id) + note_body = parse_body(issue_event, assignee_id) - create_note(issue_event, note_body, assigner_id) + create_note(issue_event, note_body, author_id) end private - def create_note(issue_event, note_body, assigner_id) + def create_note(issue_event, note_body, author_id) Note.create!( system: true, - noteable_type: Issue.name, + noteable_type: issuable_type(issue_event), noteable_id: issuable_db_id(issue_event), project: project, - author_id: assigner_id, + author_id: author_id, note: note_body, system_note_metadata: SystemNoteMetadata.new( { @@ -36,12 +36,14 @@ module Gitlab ) end - def parse_body(issue_event, assigner_id, assignee_id) + def parse_body(issue_event, assignee_id) + assignee = User.find(assignee_id).to_reference + Gitlab::I18n.with_default_locale do if issue_event.event == "unassigned" - "unassigned #{User.find(assigner_id).to_reference}" + "unassigned #{assignee}" else - "assigned to #{User.find(assignee_id).to_reference}" + "assigned to #{assignee}" end end end diff --git a/lib/gitlab/github_import/importer/events/changed_label.rb b/lib/gitlab/github_import/importer/events/changed_label.rb index 818a9202745..83130d18db9 100644 --- a/lib/gitlab/github_import/importer/events/changed_label.rb +++ b/lib/gitlab/github_import/importer/events/changed_label.rb @@ -12,13 +12,14 @@ module Gitlab private def create_event(issue_event) - ResourceLabelEvent.create!( - issue_id: issuable_db_id(issue_event), + attrs = { user_id: author_id(issue_event), label_id: label_finder.id_for(issue_event.label_title), action: action(issue_event.event), created_at: issue_event.created_at - ) + }.merge(resource_event_belongs_to(issue_event)) + + ResourceLabelEvent.create!(attrs) end def label_finder diff --git a/lib/gitlab/github_import/importer/events/changed_milestone.rb b/lib/gitlab/github_import/importer/events/changed_milestone.rb index 3164c041dc3..39b92d88b58 100644 --- a/lib/gitlab/github_import/importer/events/changed_milestone.rb +++ b/lib/gitlab/github_import/importer/events/changed_milestone.rb @@ -17,14 +17,15 @@ module Gitlab private def create_event(issue_event) - ResourceMilestoneEvent.create!( - issue_id: issuable_db_id(issue_event), + attrs = { user_id: author_id(issue_event), created_at: issue_event.created_at, milestone_id: project.milestones.find_by_title(issue_event.milestone_title)&.id, action: action(issue_event.event), state: DEFAULT_STATE - ) + }.merge(resource_event_belongs_to(issue_event)) + + ResourceMilestoneEvent.create!(attrs) end def action(event_type) diff --git a/lib/gitlab/github_import/importer/events/changed_reviewer.rb b/lib/gitlab/github_import/importer/events/changed_reviewer.rb new file mode 100644 index 00000000000..17b1fa4ab45 --- /dev/null +++ b/lib/gitlab/github_import/importer/events/changed_reviewer.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + module Events + class ChangedReviewer < BaseImporter + def execute(issue_event) + requested_reviewer_id = author_id(issue_event, author_key: :requested_reviewer) + review_requester_id = author_id(issue_event, author_key: :review_requester) + + note_body = parse_body(issue_event, requested_reviewer_id) + + create_note(issue_event, note_body, review_requester_id) + end + + private + + def create_note(issue_event, note_body, review_requester_id) + Note.create!( + system: true, + noteable_type: issuable_type(issue_event), + noteable_id: issuable_db_id(issue_event), + project: project, + author_id: review_requester_id, + note: note_body, + system_note_metadata: SystemNoteMetadata.new( + { + action: 'reviewer', + created_at: issue_event.created_at, + updated_at: issue_event.created_at + } + ), + created_at: issue_event.created_at, + updated_at: issue_event.created_at + ) + end + + def parse_body(issue_event, requested_reviewer_id) + requested_reviewer = User.find(requested_reviewer_id).to_reference + + if issue_event.event == 'review_request_removed' + "#{SystemNotes::IssuablesService.issuable_events[:review_request_removed]}" \ + " #{requested_reviewer}" + else + "#{SystemNotes::IssuablesService.issuable_events[:review_requested]}" \ + " #{requested_reviewer}" + end + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/events/closed.rb b/lib/gitlab/github_import/importer/events/closed.rb index ca8730d0f27..58d9dbf826c 100644 --- a/lib/gitlab/github_import/importer/events/closed.rb +++ b/lib/gitlab/github_import/importer/events/closed.rb @@ -17,7 +17,7 @@ module Gitlab project_id: project.id, author_id: author_id(issue_event), action: 'closed', - target_type: Issue.name, + target_type: issuable_type(issue_event), target_id: issuable_db_id(issue_event), created_at: issue_event.created_at, updated_at: issue_event.created_at @@ -25,15 +25,16 @@ module Gitlab end def create_state_event(issue_event) - ResourceStateEvent.create!( + attrs = { user_id: author_id(issue_event), - issue_id: issuable_db_id(issue_event), source_commit: issue_event.commit_id, state: 'closed', close_after_error_tracking_resolve: false, close_auto_resolve_prometheus_alert: false, created_at: issue_event.created_at - ) + }.merge(resource_event_belongs_to(issue_event)) + + ResourceStateEvent.create!(attrs) end end end diff --git a/lib/gitlab/github_import/importer/events/cross_referenced.rb b/lib/gitlab/github_import/importer/events/cross_referenced.rb index 89fc1bdeb09..b56ae186d3c 100644 --- a/lib/gitlab/github_import/importer/events/cross_referenced.rb +++ b/lib/gitlab/github_import/importer/events/cross_referenced.rb @@ -33,7 +33,7 @@ module Gitlab def create_note(issue_event, note_body, user_id) Note.create!( system: true, - noteable_type: Issue.name, + noteable_type: issuable_type(issue_event), noteable_id: issuable_db_id(issue_event), project: project, author_id: user_id, diff --git a/lib/gitlab/github_import/importer/events/renamed.rb b/lib/gitlab/github_import/importer/events/renamed.rb index 96d112b04c6..fb9e08116ba 100644 --- a/lib/gitlab/github_import/importer/events/renamed.rb +++ b/lib/gitlab/github_import/importer/events/renamed.rb @@ -14,7 +14,7 @@ module Gitlab def note_params(issue_event) { noteable_id: issuable_db_id(issue_event), - noteable_type: Issue.name, + noteable_type: issuable_type(issue_event), project_id: project.id, author_id: author_id(issue_event), note: parse_body(issue_event), diff --git a/lib/gitlab/github_import/importer/events/reopened.rb b/lib/gitlab/github_import/importer/events/reopened.rb index b75344bf817..8abeba0777d 100644 --- a/lib/gitlab/github_import/importer/events/reopened.rb +++ b/lib/gitlab/github_import/importer/events/reopened.rb @@ -17,7 +17,7 @@ module Gitlab project_id: project.id, author_id: author_id(issue_event), action: 'reopened', - target_type: Issue.name, + target_type: issuable_type(issue_event), target_id: issuable_db_id(issue_event), created_at: issue_event.created_at, updated_at: issue_event.created_at @@ -25,12 +25,13 @@ module Gitlab end def create_state_event(issue_event) - ResourceStateEvent.create!( + attrs = { user_id: author_id(issue_event), - issue_id: issuable_db_id(issue_event), state: 'reopened', created_at: issue_event.created_at - ) + }.merge(resource_event_belongs_to(issue_event)) + + ResourceStateEvent.create!(attrs) end end end diff --git a/lib/gitlab/github_import/importer/issue_event_importer.rb b/lib/gitlab/github_import/importer/issue_event_importer.rb index ef456e56ee1..80749aae93c 100644 --- a/lib/gitlab/github_import/importer/issue_event_importer.rb +++ b/lib/gitlab/github_import/importer/issue_event_importer.rb @@ -15,11 +15,7 @@ module Gitlab @client = client end - # TODO: Add MergeRequest events support - # https://gitlab.com/groups/gitlab-org/-/epics/7673 def execute - return if issue_event.issuable_type == 'MergeRequest' - importer = event_importer_class(issue_event) if importer importer.new(project, client).execute(issue_event) @@ -49,6 +45,8 @@ module Gitlab Gitlab::GithubImport::Importer::Events::CrossReferenced when 'assigned', 'unassigned' Gitlab::GithubImport::Importer::Events::ChangedAssignee + when 'review_requested', 'review_request_removed' + Gitlab::GithubImport::Importer::Events::ChangedReviewer end end end diff --git a/lib/gitlab/github_import/importer/protected_branch_importer.rb b/lib/gitlab/github_import/importer/protected_branch_importer.rb new file mode 100644 index 00000000000..16215fdce8e --- /dev/null +++ b/lib/gitlab/github_import/importer/protected_branch_importer.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class ProtectedBranchImporter + attr_reader :protected_branch, :project, :client + + # protected_branch - An instance of + # `Gitlab::GithubImport::Representation::ProtectedBranch`. + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(protected_branch, project, client) + @protected_branch = protected_branch + @project = project + @client = client + end + + def execute + # The creator of the project is always allowed to create protected + # branches, so we skip the authorization check in this service class. + ProtectedBranches::CreateService + .new(project, project.creator, params) + .execute(skip_authorization: true) + end + + private + + def params + { + name: protected_branch.id, + push_access_levels_attributes: [{ access_level: Gitlab::Access::MAINTAINER }], + merge_access_levels_attributes: [{ access_level: Gitlab::Access::MAINTAINER }], + allow_force_push: allow_force_push? + } + end + + def allow_force_push? + if ProtectedBranch.protected?(project, protected_branch.id) + ProtectedBranch.allow_force_push?(project, protected_branch.id) && protected_branch.allow_force_pushes + else + protected_branch.allow_force_pushes + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/protected_branches_importer.rb b/lib/gitlab/github_import/importer/protected_branches_importer.rb new file mode 100644 index 00000000000..b5be823d5ab --- /dev/null +++ b/lib/gitlab/github_import/importer/protected_branches_importer.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class ProtectedBranchesImporter + include ParallelScheduling + + # The method that will be called for traversing through all the objects to + # import, yielding them to the supplied block. + def each_object_to_import + repo = project.import_source + + protected_branches = client.branches(repo).select { |branch| branch.protection&.enabled } + protected_branches.each do |protected_branch| + object = client.branch_protection(repo, protected_branch.name) + next if object.nil? || already_imported?(object) + + yield object + + Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) + mark_as_imported(object) + end + end + + def importer_class + ProtectedBranchImporter + end + + def representation_class + Gitlab::GithubImport::Representation::ProtectedBranch + end + + def sidekiq_worker_class + ImportProtectedBranchWorker + end + + def object_type + :protected_branch + end + + def collection_method + :protected_branches + end + + def id_for_already_imported_cache(protected_branch) + protected_branch.name + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/release_attachments_importer.rb b/lib/gitlab/github_import/importer/release_attachments_importer.rb new file mode 100644 index 00000000000..6419851623c --- /dev/null +++ b/lib/gitlab/github_import/importer/release_attachments_importer.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class ReleaseAttachmentsImporter + attr_reader :release_db_id, :release_description, :project + + # release - An instance of `ReleaseAttachments`. + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(release_attachments, project, _client = nil) + @release_db_id = release_attachments.release_db_id + @release_description = release_attachments.description + @project = project + end + + def execute + attachment_urls = MarkdownText.fetch_attachment_urls(release_description) + new_description = attachment_urls.reduce(release_description) do |description, url| + new_url = download_attachment(url) + description.gsub(url, new_url) + end + + Release.find(release_db_id).update_column(:description, new_description) + end + + private + + # in: github attachment markdown url + # out: gitlab attachment markdown url + def download_attachment(markdown_url) + url = extract_url_from_markdown(markdown_url) + name_prefix = extract_name_from_markdown(markdown_url) + + downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(url) + file = downloader.perform + uploader = UploadService.new(project, file, FileUploader).execute + "#{name_prefix}(#{uploader.to_h[:url]})" + ensure + downloader&.delete + end + + # in: "![image-icon](https://user-images.githubusercontent.com/..)" + # out: https://user-images.githubusercontent.com/.. + def extract_url_from_markdown(text) + text.match(%r{https://.*\)$}).to_a[0].chop + end + + # in: "![image-icon](https://user-images.githubusercontent.com/..)" + # out: ![image-icon] + def extract_name_from_markdown(text) + text.match(%r{^!?\[.*\]}).to_a[0] + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/releases_attachments_importer.rb b/lib/gitlab/github_import/importer/releases_attachments_importer.rb new file mode 100644 index 00000000000..7221c802d83 --- /dev/null +++ b/lib/gitlab/github_import/importer/releases_attachments_importer.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class ReleasesAttachmentsImporter + include ParallelScheduling + + BATCH_SIZE = 100 + + # The method that will be called for traversing through all the objects to + # import, yielding them to the supplied block. + def each_object_to_import + project.releases.select(:id, :description).each_batch(of: BATCH_SIZE, column: :id) do |batch| + batch.each do |release| + next if already_imported?(release) + + Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) + + yield release + + # We mark the object as imported immediately so we don't end up + # scheduling it multiple times. + mark_as_imported(release) + end + end + end + + def representation_class + Representation::ReleaseAttachments + end + + def importer_class + ReleaseAttachmentsImporter + end + + def sidekiq_worker_class + ImportReleaseAttachmentsWorker + end + + def collection_method + :release_attachments + end + + def object_type + :release_attachment + end + + def id_for_already_imported_cache(release) + release.id + end + + def object_representation(object) + representation_class.from_db_record(object) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/repository_importer.rb b/lib/gitlab/github_import/importer/repository_importer.rb index aba4729e9c8..708768a60cf 100644 --- a/lib/gitlab/github_import/importer/repository_importer.rb +++ b/lib/gitlab/github_import/importer/repository_importer.rb @@ -17,7 +17,7 @@ module Gitlab # Returns true if we should import the wiki for the project. # rubocop: disable CodeReuse/ActiveRecord def import_wiki? - client_repository&.has_wiki && + client_repository[:has_wiki] && !project.wiki_repository_exists? && Gitlab::GitalyClient::RemoteService.exists?(wiki_url) end @@ -86,7 +86,7 @@ module Gitlab private def default_branch - client_repository&.default_branch + client_repository[:default_branch] end def client_repository diff --git a/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb b/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb index 8e4015acbbc..8a9ddfc6ec0 100644 --- a/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb +++ b/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb @@ -7,7 +7,7 @@ module Gitlab include ParallelScheduling include SingleEndpointNotesImporting - PROCESSED_PAGE_CACHE_KEY = 'issues/%{issue_iid}/%{collection}' + PROCESSED_PAGE_CACHE_KEY = 'issues/%{issuable_iid}/%{collection}' BATCH_SIZE = 100 def initialize(project, client, parallel: true) @@ -27,12 +27,20 @@ module Gitlab Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) - associated.issue = { 'number' => parent_record.iid } + pull_request = parent_record.is_a? MergeRequest + associated.issue = { 'number' => parent_record.iid, 'pull_request' => pull_request } yield(associated) mark_as_imported(associated) end + # In Github Issues and MergeRequests uses the same API to get their events. + # Even more - they have commonly uniq iid + def each_associated_page(&block) + issues_collection.each_batch(of: BATCH_SIZE, column: :iid) { |batch| process_batch(batch, &block) } + merge_requests_collection.each_batch(of: BATCH_SIZE, column: :iid) { |batch| process_batch(batch, &block) } + end + def importer_class IssueEventImporter end @@ -53,16 +61,20 @@ module Gitlab :issue_timeline end - def parent_collection + def issues_collection project.issues.where.not(iid: already_imported_parents).select(:id, :iid) # rubocop: disable CodeReuse/ActiveRecord end + def merge_requests_collection + project.merge_requests.where.not(iid: already_imported_parents).select(:id, :iid) # rubocop: disable CodeReuse/ActiveRecord + end + def parent_imported_cache_key "github-importer/issues/#{collection_method}/already-imported/#{project.id}" end - def page_counter_id(issue) - PROCESSED_PAGE_CACHE_KEY % { issue_iid: issue.iid, collection: collection_method } + def page_counter_id(issuable) + PROCESSED_PAGE_CACHE_KEY % { issuable_iid: issuable.iid, collection: collection_method } end def id_for_already_imported_cache(event) @@ -74,10 +86,10 @@ module Gitlab end # Cross-referenced events on Github doesn't have id. - def compose_associated_id!(issue, event) + def compose_associated_id!(issuable, event) return if event.event != 'cross-referenced' - event.id = "cross-reference##{issue.id}-in-#{event.source.issue.id}" + event.id = "cross-reference##{issuable.iid}-in-#{event.source.issue.id}" end end end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 692016bd005..bf2856bc77f 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -1,5 +1,8 @@ # frozen_string_literal: true +# This class includes overriding Kernel#format method +# what makes impossible to use it here +# rubocop:disable Style/FormatString module Gitlab module GithubImport class MarkdownText @@ -8,6 +11,21 @@ module Gitlab ISSUE_REF_MATCHER = '%{github_url}/%{import_source}/issues' PULL_REF_MATCHER = '%{github_url}/%{import_source}/pull' + MEDIA_TYPES = %w[gif jpeg jpg mov mp4 png svg webm].freeze + DOC_TYPES = %w[ + csv docx fodg fodp fods fodt gz log md odf odg odp ods + odt pdf pptx tgz txt xls xlsx zip + ].freeze + ALL_TYPES = (MEDIA_TYPES + DOC_TYPES).freeze + + # On github.com we have base url for docs and CDN url for media. + # On github EE as far as we can know there is no CDN urls and media is placed on base url. + # To no escape the escaping symbol we use single quotes instead of double with interpolation. + # rubocop:disable Style/StringConcatenation + CDN_URL_MATCHER = '(!\[.+\]\(%{github_media_cdn}/\d+/(\w|-)+\.(' + MEDIA_TYPES.join('|') + ')\))' + BASE_URL_MATCHER = '(\[.+\]\(%{github_url}/.+/.+/files/\d+/.+\.(' + ALL_TYPES.join('|') + ')\))' + # rubocop:enable Style/StringConcatenation + class << self def format(*args) new(*args).to_s @@ -24,8 +42,20 @@ module Gitlab .gsub(pull_ref_matcher, url_helpers.project_merge_requests_url(project)) end + def fetch_attachment_urls(text) + cdn_url_matcher = CDN_URL_MATCHER % { github_media_cdn: Regexp.escape(github_media_cdn) } + doc_url_matcher = BASE_URL_MATCHER % { github_url: Regexp.escape(github_url) } + + text.scan(Regexp.new(cdn_url_matcher)).map(&:first) + + text.scan(Regexp.new(doc_url_matcher)).map(&:first) + end + private + def github_media_cdn + 'https://user-images.githubusercontent.com' + end + # Returns github domain without slash in the end def github_url oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} @@ -63,3 +93,4 @@ module Gitlab end end end +# rubocop:enable Style/FormatString diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb index a8c18c74d24..bf5046de36c 100644 --- a/lib/gitlab/github_import/parallel_scheduling.rb +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -63,7 +63,7 @@ module Gitlab # Imports all the objects in sequence in the current thread. def sequential_import each_object_to_import do |object| - repr = representation_class.from_api_response(object, additional_object_data) + repr = object_representation(object) importer_class.new(repr, project, client).execute end @@ -83,7 +83,7 @@ module Gitlab import_arguments = [] each_object_to_import do |object| - repr = representation_class.from_api_response(object, additional_object_data) + repr = object_representation(object) import_arguments << [project.id, repr.to_hash, waiter.key] @@ -210,6 +210,10 @@ module Gitlab {} end + def object_representation(object) + representation_class.from_api_response(object, additional_object_data) + end + def info(project_id, extra = {}) Logger.info(log_attributes(project_id, extra)) end diff --git a/lib/gitlab/github_import/representation/expose_attribute.rb b/lib/gitlab/github_import/representation/expose_attribute.rb index d2438ee8094..84de4d4798d 100644 --- a/lib/gitlab/github_import/representation/expose_attribute.rb +++ b/lib/gitlab/github_import/representation/expose_attribute.rb @@ -20,6 +20,10 @@ module Gitlab end end end + + def [](key) + respond_to?(key.to_sym) ? attributes[key] : nil + end end end end diff --git a/lib/gitlab/github_import/representation/issue_event.rb b/lib/gitlab/github_import/representation/issue_event.rb index 67a5df73a97..89271a7dcd6 100644 --- a/lib/gitlab/github_import/representation/issue_event.rb +++ b/lib/gitlab/github_import/representation/issue_event.rb @@ -10,7 +10,8 @@ module Gitlab attr_reader :attributes expose_attribute :id, :actor, :event, :commit_id, :label_title, :old_title, :new_title, - :milestone_title, :issue, :source, :assignee, :assigner, :created_at + :milestone_title, :issue, :source, :assignee, :review_requester, + :requested_reviewer, :created_at # attributes - A Hash containing the event details. The keys of this # Hash (and any nested hashes) must be symbols. @@ -47,7 +48,8 @@ module Gitlab issue: event.issue&.to_h&.symbolize_keys, source: event.source, assignee: user_representation(event.assignee), - assigner: user_representation(event.assigner), + requested_reviewer: user_representation(event.requested_reviewer), + review_requester: user_representation(event.review_requester), created_at: event.created_at ) end @@ -57,7 +59,8 @@ module Gitlab hash = Representation.symbolize_hash(raw_hash) hash[:actor] = user_representation(hash[:actor], source: :hash) hash[:assignee] = user_representation(hash[:assignee], source: :hash) - hash[:assigner] = user_representation(hash[:assigner], source: :hash) + hash[:requested_reviewer] = user_representation(hash[:requested_reviewer], source: :hash) + hash[:review_requester] = user_representation(hash[:review_requester], source: :hash) new(hash) end diff --git a/lib/gitlab/github_import/representation/protected_branch.rb b/lib/gitlab/github_import/representation/protected_branch.rb new file mode 100644 index 00000000000..b80b7cf1076 --- /dev/null +++ b/lib/gitlab/github_import/representation/protected_branch.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class ProtectedBranch + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :id, :allow_force_pushes + + # Builds a Branch Protection info from a GitHub API response. + # Resource structure details: + # https://docs.github.com/en/rest/branches/branch-protection#get-branch-protection + # branch_protection - An instance of `Sawyer::Resource` containing the protection details. + def self.from_api_response(branch_protection, _additional_object_data = {}) + branch_name = branch_protection.url.match(%r{/branches/(\S{1,255})/protection$})[1] + + hash = { + id: branch_name, + allow_force_pushes: branch_protection.allow_force_pushes.enabled + } + + new(hash) + end + + # Builds a new Protection using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + new(Representation.symbolize_hash(raw_hash)) + end + + # attributes - A Hash containing the raw Protection details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + + def github_identifiers + { id: id } + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/release_attachments.rb b/lib/gitlab/github_import/representation/release_attachments.rb new file mode 100644 index 00000000000..fd272be2405 --- /dev/null +++ b/lib/gitlab/github_import/representation/release_attachments.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +# This class only partly represents Release record from DB and +# is used to connect ReleasesAttachmentsImporter with ReleaseAttachmentsImporter +# without modifying ObjectImporter a lot. +# Attachments are inside release's `description`. +module Gitlab + module GithubImport + module Representation + class ReleaseAttachments + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :release_db_id, :description + + # Builds a event from a GitHub API response. + # + # release - An instance of `Release` model. + def self.from_db_record(release) + new( + release_db_id: release.id, + description: release.description + ) + end + + def self.from_json_hash(raw_hash) + new Representation.symbolize_hash(raw_hash) + end + + # attributes - A Hash containing the event details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + + def github_identifiers + { db_id: release_db_id } + end + end + end + end +end diff --git a/lib/gitlab/github_import/sequential_importer.rb b/lib/gitlab/github_import/sequential_importer.rb index 6bc37337799..ab37bc92ee7 100644 --- a/lib/gitlab/github_import/sequential_importer.rb +++ b/lib/gitlab/github_import/sequential_importer.rb @@ -16,6 +16,7 @@ module Gitlab ].freeze PARALLEL_IMPORTERS = [ + Importer::ProtectedBranchesImporter, Importer::PullRequestsImporter, Importer::IssuesImporter, Importer::DiffNotesImporter, diff --git a/lib/gitlab/github_import/single_endpoint_notes_importing.rb b/lib/gitlab/github_import/single_endpoint_notes_importing.rb index 0a3559adde3..aea4059dfbc 100644 --- a/lib/gitlab/github_import/single_endpoint_notes_importing.rb +++ b/lib/gitlab/github_import/single_endpoint_notes_importing.rb @@ -63,23 +63,27 @@ module Gitlab mark_as_imported(associated) end - def each_associated_page + def each_associated_page(&block) parent_collection.each_batch(of: BATCH_SIZE, column: :iid) do |batch| - batch.each do |parent_record| - # The page counter needs to be scoped by parent_record to avoid skipping - # pages of notes from already imported parent_record. - page_counter = PageCounter.new(project, page_counter_id(parent_record)) - repo = project.import_source - options = collection_options.merge(page: page_counter.current) + process_batch(batch, &block) + end + end - client.each_page(collection_method, repo, parent_record.iid, options) do |page| - next unless page_counter.set(page.number) + def process_batch(batch) + batch.each do |parent_record| + # The page counter needs to be scoped by parent_record to avoid skipping + # pages of notes from already imported parent_record. + page_counter = PageCounter.new(project, page_counter_id(parent_record)) + repo = project.import_source + options = collection_options.merge(page: page_counter.current) - yield parent_record, page - end + client.each_page(collection_method, repo, parent_record.iid, options) do |page| + next unless page_counter.set(page.number) - mark_parent_imported(parent_record) + yield parent_record, page end + + mark_parent_imported(parent_record) end end diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb index 6d6a00d260d..1feb0d450f0 100644 --- a/lib/gitlab/github_import/user_finder.rb +++ b/lib/gitlab/github_import/user_finder.rb @@ -45,8 +45,10 @@ module Gitlab object&.actor when :assignee object&.assignee - when :assigner - object&.assigner + when :requested_reviewer + object&.requested_reviewer + when :review_requester + object&.review_requester else object&.author end |