diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2024-01-16 13:42:19 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2024-01-16 13:42:19 +0300 |
commit | 84d1bd786125c1c14a3ba5f63e38a4cc736a9027 (patch) | |
tree | f550fa965f507077e20dbb6d61a8269a99ef7107 /lib/gitlab/github_import | |
parent | 3a105e36e689f7b75482236712f1a47fd5a76814 (diff) |
Add latest changes from gitlab-org/gitlab@16-8-stable-ee (tag: v16.8.0-rc42)
Diffstat (limited to 'lib/gitlab/github_import')
23 files changed, 407 insertions, 80 deletions
diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index df9c6c8342d..e9192b97506 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -11,7 +11,7 @@ module Gitlab UnsupportedAttachmentError = Class.new(StandardError) FILENAME_SIZE_LIMIT = 255 # chars before the extension - DEFAULT_FILE_SIZE_LIMIT = 25.megabytes + DEFAULT_FILE_SIZE_LIMIT = Gitlab::CurrentSettings.max_attachment_size.megabytes TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze attr_reader :file_url, :filename, :file_size_limit, :options @@ -26,7 +26,6 @@ module Gitlab end def perform - validate_content_length validate_filepath download_url = get_assets_download_redirection_url @@ -46,11 +45,6 @@ module Gitlab raise DownloadError, message end - def response_headers - @response_headers ||= - Gitlab::HTTP.perform_request(Net::HTTP::Head, file_url, {}).headers - end - # Github /assets redirection link will redirect to aws which has its own authorization. # Keeping our bearer token will cause request rejection # eg. Only one auth mechanism allowed; only the X-Amz-Algorithm query parameter, @@ -78,7 +72,19 @@ module Gitlab def download_from(url) file = File.open(filepath, 'wb') - Gitlab::HTTP.perform_request(Net::HTTP::Get, url, stream_body: true) { |batch| file.write(batch) } + + Gitlab::HTTP.perform_request(Net::HTTP::Get, url, stream_body: true) do |chunk| + next if [301, 302, 303, 307, 308].include?(chunk.code) + + raise DownloadError, "Error downloading file from #{url}. 
Error code: #{chunk.code}" if chunk.code != 200 + + file.write(chunk) + validate_size!(file.size) + rescue Gitlab::GithubImport::AttachmentsDownloader::DownloadError + delete + raise + end + file end diff --git a/lib/gitlab/github_import/events_cache.rb b/lib/gitlab/github_import/events_cache.rb new file mode 100644 index 00000000000..0986ccfaed1 --- /dev/null +++ b/lib/gitlab/github_import/events_cache.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class EventsCache + MAX_NUMBER_OF_EVENTS = 100 + MAX_EVENT_SIZE = 100.kilobytes + + def initialize(project) + @project = project + end + + # Add issue event as JSON to the cache + # + # @param record [ActiveRecord::Model] Model that responds to :iid + # @param event [GitLab::GitHubImport::Representation::IssueEvent] + def add(record, issue_event) + json = issue_event.to_hash.to_json + + if json.bytesize > MAX_EVENT_SIZE + Logger.warn( + message: 'Event too large to cache', + project_id: project.id, + github_identifiers: issue_event.github_identifiers + ) + + return + end + + Gitlab::Cache::Import::Caching.list_add(events_cache_key(record), json, limit: MAX_NUMBER_OF_EVENTS) + end + + # Reads issue events from cache + # + # @param record [ActiveRecord::Model] Model that responds to :iid + # @retun [Array<GitLab::GitHubImport::Representation::IssueEvent>] List of issue events + def events(record) + events = Gitlab::Cache::Import::Caching.values_from_list(events_cache_key(record)).map do |event| + Representation::IssueEvent.from_json_hash(Gitlab::Json.parse(event)) + end + + events.sort_by(&:created_at) + end + + # Deletes the cache + # + # @param record [ActiveRecord::Model] Model that responds to :iid + def delete(record) + Gitlab::Cache::Import::Caching.del(events_cache_key(record)) + end + + private + + attr_reader :project + + def events_cache_key(record) + "github-importer/events/#{project.id}/#{record.class.name}/#{record.iid}" + end + end + end +end diff --git 
a/lib/gitlab/github_import/importer/attachments/base_importer.rb b/lib/gitlab/github_import/importer/attachments/base_importer.rb index eaff99aed43..844008f8087 100644 --- a/lib/gitlab/github_import/importer/attachments/base_importer.rb +++ b/lib/gitlab/github_import/importer/attachments/base_importer.rb @@ -16,9 +16,11 @@ module Gitlab batch.each do |record| next if already_imported?(record) - Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) + if has_attachments?(record) + Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) - yield record + yield record + end # We mark the object as imported immediately so we don't end up # scheduling it multiple times. @@ -48,6 +50,12 @@ module Gitlab def object_representation(object) representation_class.from_db_record(object) end + + def has_attachments?(object) + return true if Feature.disabled?(:github_importer_attachments, project, type: :gitlab_com_derisk) + + object_representation(object).has_attachments? + end end end end diff --git a/lib/gitlab/github_import/importer/events/base_importer.rb b/lib/gitlab/github_import/importer/events/base_importer.rb index 8218acf2bfb..1ebafec5afc 100644 --- a/lib/gitlab/github_import/importer/events/base_importer.rb +++ b/lib/gitlab/github_import/importer/events/base_importer.rb @@ -10,6 +10,7 @@ module Gitlab # client - An instance of `Gitlab::GithubImport::Client`. def initialize(project, client) @project = project + @client = client @user_finder = UserFinder.new(project, client) end @@ -20,7 +21,7 @@ module Gitlab private - attr_reader :project, :user_finder + attr_reader :project, :user_finder, :client def author_id(issue_event, author_key: :actor) user_finder.author_id_for(issue_event, author_key: author_key).first @@ -42,6 +43,10 @@ module Gitlab belongs_to_key = merge_request_event?(issue_event) ? 
:merge_request_id : :issue_id { belongs_to_key => issuable_db_id(issue_event) } end + + def import_settings + @import_settings ||= Gitlab::GithubImport::Settings.new(project) + end end end end diff --git a/lib/gitlab/github_import/importer/events/commented.rb b/lib/gitlab/github_import/importer/events/commented.rb new file mode 100644 index 00000000000..c9ebc31fa06 --- /dev/null +++ b/lib/gitlab/github_import/importer/events/commented.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + module Events + class Commented < BaseImporter + def execute(issue_event) + return true unless import_settings.extended_events? + + note = Representation::Note.from_json_hash( + noteable_id: issue_event.issuable_id, + noteable_type: issue_event.issuable_type, + author: issue_event.actor&.to_hash, + note: issue_event.body, + created_at: issue_event.created_at, + updated_at: issue_event.updated_at, + note_id: issue_event.id + ) + + NoteImporter.new(note, project, client).execute + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/events/merged.rb b/lib/gitlab/github_import/importer/events/merged.rb index 6189fa8f429..702ea7f1fd5 100644 --- a/lib/gitlab/github_import/importer/events/merged.rb +++ b/lib/gitlab/github_import/importer/events/merged.rb @@ -6,6 +6,8 @@ module Gitlab module Events class Merged < BaseImporter def execute(issue_event) + create_note(issue_event) if import_settings.extended_events? 
+ create_event(issue_event) create_state_event(issue_event) end @@ -37,6 +39,17 @@ module Gitlab ResourceStateEvent.create!(attrs) end + + def create_note(issue_event) + pull_request = Representation::PullRequest.from_json_hash({ + merged_by: issue_event.actor&.to_hash, + merged_at: issue_event.created_at, + iid: issue_event.issuable_id, + state: :closed + }) + + PullRequests::MergedByImporter.new(pull_request, project, client).execute + end end end end diff --git a/lib/gitlab/github_import/importer/events/reviewed.rb b/lib/gitlab/github_import/importer/events/reviewed.rb new file mode 100644 index 00000000000..1c0e8a9e6e8 --- /dev/null +++ b/lib/gitlab/github_import/importer/events/reviewed.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + module Events + class Reviewed < BaseImporter + def execute(issue_event) + return true unless import_settings.extended_events? + + review = Representation::PullRequestReview.new( + merge_request_iid: issue_event.issuable_id, + author: issue_event.actor&.to_hash, + note: issue_event.body.to_s, + review_type: issue_event.state.upcase, # On timeline API, the state is in lower case + submitted_at: issue_event.submitted_at, + review_id: issue_event.id + ) + + PullRequests::ReviewImporter.new(review, project, client).execute({ add_reviewer: false }) + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/issue_event_importer.rb b/lib/gitlab/github_import/importer/issue_event_importer.rb index d20482eca6f..9f15e9a25d8 100644 --- a/lib/gitlab/github_import/importer/issue_event_importer.rb +++ b/lib/gitlab/github_import/importer/issue_event_importer.rb @@ -22,6 +22,17 @@ module Gitlab unlabeled ].freeze + EXTENDED_SUPPORTED_EVENTS = SUPPORTED_EVENTS + %w[ + commented + reviewed + ].freeze + + EVENT_COUNTER_MAP = { + 'commented' => 'note', + 'reviewed' => 'pull_request_review', + 'merged' => 'pull_request_merged_by' + }.freeze + # issue_event - An 
instance of `Gitlab::GithubImport::Representation::IssueEvent`. # project - An instance of `Project`. # client - An instance of `Gitlab::GithubImport::Client`. @@ -65,6 +76,10 @@ module Gitlab Gitlab::GithubImport::Importer::Events::ChangedReviewer when 'merged' Gitlab::GithubImport::Importer::Events::Merged + when 'commented' + Gitlab::GithubImport::Importer::Events::Commented + when 'reviewed' + Gitlab::GithubImport::Importer::Events::Reviewed end end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 26472b0d468..36a256bbef5 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -16,10 +16,9 @@ module Gitlab end def execute - attachments = MarkdownText.fetch_attachments(note_text.text) - return if attachments.blank? + return unless note_text.has_attachments? - new_text = attachments.reduce(note_text.text) do |text, attachment| + new_text = note_text.attachments.reduce(note_text.text) do |text, attachment| new_url = gitlab_attachment_link(attachment) text.gsub(attachment.url, new_url) end diff --git a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb index 6df130eb6e8..384880651ef 100644 --- a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb @@ -14,10 +14,12 @@ module Gitlab @review = review @project = project @client = client - @merge_request = project.merge_requests.find_by_id(review.merge_request_id) + @merge_request = project.merge_requests.find_by_iid(review.merge_request_iid) end - def execute + def execute(options = {}) + options = { add_reviewer: true }.merge(options) + user_finder = GithubImport::UserFinder.new(project, client) gitlab_user_id = user_finder.user_id_for(review.author) @@ -25,7 
+27,7 @@ module Gitlab if gitlab_user_id add_review_note!(gitlab_user_id) add_approval!(gitlab_user_id) - add_reviewer!(gitlab_user_id) + add_reviewer!(gitlab_user_id) if options[:add_reviewer] else add_complementary_review_note!(project.creator_id) end diff --git a/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb index 347423b0e21..62c9e6469d7 100644 --- a/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb @@ -72,7 +72,7 @@ module Gitlab merge_requests_to_import.find_each do |merge_request| # The page counter needs to be scoped by merge request to avoid skipping # pages of reviews from already imported merge requests. - page_counter = PageCounter.new(project, page_counter_id(merge_request)) + page_counter = Gitlab::Import::PageCounter.new(project, page_counter_id(merge_request)) repo = project.import_source options = collection_options.merge(page: page_counter.current) diff --git a/lib/gitlab/github_import/importer/replay_events_importer.rb b/lib/gitlab/github_import/importer/replay_events_importer.rb new file mode 100644 index 00000000000..83578cf7672 --- /dev/null +++ b/lib/gitlab/github_import/importer/replay_events_importer.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class ReplayEventsImporter + SUPPORTED_EVENTS = %w[review_request_removed review_requested].freeze + + # replay_event - An instance of `Gitlab::GithubImport::Representation::ReplayEvent`. 
+ # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(replay_event, project, client) + @project = project + @client = client + @replay_event = replay_event + end + + def execute + association = case replay_event.issuable_type + when 'MergeRequest' + project.merge_requests.find_by_iid(replay_event.issuable_iid) + end + + return unless association + + events_cache = EventsCache.new(project) + + handle_review_requests(association, events_cache.events(association)) + + events_cache.delete(association) + end + + private + + attr_reader :project, :client, :replay_event + + def handle_review_requests(association, events) + reviewers = {} + + events.each do |event| + case event.event + when 'review_requested' + reviewers[event.requested_reviewer.login] = event.requested_reviewer.to_hash if event.requested_reviewer + when 'review_request_removed' + reviewers[event.requested_reviewer.login] = nil if event.requested_reviewer + end + end + + representation = Representation::PullRequests::ReviewRequests.from_json_hash( + merge_request_id: association.id, + merge_request_iid: association.iid, + users: reviewers.values.compact + ) + + Importer::PullRequests::ReviewRequestImporter.new(representation, project, client).execute + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb b/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb index d7fa098a775..126a0b8fa4a 100644 --- a/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb +++ b/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb @@ -30,9 +30,11 @@ module Gitlab compose_associated_id!(parent_record, associated) - return if already_imported?(associated) || importer_class::SUPPORTED_EVENTS.exclude?(associated[:event]) + return if already_imported?(associated) || supported_events.exclude?(associated[:event]) - 
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) + cache_event(parent_record, associated) + + increment_object_counter(associated[:event]) pull_request = parent_record.is_a? MergeRequest associated[:issue] = { number: parent_record.iid, pull_request: pull_request } @@ -64,6 +66,12 @@ module Gitlab :issue_event end + def increment_object_counter(event_name) + counter_type = importer_class::EVENT_COUNTER_MAP[event_name] if import_settings.extended_events? + counter_type ||= object_type + Gitlab::GithubImport::ObjectCounter.increment(project, counter_type, :fetched) + end + def collection_method :issue_timeline end @@ -98,6 +106,43 @@ module Gitlab event[:id] = "cross-reference##{issuable.iid}-in-#{event.dig(:source, :issue, :id)}" end + + def import_settings + @import_settings ||= Gitlab::GithubImport::Settings.new(project) + end + + def after_batch_processed(parent) + return unless import_settings.extended_events? + + events = events_cache.events(parent) + + return if events.empty? + + hash = Representation::ReplayEvent.new(issuable_type: parent.class.name.to_s, issuable_iid: parent.iid) + .to_hash.deep_stringify_keys + ReplayEventsWorker.perform_async(project.id, hash, job_waiter.key.to_s) + job_waiter.jobs_remaining = Gitlab::Cache::Import::Caching.increment(job_waiter_remaining_cache_key) + end + + def supported_events + return importer_class::EXTENDED_SUPPORTED_EVENTS if import_settings.extended_events? + + importer_class::SUPPORTED_EVENTS + end + + def cache_event(parent_record, associated) + return unless import_settings.extended_events? 
+ + return if Importer::ReplayEventsImporter::SUPPORTED_EVENTS.exclude?(associated[:event]) + + representation = representation_class.from_api_response(associated) + + events_cache.add(parent_record, representation) + end + + def events_cache + @events_cache ||= EventsCache.new(project) + end end end end diff --git a/lib/gitlab/github_import/job_delay_calculator.rb b/lib/gitlab/github_import/job_delay_calculator.rb index 50cad1aae19..a456e198afd 100644 --- a/lib/gitlab/github_import/job_delay_calculator.rb +++ b/lib/gitlab/github_import/job_delay_calculator.rb @@ -15,9 +15,9 @@ module Gitlab private def calculate_job_delay(job_index) - multiplier = (job_index / parallel_import_batch[:size]) + multiplier = (job_index / parallel_import_batch[:size].to_f) - (multiplier * parallel_import_batch[:delay]).to_i + 1 + (multiplier * parallel_import_batch[:delay]) + 1 end end end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 8e9d6d8dd50..5880aa04358 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -41,6 +41,8 @@ module Gitlab def fetch_attachments(text) attachments = [] + return attachments if text.nil? + doc = CommonMarker.render_doc(text) doc.walk do |node| diff --git a/lib/gitlab/github_import/page_counter.rb b/lib/gitlab/github_import/page_counter.rb deleted file mode 100644 index c238ccb8932..00000000000 --- a/lib/gitlab/github_import/page_counter.rb +++ /dev/null @@ -1,35 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module GithubImport - # PageCounter can be used to keep track of the last imported page of a - # collection, allowing workers to resume where they left off in the event of - # an error. - class PageCounter - attr_reader :cache_key - - # The base cache key to use for storing the last page number. 
- CACHE_KEY = '%{import_type}/page-counter/%{object}/%{collection}' - - def initialize(object, collection, import_type = 'github-importer') - @cache_key = CACHE_KEY % { import_type: import_type, object: object.id, collection: collection } - end - - # Sets the page number to the given value. - # - # Returns true if the page number was overwritten, false otherwise. - def set(page) - Gitlab::Cache::Import::Caching.write_if_greater(cache_key, page) - end - - # Returns the current value from the cache. - def current - Gitlab::Cache::Import::Caching.read_integer(cache_key) || 1 - end - - def expire! - Gitlab::Cache::Import::Caching.expire(cache_key, 0) - end - end - end -end diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb index ce93b5203df..2286dcf767f 100644 --- a/lib/gitlab/github_import/parallel_scheduling.rb +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -8,6 +8,8 @@ module Gitlab attr_reader :project, :client, :page_counter, :already_imported_cache_key, :job_waiter_cache_key, :job_waiter_remaining_cache_key + attr_accessor :job_started_at, :enqueued_job_counter + # The base cache key to use for tracking already imported objects. 
ALREADY_IMPORTED_CACHE_KEY = 'github-importer/already-imported/%{project}/%{collection}' @@ -25,7 +27,7 @@ module Gitlab @project = project @client = client @parallel = parallel - @page_counter = PageCounter.new(project, collection_method) + @page_counter = Gitlab::Import::PageCounter.new(project, collection_method) @already_imported_cache_key = format(ALREADY_IMPORTED_CACHE_KEY, project: project.id, collection: collection_method) @job_waiter_cache_key = format(JOB_WAITER_CACHE_KEY, project: project.id, collection: collection_method) @@ -91,14 +93,15 @@ module Gitlab end def spread_parallel_import - enqueued_job_counter = 0 + self.job_started_at = Time.current + self.enqueued_job_counter = 0 each_object_to_import do |object| repr = object_representation(object) - job_delay = calculate_job_delay(enqueued_job_counter) sidekiq_worker_class.perform_in(job_delay, project.id, repr.to_hash.deep_stringify_keys, job_waiter.key.to_s) - enqueued_job_counter += 1 + + self.enqueued_job_counter += 1 job_waiter.jobs_remaining = Gitlab::Cache::Import::Caching.increment(job_waiter_remaining_cache_key) end @@ -246,6 +249,14 @@ module Gitlab JobWaiter.new(jobs_remaining, key) end end + + def job_delay + runtime = Time.current - job_started_at + + delay = calculate_job_delay(enqueued_job_counter) - runtime + + delay > 0 ? delay : 1.0.second + end end end end diff --git a/lib/gitlab/github_import/representation/issue_event.rb b/lib/gitlab/github_import/representation/issue_event.rb index 30608112f85..fc3bc5a48ef 100644 --- a/lib/gitlab/github_import/representation/issue_event.rb +++ b/lib/gitlab/github_import/representation/issue_event.rb @@ -8,7 +8,8 @@ module Gitlab expose_attribute :id, :actor, :event, :commit_id, :label_title, :old_title, :new_title, :milestone_title, :issue, :source, :assignee, :review_requester, - :requested_reviewer, :created_at + :requested_reviewer, :created_at, :updated_at, :submitted_at, + :state, :body # attributes - A Hash containing the event details. 
The keys of this # Hash (and any nested hashes) must be symbols. @@ -51,7 +52,11 @@ module Gitlab assignee: user_representation(event[:assignee]), requested_reviewer: user_representation(event[:requested_reviewer]), review_requester: user_representation(event[:review_requester]), - created_at: event[:created_at] + created_at: event[:created_at], + updated_at: event[:updated_at], + submitted_at: event[:submitted_at], + state: event[:state], + body: event[:body] ) end diff --git a/lib/gitlab/github_import/representation/note_text.rb b/lib/gitlab/github_import/representation/note_text.rb index 43e18a923d6..79bef4ec363 100644 --- a/lib/gitlab/github_import/representation/note_text.rb +++ b/lib/gitlab/github_import/representation/note_text.rb @@ -55,6 +55,14 @@ module Gitlab }.merge(record_type_specific_attribute) end + def has_attachments? + attachments.present? + end + + def attachments + @attachments ||= MarkdownText.fetch_attachments(text) + end + private def record_type_specific_attribute diff --git a/lib/gitlab/github_import/representation/replay_event.rb b/lib/gitlab/github_import/representation/replay_event.rb new file mode 100644 index 00000000000..2d71c26abbb --- /dev/null +++ b/lib/gitlab/github_import/representation/replay_event.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class ReplayEvent + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :issuable_type, :issuable_iid + + def self.from_json_hash(raw_hash) + new Representation.symbolize_hash(raw_hash) + end + + def initialize(attributes) + @attributes = attributes + end + + def github_identifiers + { + issuable_type: issuable_type, + issuable_iid: issuable_iid + } + end + end + end + end +end diff --git a/lib/gitlab/github_import/settings.rb b/lib/gitlab/github_import/settings.rb index 3947ae3c63d..da5833df3a1 100644 --- a/lib/gitlab/github_import/settings.rb +++ 
b/lib/gitlab/github_import/settings.rb @@ -38,8 +38,13 @@ module Gitlab } }.freeze - def self.stages_array - OPTIONAL_STAGES.map do |stage_name, data| + def self.stages_array(current_user) + deprecated_options = %i[single_endpoint_issue_events_import] + + OPTIONAL_STAGES.filter_map do |stage_name, data| + next if deprecated_options.include?(stage_name) && + Feature.enabled?(:github_import_extended_events, current_user) + { name: stage_name.to_s, label: s_(format("GitHubImport|%{text}", text: data[:label])), @@ -61,7 +66,8 @@ module Gitlab import_data = project.build_or_assign_import_data( data: { optional_stages: optional_stages, - timeout_strategy: user_settings[:timeout_strategy] + timeout_strategy: user_settings[:timeout_strategy], + extended_events: user_settings[:extended_events] }, credentials: project.import_data&.credentials ) @@ -77,6 +83,10 @@ module Gitlab !enabled?(stage_name) end + def extended_events? + !!project.import_data&.data&.dig('extended_events') + end + private attr_reader :project diff --git a/lib/gitlab/github_import/single_endpoint_notes_importing.rb b/lib/gitlab/github_import/single_endpoint_notes_importing.rb index 3584288da57..d4d9bd47e63 100644 --- a/lib/gitlab/github_import/single_endpoint_notes_importing.rb +++ b/lib/gitlab/github_import/single_endpoint_notes_importing.rb @@ -75,7 +75,7 @@ module Gitlab batch.each do |parent_record| # The page counter needs to be scoped by parent_record to avoid skipping # pages of notes from already imported parent_record. 
- page_counter = PageCounter.new(project, page_counter_id(parent_record)) + page_counter = Gitlab::Import::PageCounter.new(project, page_counter_id(parent_record)) repo = project.import_source options = collection_options.merge(page: page_counter.current) @@ -85,6 +85,7 @@ module Gitlab yield parent_record, page end + after_batch_processed(parent_record) mark_parent_imported(parent_record) end end @@ -96,6 +97,8 @@ module Gitlab ) end + def after_batch_processed(_parent); end + def already_imported_parents Gitlab::Cache::Import::Caching.values_from_set(parent_imported_cache_key) end diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb index 4bf2d8a0aca..bec4c7fc4d4 100644 --- a/lib/gitlab/github_import/user_finder.rb +++ b/lib/gitlab/github_import/user_finder.rb @@ -12,21 +12,18 @@ module Gitlab # Lookups are cached even if no ID was found to remove the need for querying # the database when most queries are not going to return results anyway. class UserFinder + include Gitlab::ExclusiveLeaseHelpers + attr_reader :project, :client - # The base cache key to use for caching user IDs for a given GitHub user - # ID. + # The base cache key to use for caching user IDs for a given GitHub user ID. ID_CACHE_KEY = 'github-import/user-finder/user-id/%s' - # The base cache key to use for caching user IDs for a given GitHub email - # address. - ID_FOR_EMAIL_CACHE_KEY = - 'github-import/user-finder/id-for-email/%s' + # The base cache key to use for caching user IDs for a given GitHub email address. + ID_FOR_EMAIL_CACHE_KEY = 'github-import/user-finder/id-for-email/%s' - # The base cache key to use for caching the Email addresses of GitHub - # usernames. - EMAIL_FOR_USERNAME_CACHE_KEY = - 'github-import/user-finder/email-for-username/%s' + # The base cache key to use for caching the Email addresses of GitHub usernames. 
+ EMAIL_FOR_USERNAME_CACHE_KEY = 'github-import/user-finder/email-for-username/%s' # The base cache key to use for caching the user ETAG response headers USERNAME_ETAG_CACHE_KEY = 'github-import/user-finder/user-etag/%s' @@ -218,6 +215,17 @@ module Gitlab private + def lease_key + "gitlab:github_import:user_finder:#{project.id}" + end + + # Retrieves the email associated with the given username from the cache. + # + # The return value can be an email, an empty string, or nil. + # + # If an empty string is returned, it indicates that the user's email was fetched but not set on GitHub. + # If nil is returned, it indicates that the user's email wasn't fetched or the cache has expired. + # If an email is returned, it means the user has a public email set, and it has been successfully cached. def read_email_from_cache(username) Gitlab::Cache::Import::Caching.read(email_cache_key(username)) end @@ -232,12 +240,27 @@ module Gitlab end def fetch_email_from_github(username, etag: nil) - log(EMAIL_API_CALL_LOGGING_MESSAGE[etag.present?], username: username) - user = client.user(username, { headers: { 'If-None-Match' => etag }.compact }) + in_lock(lease_key, ttl: 3.minutes, sleep_sec: 1.second, retries: 30) do |retried| + # when retried, check the cache again as the other process that had the lease may have fetched the email + if retried + email = read_email_from_cache(username) - user[:email] || '' if user + next email if email.present? + end + + log(EMAIL_API_CALL_LOGGING_MESSAGE[etag.present?], username: username) + + # Only make a rate-limited API call if the ETAG is not available }) + user = client.user(username, { headers: { 'If-None-Match' => etag }.compact }) + user[:email] || '' if user + end end + # Caches the email associated to the username + # + # An empty email is cached when the user email isn't set on GitHub. 
+ # This is done to prevent UserFinder from fetching the user's email again when the user's email isn't set on + # GitHub def cache_email!(username, email) return unless email @@ -245,6 +268,8 @@ module Gitlab end def cache_etag!(username) + return unless client.octokit.last_response + etag = client.octokit.last_response.headers[:etag] Gitlab::Cache::Import::Caching.write(etag_cache_key(username), etag) end |