Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author GitLab Bot <gitlab-bot@gitlab.com> 2024-01-16 13:42:19 +0300
committer GitLab Bot <gitlab-bot@gitlab.com> 2024-01-16 13:42:19 +0300
commit 84d1bd786125c1c14a3ba5f63e38a4cc736a9027 (patch)
tree f550fa965f507077e20dbb6d61a8269a99ef7107 /lib/gitlab/github_import
parent 3a105e36e689f7b75482236712f1a47fd5a76814 (diff)
Add latest changes from gitlab-org/gitlab@16-8-stable-ee (tag: v16.8.0-rc42)
Diffstat (limited to 'lib/gitlab/github_import')
-rw-r--r-- lib/gitlab/github_import/attachments_downloader.rb 22
-rw-r--r-- lib/gitlab/github_import/events_cache.rb 61
-rw-r--r-- lib/gitlab/github_import/importer/attachments/base_importer.rb 12
-rw-r--r-- lib/gitlab/github_import/importer/events/base_importer.rb 7
-rw-r--r-- lib/gitlab/github_import/importer/events/commented.rb 27
-rw-r--r-- lib/gitlab/github_import/importer/events/merged.rb 13
-rw-r--r-- lib/gitlab/github_import/importer/events/reviewed.rb 26
-rw-r--r-- lib/gitlab/github_import/importer/issue_event_importer.rb 15
-rw-r--r-- lib/gitlab/github_import/importer/note_attachments_importer.rb 5
-rw-r--r-- lib/gitlab/github_import/importer/pull_requests/review_importer.rb 8
-rw-r--r-- lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb 2
-rw-r--r-- lib/gitlab/github_import/importer/replay_events_importer.rb 60
-rw-r--r-- lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb 49
-rw-r--r-- lib/gitlab/github_import/job_delay_calculator.rb 4
-rw-r--r-- lib/gitlab/github_import/markdown_text.rb 2
-rw-r--r-- lib/gitlab/github_import/page_counter.rb 35
-rw-r--r-- lib/gitlab/github_import/parallel_scheduling.rb 19
-rw-r--r-- lib/gitlab/github_import/representation/issue_event.rb 9
-rw-r--r-- lib/gitlab/github_import/representation/note_text.rb 8
-rw-r--r-- lib/gitlab/github_import/representation/replay_event.rb 31
-rw-r--r-- lib/gitlab/github_import/settings.rb 16
-rw-r--r-- lib/gitlab/github_import/single_endpoint_notes_importing.rb 5
-rw-r--r-- lib/gitlab/github_import/user_finder.rb 51
23 files changed, 407 insertions, 80 deletions
diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb
index df9c6c8342d..e9192b97506 100644
--- a/lib/gitlab/github_import/attachments_downloader.rb
+++ b/lib/gitlab/github_import/attachments_downloader.rb
@@ -11,7 +11,7 @@ module Gitlab
UnsupportedAttachmentError = Class.new(StandardError)
FILENAME_SIZE_LIMIT = 255 # chars before the extension
- DEFAULT_FILE_SIZE_LIMIT = 25.megabytes
+ DEFAULT_FILE_SIZE_LIMIT = Gitlab::CurrentSettings.max_attachment_size.megabytes
TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze
attr_reader :file_url, :filename, :file_size_limit, :options
@@ -26,7 +26,6 @@ module Gitlab
end
def perform
- validate_content_length
validate_filepath
download_url = get_assets_download_redirection_url
@@ -46,11 +45,6 @@ module Gitlab
raise DownloadError, message
end
- def response_headers
- @response_headers ||=
- Gitlab::HTTP.perform_request(Net::HTTP::Head, file_url, {}).headers
- end
-
# Github /assets redirection link will redirect to aws which has its own authorization.
# Keeping our bearer token will cause request rejection
# eg. Only one auth mechanism allowed; only the X-Amz-Algorithm query parameter,
@@ -78,7 +72,19 @@ module Gitlab
def download_from(url)
file = File.open(filepath, 'wb')
- Gitlab::HTTP.perform_request(Net::HTTP::Get, url, stream_body: true) { |batch| file.write(batch) }
+
+ Gitlab::HTTP.perform_request(Net::HTTP::Get, url, stream_body: true) do |chunk|
+ next if [301, 302, 303, 307, 308].include?(chunk.code)
+
+ raise DownloadError, "Error downloading file from #{url}. Error code: #{chunk.code}" if chunk.code != 200
+
+ file.write(chunk)
+ validate_size!(file.size)
+ rescue Gitlab::GithubImport::AttachmentsDownloader::DownloadError
+ delete
+ raise
+ end
+
file
end
diff --git a/lib/gitlab/github_import/events_cache.rb b/lib/gitlab/github_import/events_cache.rb
new file mode 100644
index 00000000000..0986ccfaed1
--- /dev/null
+++ b/lib/gitlab/github_import/events_cache.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module GithubImport
+ class EventsCache
+ MAX_NUMBER_OF_EVENTS = 100
+ MAX_EVENT_SIZE = 100.kilobytes
+
+ def initialize(project)
+ @project = project
+ end
+
+ # Add issue event as JSON to the cache
+ #
+ # @param record [ActiveRecord::Model] Model that responds to :iid
+ # @param issue_event [Gitlab::GithubImport::Representation::IssueEvent]
+ def add(record, issue_event)
+ json = issue_event.to_hash.to_json
+
+ if json.bytesize > MAX_EVENT_SIZE
+ Logger.warn(
+ message: 'Event too large to cache',
+ project_id: project.id,
+ github_identifiers: issue_event.github_identifiers
+ )
+
+ return
+ end
+
+ Gitlab::Cache::Import::Caching.list_add(events_cache_key(record), json, limit: MAX_NUMBER_OF_EVENTS)
+ end
+
+ # Reads issue events from cache
+ #
+ # @param record [ActiveRecord::Model] Model that responds to :iid
+ # @return [Array<Gitlab::GithubImport::Representation::IssueEvent>] List of issue events
+ def events(record)
+ events = Gitlab::Cache::Import::Caching.values_from_list(events_cache_key(record)).map do |event|
+ Representation::IssueEvent.from_json_hash(Gitlab::Json.parse(event))
+ end
+
+ events.sort_by(&:created_at)
+ end
+
+ # Deletes the cache
+ #
+ # @param record [ActiveRecord::Model] Model that responds to :iid
+ def delete(record)
+ Gitlab::Cache::Import::Caching.del(events_cache_key(record))
+ end
+
+ private
+
+ attr_reader :project
+
+ def events_cache_key(record)
+ "github-importer/events/#{project.id}/#{record.class.name}/#{record.iid}"
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/github_import/importer/attachments/base_importer.rb b/lib/gitlab/github_import/importer/attachments/base_importer.rb
index eaff99aed43..844008f8087 100644
--- a/lib/gitlab/github_import/importer/attachments/base_importer.rb
+++ b/lib/gitlab/github_import/importer/attachments/base_importer.rb
@@ -16,9 +16,11 @@ module Gitlab
batch.each do |record|
next if already_imported?(record)
- Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
+ if has_attachments?(record)
+ Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
- yield record
+ yield record
+ end
# We mark the object as imported immediately so we don't end up
# scheduling it multiple times.
@@ -48,6 +50,12 @@ module Gitlab
def object_representation(object)
representation_class.from_db_record(object)
end
+
+ def has_attachments?(object)
+ return true if Feature.disabled?(:github_importer_attachments, project, type: :gitlab_com_derisk)
+
+ object_representation(object).has_attachments?
+ end
end
end
end
diff --git a/lib/gitlab/github_import/importer/events/base_importer.rb b/lib/gitlab/github_import/importer/events/base_importer.rb
index 8218acf2bfb..1ebafec5afc 100644
--- a/lib/gitlab/github_import/importer/events/base_importer.rb
+++ b/lib/gitlab/github_import/importer/events/base_importer.rb
@@ -10,6 +10,7 @@ module Gitlab
# client - An instance of `Gitlab::GithubImport::Client`.
def initialize(project, client)
@project = project
+ @client = client
@user_finder = UserFinder.new(project, client)
end
@@ -20,7 +21,7 @@ module Gitlab
private
- attr_reader :project, :user_finder
+ attr_reader :project, :user_finder, :client
def author_id(issue_event, author_key: :actor)
user_finder.author_id_for(issue_event, author_key: author_key).first
@@ -42,6 +43,10 @@ module Gitlab
belongs_to_key = merge_request_event?(issue_event) ? :merge_request_id : :issue_id
{ belongs_to_key => issuable_db_id(issue_event) }
end
+
+ def import_settings
+ @import_settings ||= Gitlab::GithubImport::Settings.new(project)
+ end
end
end
end
diff --git a/lib/gitlab/github_import/importer/events/commented.rb b/lib/gitlab/github_import/importer/events/commented.rb
new file mode 100644
index 00000000000..c9ebc31fa06
--- /dev/null
+++ b/lib/gitlab/github_import/importer/events/commented.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module GithubImport
+ module Importer
+ module Events
+ class Commented < BaseImporter
+ def execute(issue_event)
+ return true unless import_settings.extended_events?
+
+ note = Representation::Note.from_json_hash(
+ noteable_id: issue_event.issuable_id,
+ noteable_type: issue_event.issuable_type,
+ author: issue_event.actor&.to_hash,
+ note: issue_event.body,
+ created_at: issue_event.created_at,
+ updated_at: issue_event.updated_at,
+ note_id: issue_event.id
+ )
+
+ NoteImporter.new(note, project, client).execute
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/github_import/importer/events/merged.rb b/lib/gitlab/github_import/importer/events/merged.rb
index 6189fa8f429..702ea7f1fd5 100644
--- a/lib/gitlab/github_import/importer/events/merged.rb
+++ b/lib/gitlab/github_import/importer/events/merged.rb
@@ -6,6 +6,8 @@ module Gitlab
module Events
class Merged < BaseImporter
def execute(issue_event)
+ create_note(issue_event) if import_settings.extended_events?
+
create_event(issue_event)
create_state_event(issue_event)
end
@@ -37,6 +39,17 @@ module Gitlab
ResourceStateEvent.create!(attrs)
end
+
+ def create_note(issue_event)
+ pull_request = Representation::PullRequest.from_json_hash({
+ merged_by: issue_event.actor&.to_hash,
+ merged_at: issue_event.created_at,
+ iid: issue_event.issuable_id,
+ state: :closed
+ })
+
+ PullRequests::MergedByImporter.new(pull_request, project, client).execute
+ end
end
end
end
diff --git a/lib/gitlab/github_import/importer/events/reviewed.rb b/lib/gitlab/github_import/importer/events/reviewed.rb
new file mode 100644
index 00000000000..1c0e8a9e6e8
--- /dev/null
+++ b/lib/gitlab/github_import/importer/events/reviewed.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module GithubImport
+ module Importer
+ module Events
+ class Reviewed < BaseImporter
+ def execute(issue_event)
+ return true unless import_settings.extended_events?
+
+ review = Representation::PullRequestReview.new(
+ merge_request_iid: issue_event.issuable_id,
+ author: issue_event.actor&.to_hash,
+ note: issue_event.body.to_s,
+ review_type: issue_event.state.upcase, # On timeline API, the state is in lower case
+ submitted_at: issue_event.submitted_at,
+ review_id: issue_event.id
+ )
+
+ PullRequests::ReviewImporter.new(review, project, client).execute({ add_reviewer: false })
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/github_import/importer/issue_event_importer.rb b/lib/gitlab/github_import/importer/issue_event_importer.rb
index d20482eca6f..9f15e9a25d8 100644
--- a/lib/gitlab/github_import/importer/issue_event_importer.rb
+++ b/lib/gitlab/github_import/importer/issue_event_importer.rb
@@ -22,6 +22,17 @@ module Gitlab
unlabeled
].freeze
+ EXTENDED_SUPPORTED_EVENTS = SUPPORTED_EVENTS + %w[
+ commented
+ reviewed
+ ].freeze
+
+ EVENT_COUNTER_MAP = {
+ 'commented' => 'note',
+ 'reviewed' => 'pull_request_review',
+ 'merged' => 'pull_request_merged_by'
+ }.freeze
+
# issue_event - An instance of `Gitlab::GithubImport::Representation::IssueEvent`.
# project - An instance of `Project`.
# client - An instance of `Gitlab::GithubImport::Client`.
@@ -65,6 +76,10 @@ module Gitlab
Gitlab::GithubImport::Importer::Events::ChangedReviewer
when 'merged'
Gitlab::GithubImport::Importer::Events::Merged
+ when 'commented'
+ Gitlab::GithubImport::Importer::Events::Commented
+ when 'reviewed'
+ Gitlab::GithubImport::Importer::Events::Reviewed
end
end
end
diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb
index 26472b0d468..36a256bbef5 100644
--- a/lib/gitlab/github_import/importer/note_attachments_importer.rb
+++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb
@@ -16,10 +16,9 @@ module Gitlab
end
def execute
- attachments = MarkdownText.fetch_attachments(note_text.text)
- return if attachments.blank?
+ return unless note_text.has_attachments?
- new_text = attachments.reduce(note_text.text) do |text, attachment|
+ new_text = note_text.attachments.reduce(note_text.text) do |text, attachment|
new_url = gitlab_attachment_link(attachment)
text.gsub(attachment.url, new_url)
end
diff --git a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb
index 6df130eb6e8..384880651ef 100644
--- a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb
@@ -14,10 +14,12 @@ module Gitlab
@review = review
@project = project
@client = client
- @merge_request = project.merge_requests.find_by_id(review.merge_request_id)
+ @merge_request = project.merge_requests.find_by_iid(review.merge_request_iid)
end
- def execute
+ def execute(options = {})
+ options = { add_reviewer: true }.merge(options)
+
user_finder = GithubImport::UserFinder.new(project, client)
gitlab_user_id = user_finder.user_id_for(review.author)
@@ -25,7 +27,7 @@ module Gitlab
if gitlab_user_id
add_review_note!(gitlab_user_id)
add_approval!(gitlab_user_id)
- add_reviewer!(gitlab_user_id)
+ add_reviewer!(gitlab_user_id) if options[:add_reviewer]
else
add_complementary_review_note!(project.creator_id)
end
diff --git a/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb
index 347423b0e21..62c9e6469d7 100644
--- a/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb
@@ -72,7 +72,7 @@ module Gitlab
merge_requests_to_import.find_each do |merge_request|
# The page counter needs to be scoped by merge request to avoid skipping
# pages of reviews from already imported merge requests.
- page_counter = PageCounter.new(project, page_counter_id(merge_request))
+ page_counter = Gitlab::Import::PageCounter.new(project, page_counter_id(merge_request))
repo = project.import_source
options = collection_options.merge(page: page_counter.current)
diff --git a/lib/gitlab/github_import/importer/replay_events_importer.rb b/lib/gitlab/github_import/importer/replay_events_importer.rb
new file mode 100644
index 00000000000..83578cf7672
--- /dev/null
+++ b/lib/gitlab/github_import/importer/replay_events_importer.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module GithubImport
+ module Importer
+ class ReplayEventsImporter
+ SUPPORTED_EVENTS = %w[review_request_removed review_requested].freeze
+
+ # replay_event - An instance of `Gitlab::GithubImport::Representation::ReplayEvent`.
+ # project - An instance of `Project`
+ # client - An instance of `Gitlab::GithubImport::Client`
+ def initialize(replay_event, project, client)
+ @project = project
+ @client = client
+ @replay_event = replay_event
+ end
+
+ def execute
+ association = case replay_event.issuable_type
+ when 'MergeRequest'
+ project.merge_requests.find_by_iid(replay_event.issuable_iid)
+ end
+
+ return unless association
+
+ events_cache = EventsCache.new(project)
+
+ handle_review_requests(association, events_cache.events(association))
+
+ events_cache.delete(association)
+ end
+
+ private
+
+ attr_reader :project, :client, :replay_event
+
+ def handle_review_requests(association, events)
+ reviewers = {}
+
+ events.each do |event|
+ case event.event
+ when 'review_requested'
+ reviewers[event.requested_reviewer.login] = event.requested_reviewer.to_hash if event.requested_reviewer
+ when 'review_request_removed'
+ reviewers[event.requested_reviewer.login] = nil if event.requested_reviewer
+ end
+ end
+
+ representation = Representation::PullRequests::ReviewRequests.from_json_hash(
+ merge_request_id: association.id,
+ merge_request_iid: association.iid,
+ users: reviewers.values.compact
+ )
+
+ Importer::PullRequests::ReviewRequestImporter.new(representation, project, client).execute
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb b/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb
index d7fa098a775..126a0b8fa4a 100644
--- a/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb
+++ b/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb
@@ -30,9 +30,11 @@ module Gitlab
compose_associated_id!(parent_record, associated)
- return if already_imported?(associated) || importer_class::SUPPORTED_EVENTS.exclude?(associated[:event])
+ return if already_imported?(associated) || supported_events.exclude?(associated[:event])
- Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
+ cache_event(parent_record, associated)
+
+ increment_object_counter(associated[:event])
pull_request = parent_record.is_a? MergeRequest
associated[:issue] = { number: parent_record.iid, pull_request: pull_request }
@@ -64,6 +66,12 @@ module Gitlab
:issue_event
end
+ def increment_object_counter(event_name)
+ counter_type = importer_class::EVENT_COUNTER_MAP[event_name] if import_settings.extended_events?
+ counter_type ||= object_type
+ Gitlab::GithubImport::ObjectCounter.increment(project, counter_type, :fetched)
+ end
+
def collection_method
:issue_timeline
end
@@ -98,6 +106,43 @@ module Gitlab
event[:id] = "cross-reference##{issuable.iid}-in-#{event.dig(:source, :issue, :id)}"
end
+
+ def import_settings
+ @import_settings ||= Gitlab::GithubImport::Settings.new(project)
+ end
+
+ def after_batch_processed(parent)
+ return unless import_settings.extended_events?
+
+ events = events_cache.events(parent)
+
+ return if events.empty?
+
+ hash = Representation::ReplayEvent.new(issuable_type: parent.class.name.to_s, issuable_iid: parent.iid)
+ .to_hash.deep_stringify_keys
+ ReplayEventsWorker.perform_async(project.id, hash, job_waiter.key.to_s)
+ job_waiter.jobs_remaining = Gitlab::Cache::Import::Caching.increment(job_waiter_remaining_cache_key)
+ end
+
+ def supported_events
+ return importer_class::EXTENDED_SUPPORTED_EVENTS if import_settings.extended_events?
+
+ importer_class::SUPPORTED_EVENTS
+ end
+
+ def cache_event(parent_record, associated)
+ return unless import_settings.extended_events?
+
+ return if Importer::ReplayEventsImporter::SUPPORTED_EVENTS.exclude?(associated[:event])
+
+ representation = representation_class.from_api_response(associated)
+
+ events_cache.add(parent_record, representation)
+ end
+
+ def events_cache
+ @events_cache ||= EventsCache.new(project)
+ end
end
end
end
diff --git a/lib/gitlab/github_import/job_delay_calculator.rb b/lib/gitlab/github_import/job_delay_calculator.rb
index 50cad1aae19..a456e198afd 100644
--- a/lib/gitlab/github_import/job_delay_calculator.rb
+++ b/lib/gitlab/github_import/job_delay_calculator.rb
@@ -15,9 +15,9 @@ module Gitlab
private
def calculate_job_delay(job_index)
- multiplier = (job_index / parallel_import_batch[:size])
+ multiplier = (job_index / parallel_import_batch[:size].to_f)
- (multiplier * parallel_import_batch[:delay]).to_i + 1
+ (multiplier * parallel_import_batch[:delay]) + 1
end
end
end
diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb
index 8e9d6d8dd50..5880aa04358 100644
--- a/lib/gitlab/github_import/markdown_text.rb
+++ b/lib/gitlab/github_import/markdown_text.rb
@@ -41,6 +41,8 @@ module Gitlab
def fetch_attachments(text)
attachments = []
+ return attachments if text.nil?
+
doc = CommonMarker.render_doc(text)
doc.walk do |node|
diff --git a/lib/gitlab/github_import/page_counter.rb b/lib/gitlab/github_import/page_counter.rb
deleted file mode 100644
index c238ccb8932..00000000000
--- a/lib/gitlab/github_import/page_counter.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module GithubImport
- # PageCounter can be used to keep track of the last imported page of a
- # collection, allowing workers to resume where they left off in the event of
- # an error.
- class PageCounter
- attr_reader :cache_key
-
- # The base cache key to use for storing the last page number.
- CACHE_KEY = '%{import_type}/page-counter/%{object}/%{collection}'
-
- def initialize(object, collection, import_type = 'github-importer')
- @cache_key = CACHE_KEY % { import_type: import_type, object: object.id, collection: collection }
- end
-
- # Sets the page number to the given value.
- #
- # Returns true if the page number was overwritten, false otherwise.
- def set(page)
- Gitlab::Cache::Import::Caching.write_if_greater(cache_key, page)
- end
-
- # Returns the current value from the cache.
- def current
- Gitlab::Cache::Import::Caching.read_integer(cache_key) || 1
- end
-
- def expire!
- Gitlab::Cache::Import::Caching.expire(cache_key, 0)
- end
- end
- end
-end
diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb
index ce93b5203df..2286dcf767f 100644
--- a/lib/gitlab/github_import/parallel_scheduling.rb
+++ b/lib/gitlab/github_import/parallel_scheduling.rb
@@ -8,6 +8,8 @@ module Gitlab
attr_reader :project, :client, :page_counter, :already_imported_cache_key,
:job_waiter_cache_key, :job_waiter_remaining_cache_key
+ attr_accessor :job_started_at, :enqueued_job_counter
+
# The base cache key to use for tracking already imported objects.
ALREADY_IMPORTED_CACHE_KEY =
'github-importer/already-imported/%{project}/%{collection}'
@@ -25,7 +27,7 @@ module Gitlab
@project = project
@client = client
@parallel = parallel
- @page_counter = PageCounter.new(project, collection_method)
+ @page_counter = Gitlab::Import::PageCounter.new(project, collection_method)
@already_imported_cache_key = format(ALREADY_IMPORTED_CACHE_KEY, project: project.id,
collection: collection_method)
@job_waiter_cache_key = format(JOB_WAITER_CACHE_KEY, project: project.id, collection: collection_method)
@@ -91,14 +93,15 @@ module Gitlab
end
def spread_parallel_import
- enqueued_job_counter = 0
+ self.job_started_at = Time.current
+ self.enqueued_job_counter = 0
each_object_to_import do |object|
repr = object_representation(object)
- job_delay = calculate_job_delay(enqueued_job_counter)
sidekiq_worker_class.perform_in(job_delay, project.id, repr.to_hash.deep_stringify_keys, job_waiter.key.to_s)
- enqueued_job_counter += 1
+
+ self.enqueued_job_counter += 1
job_waiter.jobs_remaining = Gitlab::Cache::Import::Caching.increment(job_waiter_remaining_cache_key)
end
@@ -246,6 +249,14 @@ module Gitlab
JobWaiter.new(jobs_remaining, key)
end
end
+
+ def job_delay
+ runtime = Time.current - job_started_at
+
+ delay = calculate_job_delay(enqueued_job_counter) - runtime
+
+ delay > 0 ? delay : 1.0.second
+ end
end
end
end
diff --git a/lib/gitlab/github_import/representation/issue_event.rb b/lib/gitlab/github_import/representation/issue_event.rb
index 30608112f85..fc3bc5a48ef 100644
--- a/lib/gitlab/github_import/representation/issue_event.rb
+++ b/lib/gitlab/github_import/representation/issue_event.rb
@@ -8,7 +8,8 @@ module Gitlab
expose_attribute :id, :actor, :event, :commit_id, :label_title, :old_title, :new_title,
:milestone_title, :issue, :source, :assignee, :review_requester,
- :requested_reviewer, :created_at
+ :requested_reviewer, :created_at, :updated_at, :submitted_at,
+ :state, :body
# attributes - A Hash containing the event details. The keys of this
# Hash (and any nested hashes) must be symbols.
@@ -51,7 +52,11 @@ module Gitlab
assignee: user_representation(event[:assignee]),
requested_reviewer: user_representation(event[:requested_reviewer]),
review_requester: user_representation(event[:review_requester]),
- created_at: event[:created_at]
+ created_at: event[:created_at],
+ updated_at: event[:updated_at],
+ submitted_at: event[:submitted_at],
+ state: event[:state],
+ body: event[:body]
)
end
diff --git a/lib/gitlab/github_import/representation/note_text.rb b/lib/gitlab/github_import/representation/note_text.rb
index 43e18a923d6..79bef4ec363 100644
--- a/lib/gitlab/github_import/representation/note_text.rb
+++ b/lib/gitlab/github_import/representation/note_text.rb
@@ -55,6 +55,14 @@ module Gitlab
}.merge(record_type_specific_attribute)
end
+ def has_attachments?
+ attachments.present?
+ end
+
+ def attachments
+ @attachments ||= MarkdownText.fetch_attachments(text)
+ end
+
private
def record_type_specific_attribute
diff --git a/lib/gitlab/github_import/representation/replay_event.rb b/lib/gitlab/github_import/representation/replay_event.rb
new file mode 100644
index 00000000000..2d71c26abbb
--- /dev/null
+++ b/lib/gitlab/github_import/representation/replay_event.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module GithubImport
+ module Representation
+ class ReplayEvent
+ include ToHash
+ include ExposeAttribute
+
+ attr_reader :attributes
+
+ expose_attribute :issuable_type, :issuable_iid
+
+ def self.from_json_hash(raw_hash)
+ new Representation.symbolize_hash(raw_hash)
+ end
+
+ def initialize(attributes)
+ @attributes = attributes
+ end
+
+ def github_identifiers
+ {
+ issuable_type: issuable_type,
+ issuable_iid: issuable_iid
+ }
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/github_import/settings.rb b/lib/gitlab/github_import/settings.rb
index 3947ae3c63d..da5833df3a1 100644
--- a/lib/gitlab/github_import/settings.rb
+++ b/lib/gitlab/github_import/settings.rb
@@ -38,8 +38,13 @@ module Gitlab
}
}.freeze
- def self.stages_array
- OPTIONAL_STAGES.map do |stage_name, data|
+ def self.stages_array(current_user)
+ deprecated_options = %i[single_endpoint_issue_events_import]
+
+ OPTIONAL_STAGES.filter_map do |stage_name, data|
+ next if deprecated_options.include?(stage_name) &&
+ Feature.enabled?(:github_import_extended_events, current_user)
+
{
name: stage_name.to_s,
label: s_(format("GitHubImport|%{text}", text: data[:label])),
@@ -61,7 +66,8 @@ module Gitlab
import_data = project.build_or_assign_import_data(
data: {
optional_stages: optional_stages,
- timeout_strategy: user_settings[:timeout_strategy]
+ timeout_strategy: user_settings[:timeout_strategy],
+ extended_events: user_settings[:extended_events]
},
credentials: project.import_data&.credentials
)
@@ -77,6 +83,10 @@ module Gitlab
!enabled?(stage_name)
end
+ def extended_events?
+ !!project.import_data&.data&.dig('extended_events')
+ end
+
private
attr_reader :project
diff --git a/lib/gitlab/github_import/single_endpoint_notes_importing.rb b/lib/gitlab/github_import/single_endpoint_notes_importing.rb
index 3584288da57..d4d9bd47e63 100644
--- a/lib/gitlab/github_import/single_endpoint_notes_importing.rb
+++ b/lib/gitlab/github_import/single_endpoint_notes_importing.rb
@@ -75,7 +75,7 @@ module Gitlab
batch.each do |parent_record|
# The page counter needs to be scoped by parent_record to avoid skipping
# pages of notes from already imported parent_record.
- page_counter = PageCounter.new(project, page_counter_id(parent_record))
+ page_counter = Gitlab::Import::PageCounter.new(project, page_counter_id(parent_record))
repo = project.import_source
options = collection_options.merge(page: page_counter.current)
@@ -85,6 +85,7 @@ module Gitlab
yield parent_record, page
end
+ after_batch_processed(parent_record)
mark_parent_imported(parent_record)
end
end
@@ -96,6 +97,8 @@ module Gitlab
)
end
+ def after_batch_processed(_parent); end
+
def already_imported_parents
Gitlab::Cache::Import::Caching.values_from_set(parent_imported_cache_key)
end
diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb
index 4bf2d8a0aca..bec4c7fc4d4 100644
--- a/lib/gitlab/github_import/user_finder.rb
+++ b/lib/gitlab/github_import/user_finder.rb
@@ -12,21 +12,18 @@ module Gitlab
# Lookups are cached even if no ID was found to remove the need for querying
# the database when most queries are not going to return results anyway.
class UserFinder
+ include Gitlab::ExclusiveLeaseHelpers
+
attr_reader :project, :client
- # The base cache key to use for caching user IDs for a given GitHub user
- # ID.
+ # The base cache key to use for caching user IDs for a given GitHub user ID.
ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'
- # The base cache key to use for caching user IDs for a given GitHub email
- # address.
- ID_FOR_EMAIL_CACHE_KEY =
- 'github-import/user-finder/id-for-email/%s'
+ # The base cache key to use for caching user IDs for a given GitHub email address.
+ ID_FOR_EMAIL_CACHE_KEY = 'github-import/user-finder/id-for-email/%s'
- # The base cache key to use for caching the Email addresses of GitHub
- # usernames.
- EMAIL_FOR_USERNAME_CACHE_KEY =
- 'github-import/user-finder/email-for-username/%s'
+ # The base cache key to use for caching the Email addresses of GitHub usernames.
+ EMAIL_FOR_USERNAME_CACHE_KEY = 'github-import/user-finder/email-for-username/%s'
# The base cache key to use for caching the user ETAG response headers
USERNAME_ETAG_CACHE_KEY = 'github-import/user-finder/user-etag/%s'
@@ -218,6 +215,17 @@ module Gitlab
private
+ def lease_key
+ "gitlab:github_import:user_finder:#{project.id}"
+ end
+
+ # Retrieves the email associated with the given username from the cache.
+ #
+ # The return value can be an email, an empty string, or nil.
+ #
+ # If an empty string is returned, it indicates that the user's email was fetched but not set on GitHub.
+ # If nil is returned, it indicates that the user's email wasn't fetched or the cache has expired.
+ # If an email is returned, it means the user has a public email set, and it has been successfully cached.
def read_email_from_cache(username)
Gitlab::Cache::Import::Caching.read(email_cache_key(username))
end
@@ -232,12 +240,27 @@ module Gitlab
end
def fetch_email_from_github(username, etag: nil)
- log(EMAIL_API_CALL_LOGGING_MESSAGE[etag.present?], username: username)
- user = client.user(username, { headers: { 'If-None-Match' => etag }.compact })
+ in_lock(lease_key, ttl: 3.minutes, sleep_sec: 1.second, retries: 30) do |retried|
+ # when retried, check the cache again as the other process that had the lease may have fetched the email
+ if retried
+ email = read_email_from_cache(username)
- user[:email] || '' if user
+ next email if email.present?
+ end
+
+ log(EMAIL_API_CALL_LOGGING_MESSAGE[etag.present?], username: username)
+
+ # Only make a rate-limited API call if the ETAG is not available
+ user = client.user(username, { headers: { 'If-None-Match' => etag }.compact })
+ user[:email] || '' if user
+ end
end
+ # Caches the email associated to the username
+ #
+ # An empty email is cached when the user email isn't set on GitHub.
+ # This is done to prevent UserFinder from fetching the user's email again when the user's email isn't set on
+ # GitHub
def cache_email!(username, email)
return unless email
@@ -245,6 +268,8 @@ module Gitlab
end
def cache_etag!(username)
+ return unless client.octokit.last_response
+
etag = client.octokit.last_response.headers[:etag]
Gitlab::Cache::Import::Caching.write(etag_cache_key(username), etag)
end