Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: GitLab Bot <gitlab-bot@gitlab.com>, 2023-11-14 11:41:52 +0300
committer: GitLab Bot <gitlab-bot@gitlab.com>, 2023-11-14 11:41:52 +0300
commit: 585826cb22ecea5998a2c2a4675735c94bdeedac (patch)
tree: 5b05f0b30d33cef48963609e8a18a4dff260eab3 /lib/gitlab/github_import
parent: df221d036e5d0c6c0ee4d55b9c97f481ee05dee8 (diff)
Add latest changes from gitlab-org/gitlab@16-6-stable-ee (tag: v16.6.0-rc42)
Diffstat (limited to 'lib/gitlab/github_import')
-rw-r--r--  lib/gitlab/github_import/attachments_downloader.rb  24
-rw-r--r--  lib/gitlab/github_import/client.rb  10
-rw-r--r--  lib/gitlab/github_import/issuable_finder.rb  14
-rw-r--r--  lib/gitlab/github_import/job_delay_calculator.rb  2
-rw-r--r--  lib/gitlab/github_import/label_finder.rb  16
-rw-r--r--  lib/gitlab/github_import/milestone_finder.rb  18
-rw-r--r--  lib/gitlab/github_import/object_counter.rb  2
-rw-r--r--  lib/gitlab/github_import/parallel_scheduling.rb  18
-rw-r--r--  lib/gitlab/github_import/representation/to_hash.rb  4
9 files changed, 77 insertions, 31 deletions
diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb
index 4db55a6aabb..df9c6c8342d 100644
--- a/lib/gitlab/github_import/attachments_downloader.rb
+++ b/lib/gitlab/github_import/attachments_downloader.rb
@@ -29,8 +29,8 @@ module Gitlab
validate_content_length
validate_filepath
- redirection_url = get_assets_download_redirection_url
- file = download_from(redirection_url)
+ download_url = get_assets_download_redirection_url
+ file = download_from(download_url)
validate_symlink
file
@@ -60,16 +60,16 @@ module Gitlab
options[:follow_redirects] = false
response = Gitlab::HTTP.perform_request(Net::HTTP::Get, file_url, options)
- raise_error("expected a redirect response, got #{response.code}") unless response.redirection?
- redirection_url = response.headers[:location]
- filename = URI.parse(redirection_url).path
+ download_url = if response.redirection?
+ response.headers[:location]
+ else
+ file_url
+ end
- unless Gitlab::GithubImport::Markdown::Attachment::MEDIA_TYPES.any? { |type| filename.ends_with?(type) }
- raise UnsupportedAttachmentError
- end
+ file_type_valid?(URI.parse(download_url).path)
- redirection_url
+ download_url
end
def github_assets_url_regex
@@ -89,6 +89,12 @@ module Gitlab
File.join(dir, filename)
end
end
+
+ def file_type_valid?(file_url)
+ return if Gitlab::GithubImport::Markdown::Attachment::MEDIA_TYPES.any? { |type| file_url.ends_with?(type) }
+
+ raise UnsupportedAttachmentError
+ end
end
end
end
diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb
index 5a0ae680ab8..33e74c90115 100644
--- a/lib/gitlab/github_import/client.rb
+++ b/lib/gitlab/github_import/client.rb
@@ -182,12 +182,12 @@ module Gitlab
request_count_counter.increment
- raise_or_wait_for_rate_limit unless requests_remaining?
+ raise_or_wait_for_rate_limit('Internal threshold reached') unless requests_remaining?
begin
with_retry { yield }
- rescue ::Octokit::TooManyRequests
- raise_or_wait_for_rate_limit
+ rescue ::Octokit::TooManyRequests => e
+ raise_or_wait_for_rate_limit(e.response_body)
# This retry will only happen when running in sequential mode as we'll
# raise an error in parallel mode.
@@ -213,11 +213,11 @@ module Gitlab
octokit.rate_limit.limit
end
- def raise_or_wait_for_rate_limit
+ def raise_or_wait_for_rate_limit(message)
rate_limit_counter.increment
if parallel?
- raise RateLimitError
+ raise RateLimitError, message
else
sleep(rate_limit_resets_in)
end
diff --git a/lib/gitlab/github_import/issuable_finder.rb b/lib/gitlab/github_import/issuable_finder.rb
index b960df581e4..0780ba6119f 100644
--- a/lib/gitlab/github_import/issuable_finder.rb
+++ b/lib/gitlab/github_import/issuable_finder.rb
@@ -11,6 +11,7 @@ module Gitlab
# The base cache key to use for storing/retrieving issuable IDs.
CACHE_KEY = 'github-import/issuable-finder/%{project}/%{type}/%{iid}'
+ CACHE_OBJECT_NOT_FOUND = -1
# project - An instance of `Project`.
# object - The object to look up or set a database ID for.
@@ -23,9 +24,18 @@ module Gitlab
#
# This method will return `nil` if no ID could be found.
def database_id
- val = Gitlab::Cache::Import::Caching.read(cache_key, timeout: timeout)
+ val = Gitlab::Cache::Import::Caching.read_integer(cache_key, timeout: timeout)
- val.to_i if val.present?
+ return val if Feature.disabled?(:import_fallback_to_db_empty_cache, project)
+
+ return if val == CACHE_OBJECT_NOT_FOUND
+ return val if val.present?
+
+ object_id = cache_key_type.safe_constantize&.find_by(project_id: project.id, iid: cache_key_iid)&.id ||
+ CACHE_OBJECT_NOT_FOUND
+
+ cache_database_id(object_id)
+ object_id == CACHE_OBJECT_NOT_FOUND ? nil : object_id
end
# Associates the given database ID with the current object.
diff --git a/lib/gitlab/github_import/job_delay_calculator.rb b/lib/gitlab/github_import/job_delay_calculator.rb
index 52b211c92d6..077a27df16c 100644
--- a/lib/gitlab/github_import/job_delay_calculator.rb
+++ b/lib/gitlab/github_import/job_delay_calculator.rb
@@ -15,7 +15,7 @@ module Gitlab
def calculate_job_delay(job_index)
multiplier = (job_index / parallel_import_batch[:size])
- (multiplier * parallel_import_batch[:delay]) + 1.second
+ (multiplier * parallel_import_batch[:delay]).to_i + 1
end
end
end
diff --git a/lib/gitlab/github_import/label_finder.rb b/lib/gitlab/github_import/label_finder.rb
index 39e669dbba4..d0bbd2bc7cf 100644
--- a/lib/gitlab/github_import/label_finder.rb
+++ b/lib/gitlab/github_import/label_finder.rb
@@ -7,6 +7,7 @@ module Gitlab
# The base cache key to use for storing/retrieving label IDs.
CACHE_KEY = 'github-import/label-finder/%{project}/%{name}'
+ CACHE_OBJECT_NOT_FOUND = -1
# project - An instance of `Project`.
def initialize(project)
@@ -15,7 +16,18 @@ module Gitlab
# Returns the label ID for the given name.
def id_for(name)
- Gitlab::Cache::Import::Caching.read_integer(cache_key_for(name))
+ cache_key = cache_key_for(name)
+ val = Gitlab::Cache::Import::Caching.read_integer(cache_key)
+
+ return val if Feature.disabled?(:import_fallback_to_db_empty_cache, project)
+
+ return if val == CACHE_OBJECT_NOT_FOUND
+ return val if val.present?
+
+ object_id = project.labels.with_title(name).pick(:id) || CACHE_OBJECT_NOT_FOUND
+
+ Gitlab::Cache::Import::Caching.write(cache_key, object_id)
+ object_id == CACHE_OBJECT_NOT_FOUND ? nil : object_id
end
# rubocop: disable CodeReuse/ActiveRecord
@@ -32,7 +44,7 @@ module Gitlab
# rubocop: enable CodeReuse/ActiveRecord
def cache_key_for(name)
- CACHE_KEY % { project: project.id, name: name }
+ format(CACHE_KEY, project: project.id, name: name)
end
end
end
diff --git a/lib/gitlab/github_import/milestone_finder.rb b/lib/gitlab/github_import/milestone_finder.rb
index d9290e36ea1..dcb679fda6d 100644
--- a/lib/gitlab/github_import/milestone_finder.rb
+++ b/lib/gitlab/github_import/milestone_finder.rb
@@ -7,6 +7,7 @@ module Gitlab
# The base cache key to use for storing/retrieving milestone IDs.
CACHE_KEY = 'github-import/milestone-finder/%{project}/%{iid}'
+ CACHE_OBJECT_NOT_FOUND = -1
# project - An instance of `Project`
def initialize(project)
@@ -18,7 +19,20 @@ module Gitlab
def id_for(issuable)
return unless issuable.milestone_number
- Gitlab::Cache::Import::Caching.read_integer(cache_key_for(issuable.milestone_number))
+ milestone_iid = issuable.milestone_number
+ cache_key = cache_key_for(milestone_iid)
+
+ val = Gitlab::Cache::Import::Caching.read_integer(cache_key)
+
+ return val if Feature.disabled?(:import_fallback_to_db_empty_cache, project)
+
+ return if val == CACHE_OBJECT_NOT_FOUND
+ return val if val.present?
+
+ object_id = project.milestones.by_iid(milestone_iid).pick(:id) || CACHE_OBJECT_NOT_FOUND
+
+ Gitlab::Cache::Import::Caching.write(cache_key, object_id)
+ object_id == CACHE_OBJECT_NOT_FOUND ? nil : object_id
end
# rubocop: disable CodeReuse/ActiveRecord
@@ -35,7 +49,7 @@ module Gitlab
# rubocop: enable CodeReuse/ActiveRecord
def cache_key_for(iid)
- CACHE_KEY % { project: project.id, iid: iid }
+ format(CACHE_KEY, project: project.id, iid: iid)
end
end
end
diff --git a/lib/gitlab/github_import/object_counter.rb b/lib/gitlab/github_import/object_counter.rb
index 88e91800cee..5618cfc6044 100644
--- a/lib/gitlab/github_import/object_counter.rb
+++ b/lib/gitlab/github_import/object_counter.rb
@@ -52,7 +52,7 @@ module Gitlab
.sort
.each do |counter|
object_type = counter.split('/').last
- result[operation][object_type] = CACHING.read_integer(counter) || 0
+ result[operation][object_type] = CACHING.read_integer(counter, timeout: IMPORT_CACHING_TIMEOUT) || 0
end
end
end
diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb
index cccd99f48b1..ce93b5203df 100644
--- a/lib/gitlab/github_import/parallel_scheduling.rb
+++ b/lib/gitlab/github_import/parallel_scheduling.rb
@@ -6,7 +6,7 @@ module Gitlab
include JobDelayCalculator
attr_reader :project, :client, :page_counter, :already_imported_cache_key,
- :job_waiter_cache_key, :job_waiter_remaining_cache_key
+ :job_waiter_cache_key, :job_waiter_remaining_cache_key
# The base cache key to use for tracking already imported objects.
ALREADY_IMPORTED_CACHE_KEY =
@@ -26,12 +26,11 @@ module Gitlab
@client = client
@parallel = parallel
@page_counter = PageCounter.new(project, collection_method)
- @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
- { project: project.id, collection: collection_method }
- @job_waiter_cache_key = JOB_WAITER_CACHE_KEY %
- { project: project.id, collection: collection_method }
- @job_waiter_remaining_cache_key = JOB_WAITER_REMAINING_CACHE_KEY %
- { project: project.id, collection: collection_method }
+ @already_imported_cache_key = format(ALREADY_IMPORTED_CACHE_KEY, project: project.id,
+ collection: collection_method)
+ @job_waiter_cache_key = format(JOB_WAITER_CACHE_KEY, project: project.id, collection: collection_method)
+ @job_waiter_remaining_cache_key = format(JOB_WAITER_REMAINING_CACHE_KEY, project: project.id,
+ collection: collection_method)
end
def parallel?
@@ -57,7 +56,8 @@ module Gitlab
# still scheduling duplicates while. Since all work has already been
# completed those jobs will just cycle through any remaining pages while
# not scheduling anything.
- Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT)
+ Gitlab::Cache::Import::Caching.expire(already_imported_cache_key,
+ Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT)
info(project.id, message: "importer finished")
retval
@@ -97,7 +97,7 @@ module Gitlab
repr = object_representation(object)
job_delay = calculate_job_delay(enqueued_job_counter)
- sidekiq_worker_class.perform_in(job_delay, project.id, repr.to_hash, job_waiter.key)
+ sidekiq_worker_class.perform_in(job_delay, project.id, repr.to_hash.deep_stringify_keys, job_waiter.key.to_s)
enqueued_job_counter += 1
job_waiter.jobs_remaining = Gitlab::Cache::Import::Caching.increment(job_waiter_remaining_cache_key)
diff --git a/lib/gitlab/github_import/representation/to_hash.rb b/lib/gitlab/github_import/representation/to_hash.rb
index 4a0f36ab8f0..54faa51a787 100644
--- a/lib/gitlab/github_import/representation/to_hash.rb
+++ b/lib/gitlab/github_import/representation/to_hash.rb
@@ -16,11 +16,15 @@ module Gitlab
hash
end
+ # This method allows objects to be safely passed directly to Sidekiq without errors.
+ # It returns JSON datatypes: string, integer, float, boolean, null(nil), array and hash.
def convert_value_for_to_hash(value)
if value.is_a?(Array)
value.map { |v| convert_value_for_to_hash(v) }
elsif value.respond_to?(:to_hash)
value.to_hash
+ elsif value.respond_to?(:strftime) || value.is_a?(Symbol)
+ value.to_s
else
value
end