diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-11-14 11:41:52 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-11-14 11:41:52 +0300 |
commit | 585826cb22ecea5998a2c2a4675735c94bdeedac (patch) | |
tree | 5b05f0b30d33cef48963609e8a18a4dff260eab3 /lib/gitlab/github_import | |
parent | df221d036e5d0c6c0ee4d55b9c97f481ee05dee8 (diff) |
Add latest changes from gitlab-org/gitlab@16-6-stable-ee (v16.6.0-rc42)
Diffstat (limited to 'lib/gitlab/github_import')
-rw-r--r-- | lib/gitlab/github_import/attachments_downloader.rb | 24 | ||||
-rw-r--r-- | lib/gitlab/github_import/client.rb | 10 | ||||
-rw-r--r-- | lib/gitlab/github_import/issuable_finder.rb | 14 | ||||
-rw-r--r-- | lib/gitlab/github_import/job_delay_calculator.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/github_import/label_finder.rb | 16 | ||||
-rw-r--r-- | lib/gitlab/github_import/milestone_finder.rb | 18 | ||||
-rw-r--r-- | lib/gitlab/github_import/object_counter.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/github_import/parallel_scheduling.rb | 18 | ||||
-rw-r--r-- | lib/gitlab/github_import/representation/to_hash.rb | 4 |
9 files changed, 77 insertions, 31 deletions
diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 4db55a6aabb..df9c6c8342d 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -29,8 +29,8 @@ module Gitlab validate_content_length validate_filepath - redirection_url = get_assets_download_redirection_url - file = download_from(redirection_url) + download_url = get_assets_download_redirection_url + file = download_from(download_url) validate_symlink file @@ -60,16 +60,16 @@ module Gitlab options[:follow_redirects] = false response = Gitlab::HTTP.perform_request(Net::HTTP::Get, file_url, options) - raise_error("expected a redirect response, got #{response.code}") unless response.redirection? - redirection_url = response.headers[:location] - filename = URI.parse(redirection_url).path + download_url = if response.redirection? + response.headers[:location] + else + file_url + end - unless Gitlab::GithubImport::Markdown::Attachment::MEDIA_TYPES.any? { |type| filename.ends_with?(type) } - raise UnsupportedAttachmentError - end + file_type_valid?(URI.parse(download_url).path) - redirection_url + download_url end def github_assets_url_regex @@ -89,6 +89,12 @@ module Gitlab File.join(dir, filename) end end + + def file_type_valid?(file_url) + return if Gitlab::GithubImport::Markdown::Attachment::MEDIA_TYPES.any? { |type| file_url.ends_with?(type) } + + raise UnsupportedAttachmentError + end end end end diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 5a0ae680ab8..33e74c90115 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -182,12 +182,12 @@ module Gitlab request_count_counter.increment - raise_or_wait_for_rate_limit unless requests_remaining? + raise_or_wait_for_rate_limit('Internal threshold reached') unless requests_remaining? 
begin with_retry { yield } - rescue ::Octokit::TooManyRequests - raise_or_wait_for_rate_limit + rescue ::Octokit::TooManyRequests => e + raise_or_wait_for_rate_limit(e.response_body) # This retry will only happen when running in sequential mode as we'll # raise an error in parallel mode. @@ -213,11 +213,11 @@ module Gitlab octokit.rate_limit.limit end - def raise_or_wait_for_rate_limit + def raise_or_wait_for_rate_limit(message) rate_limit_counter.increment if parallel? - raise RateLimitError + raise RateLimitError, message else sleep(rate_limit_resets_in) end diff --git a/lib/gitlab/github_import/issuable_finder.rb b/lib/gitlab/github_import/issuable_finder.rb index b960df581e4..0780ba6119f 100644 --- a/lib/gitlab/github_import/issuable_finder.rb +++ b/lib/gitlab/github_import/issuable_finder.rb @@ -11,6 +11,7 @@ module Gitlab # The base cache key to use for storing/retrieving issuable IDs. CACHE_KEY = 'github-import/issuable-finder/%{project}/%{type}/%{iid}' + CACHE_OBJECT_NOT_FOUND = -1 # project - An instance of `Project`. # object - The object to look up or set a database ID for. @@ -23,9 +24,18 @@ module Gitlab # # This method will return `nil` if no ID could be found. def database_id - val = Gitlab::Cache::Import::Caching.read(cache_key, timeout: timeout) + val = Gitlab::Cache::Import::Caching.read_integer(cache_key, timeout: timeout) - val.to_i if val.present? + return val if Feature.disabled?(:import_fallback_to_db_empty_cache, project) + + return if val == CACHE_OBJECT_NOT_FOUND + return val if val.present? + + object_id = cache_key_type.safe_constantize&.find_by(project_id: project.id, iid: cache_key_iid)&.id || + CACHE_OBJECT_NOT_FOUND + + cache_database_id(object_id) + object_id == CACHE_OBJECT_NOT_FOUND ? nil : object_id end # Associates the given database ID with the current object. 
diff --git a/lib/gitlab/github_import/job_delay_calculator.rb b/lib/gitlab/github_import/job_delay_calculator.rb index 52b211c92d6..077a27df16c 100644 --- a/lib/gitlab/github_import/job_delay_calculator.rb +++ b/lib/gitlab/github_import/job_delay_calculator.rb @@ -15,7 +15,7 @@ module Gitlab def calculate_job_delay(job_index) multiplier = (job_index / parallel_import_batch[:size]) - (multiplier * parallel_import_batch[:delay]) + 1.second + (multiplier * parallel_import_batch[:delay]).to_i + 1 end end end diff --git a/lib/gitlab/github_import/label_finder.rb b/lib/gitlab/github_import/label_finder.rb index 39e669dbba4..d0bbd2bc7cf 100644 --- a/lib/gitlab/github_import/label_finder.rb +++ b/lib/gitlab/github_import/label_finder.rb @@ -7,6 +7,7 @@ module Gitlab # The base cache key to use for storing/retrieving label IDs. CACHE_KEY = 'github-import/label-finder/%{project}/%{name}' + CACHE_OBJECT_NOT_FOUND = -1 # project - An instance of `Project`. def initialize(project) @@ -15,7 +16,18 @@ module Gitlab # Returns the label ID for the given name. def id_for(name) - Gitlab::Cache::Import::Caching.read_integer(cache_key_for(name)) + cache_key = cache_key_for(name) + val = Gitlab::Cache::Import::Caching.read_integer(cache_key) + + return val if Feature.disabled?(:import_fallback_to_db_empty_cache, project) + + return if val == CACHE_OBJECT_NOT_FOUND + return val if val.present? + + object_id = project.labels.with_title(name).pick(:id) || CACHE_OBJECT_NOT_FOUND + + Gitlab::Cache::Import::Caching.write(cache_key, object_id) + object_id == CACHE_OBJECT_NOT_FOUND ? 
nil : object_id end # rubocop: disable CodeReuse/ActiveRecord @@ -32,7 +44,7 @@ module Gitlab # rubocop: enable CodeReuse/ActiveRecord def cache_key_for(name) - CACHE_KEY % { project: project.id, name: name } + format(CACHE_KEY, project: project.id, name: name) end end end diff --git a/lib/gitlab/github_import/milestone_finder.rb b/lib/gitlab/github_import/milestone_finder.rb index d9290e36ea1..dcb679fda6d 100644 --- a/lib/gitlab/github_import/milestone_finder.rb +++ b/lib/gitlab/github_import/milestone_finder.rb @@ -7,6 +7,7 @@ module Gitlab # The base cache key to use for storing/retrieving milestone IDs. CACHE_KEY = 'github-import/milestone-finder/%{project}/%{iid}' + CACHE_OBJECT_NOT_FOUND = -1 # project - An instance of `Project` def initialize(project) @@ -18,7 +19,20 @@ module Gitlab def id_for(issuable) return unless issuable.milestone_number - Gitlab::Cache::Import::Caching.read_integer(cache_key_for(issuable.milestone_number)) + milestone_iid = issuable.milestone_number + cache_key = cache_key_for(milestone_iid) + + val = Gitlab::Cache::Import::Caching.read_integer(cache_key) + + return val if Feature.disabled?(:import_fallback_to_db_empty_cache, project) + + return if val == CACHE_OBJECT_NOT_FOUND + return val if val.present? + + object_id = project.milestones.by_iid(milestone_iid).pick(:id) || CACHE_OBJECT_NOT_FOUND + + Gitlab::Cache::Import::Caching.write(cache_key, object_id) + object_id == CACHE_OBJECT_NOT_FOUND ? 
nil : object_id end # rubocop: disable CodeReuse/ActiveRecord @@ -35,7 +49,7 @@ module Gitlab # rubocop: enable CodeReuse/ActiveRecord def cache_key_for(iid) - CACHE_KEY % { project: project.id, iid: iid } + format(CACHE_KEY, project: project.id, iid: iid) end end end diff --git a/lib/gitlab/github_import/object_counter.rb b/lib/gitlab/github_import/object_counter.rb index 88e91800cee..5618cfc6044 100644 --- a/lib/gitlab/github_import/object_counter.rb +++ b/lib/gitlab/github_import/object_counter.rb @@ -52,7 +52,7 @@ module Gitlab .sort .each do |counter| object_type = counter.split('/').last - result[operation][object_type] = CACHING.read_integer(counter) || 0 + result[operation][object_type] = CACHING.read_integer(counter, timeout: IMPORT_CACHING_TIMEOUT) || 0 end end end diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb index cccd99f48b1..ce93b5203df 100644 --- a/lib/gitlab/github_import/parallel_scheduling.rb +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -6,7 +6,7 @@ module Gitlab include JobDelayCalculator attr_reader :project, :client, :page_counter, :already_imported_cache_key, - :job_waiter_cache_key, :job_waiter_remaining_cache_key + :job_waiter_cache_key, :job_waiter_remaining_cache_key # The base cache key to use for tracking already imported objects. 
ALREADY_IMPORTED_CACHE_KEY = @@ -26,12 +26,11 @@ module Gitlab @client = client @parallel = parallel @page_counter = PageCounter.new(project, collection_method) - @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY % - { project: project.id, collection: collection_method } - @job_waiter_cache_key = JOB_WAITER_CACHE_KEY % - { project: project.id, collection: collection_method } - @job_waiter_remaining_cache_key = JOB_WAITER_REMAINING_CACHE_KEY % - { project: project.id, collection: collection_method } + @already_imported_cache_key = format(ALREADY_IMPORTED_CACHE_KEY, project: project.id, + collection: collection_method) + @job_waiter_cache_key = format(JOB_WAITER_CACHE_KEY, project: project.id, collection: collection_method) + @job_waiter_remaining_cache_key = format(JOB_WAITER_REMAINING_CACHE_KEY, project: project.id, + collection: collection_method) end def parallel? @@ -57,7 +56,8 @@ module Gitlab # still scheduling duplicates while. Since all work has already been # completed those jobs will just cycle through any remaining pages while # not scheduling anything. 
- Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT) + Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, + Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT) info(project.id, message: "importer finished") retval @@ -97,7 +97,7 @@ module Gitlab repr = object_representation(object) job_delay = calculate_job_delay(enqueued_job_counter) - sidekiq_worker_class.perform_in(job_delay, project.id, repr.to_hash, job_waiter.key) + sidekiq_worker_class.perform_in(job_delay, project.id, repr.to_hash.deep_stringify_keys, job_waiter.key.to_s) enqueued_job_counter += 1 job_waiter.jobs_remaining = Gitlab::Cache::Import::Caching.increment(job_waiter_remaining_cache_key) diff --git a/lib/gitlab/github_import/representation/to_hash.rb b/lib/gitlab/github_import/representation/to_hash.rb index 4a0f36ab8f0..54faa51a787 100644 --- a/lib/gitlab/github_import/representation/to_hash.rb +++ b/lib/gitlab/github_import/representation/to_hash.rb @@ -16,11 +16,15 @@ module Gitlab hash end + # This method allow objects to be safely passed directly to Sidekiq without errors. + # It returns JSON datatypes: string, integer, float, boolean, null(nil), array and hash. def convert_value_for_to_hash(value) if value.is_a?(Array) value.map { |v| convert_value_for_to_hash(v) } elsif value.respond_to?(:to_hash) value.to_hash + elsif value.respond_to?(:strftime) || value.is_a?(Symbol) + value.to_s else value end |