Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/github_import')
-rw-r--r--lib/gitlab/github_import/attachments_downloader.rb39
-rw-r--r--lib/gitlab/github_import/client.rb14
-rw-r--r--lib/gitlab/github_import/importer/note_attachments_importer.rb19
-rw-r--r--lib/gitlab/github_import/markdown/attachment.rb13
-rw-r--r--lib/gitlab/github_import/object_counter.rb10
-rw-r--r--lib/gitlab/github_import/user_finder.rb110
6 files changed, 171 insertions, 34 deletions
diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb
index b71d5f753f2..4db55a6aabb 100644
--- a/lib/gitlab/github_import/attachments_downloader.rb
+++ b/lib/gitlab/github_import/attachments_downloader.rb
@@ -8,15 +8,17 @@ module Gitlab
include ::BulkImports::FileDownloads::Validations
DownloadError = Class.new(StandardError)
+ UnsupportedAttachmentError = Class.new(StandardError)
FILENAME_SIZE_LIMIT = 255 # chars before the extension
DEFAULT_FILE_SIZE_LIMIT = 25.megabytes
TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze
- attr_reader :file_url, :filename, :file_size_limit
+ attr_reader :file_url, :filename, :file_size_limit, :options
- def initialize(file_url, file_size_limit: DEFAULT_FILE_SIZE_LIMIT)
+ def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT)
@file_url = file_url
+ @options = options
@file_size_limit = file_size_limit
filename = URI(file_url).path.split('/').last
@@ -27,7 +29,9 @@ module Gitlab
validate_content_length
validate_filepath
- file = download
+ redirection_url = get_assets_download_redirection_url
+ file = download_from(redirection_url)
+
validate_symlink
file
end
@@ -47,9 +51,34 @@ module Gitlab
Gitlab::HTTP.perform_request(Net::HTTP::Head, file_url, {}).headers
end
- def download
+ # Github /assets redirection link will redirect to aws which has its own authorization.
+ # Keeping our bearer token will cause request rejection
+ # eg. Only one auth mechanism allowed; only the X-Amz-Algorithm query parameter,
+ # Signature query string parameter or the Authorization header should be specified.
+ def get_assets_download_redirection_url
+ return file_url unless file_url.starts_with?(github_assets_url_regex)
+
+ options[:follow_redirects] = false
+ response = Gitlab::HTTP.perform_request(Net::HTTP::Get, file_url, options)
+ raise_error("expected a redirect response, got #{response.code}") unless response.redirection?
+
+ redirection_url = response.headers[:location]
+ filename = URI.parse(redirection_url).path
+
+ unless Gitlab::GithubImport::Markdown::Attachment::MEDIA_TYPES.any? { |type| filename.ends_with?(type) }
+ raise UnsupportedAttachmentError
+ end
+
+ redirection_url
+ end
+
+ def github_assets_url_regex
+ %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url)}/.*/assets/}
+ end
+
+ def download_from(url)
file = File.open(filepath, 'wb')
- Gitlab::HTTP.perform_request(Net::HTTP::Get, file_url, stream_body: true) { |batch| file.write(batch) }
+ Gitlab::HTTP.perform_request(Net::HTTP::Get, url, stream_body: true) { |batch| file.write(batch) }
file
end
diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb
index 886563a6f69..23d4faa3dde 100644
--- a/lib/gitlab/github_import/client.rb
+++ b/lib/gitlab/github_import/client.rb
@@ -67,10 +67,18 @@ module Gitlab
end
# Returns the details of a GitHub user.
+ # 304 (Not Modified) status means the user is cached - API won't return user data.
#
- # username - The username of the user.
- def user(username)
- with_rate_limit { octokit.user(username).to_h }
+ # @param username[String] the username of the user.
+ # @param options[Hash] the optional parameters.
+ def user(username, options = {})
+ with_rate_limit do
+ user = octokit.user(username, options)
+
+ next if octokit.last_response&.status == 304
+
+ user.to_h
+ end
end
def pull_request_reviews(repo_name, iid)
diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb
index 266ee2938ba..26472b0d468 100644
--- a/lib/gitlab/github_import/importer/note_attachments_importer.rb
+++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb
@@ -4,14 +4,15 @@ module Gitlab
module GithubImport
module Importer
class NoteAttachmentsImporter
- attr_reader :note_text, :project
+ attr_reader :note_text, :project, :client
# note_text - An instance of `Gitlab::GithubImport::Representation::NoteText`.
# project - An instance of `Project`.
# client - An instance of `Gitlab::GithubImport::Client`.
- def initialize(note_text, project, _client = nil)
+ def initialize(note_text, project, client)
@note_text = note_text
@project = project
+ @client = client
end
def execute
@@ -33,7 +34,7 @@ module Gitlab
if attachment.part_of_project_blob?(project_import_source)
convert_project_content_link(attachment.url, project_import_source)
- elsif attachment.media? || attachment.doc_belongs_to_project?(project_import_source)
+ elsif attachment.media?(project_import_source) || attachment.doc_belongs_to_project?(project_import_source)
download_attachment(attachment)
else # url to other GitHub project
attachment.url
@@ -53,14 +54,24 @@ module Gitlab
# in: an instance of Gitlab::GithubImport::Markdown::Attachment
# out: gitlab attachment markdown url
def download_attachment(attachment)
- downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url)
+ downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options)
file = downloader.perform
uploader = UploadService.new(project, file, FileUploader).execute
uploader.to_h[:url]
+ rescue ::Gitlab::GithubImport::AttachmentsDownloader::UnsupportedAttachmentError
+ attachment.url
ensure
downloader&.delete
end
+ def options
+ {
+ headers: {
+ 'Authorization' => "Bearer #{client.octokit.access_token}"
+ }
+ }
+ end
+
def update_note_record(text)
case note_text.record_type
when ::Release.name
diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb
index e270cfba619..0d8f3196719 100644
--- a/lib/gitlab/github_import/markdown/attachment.rb
+++ b/lib/gitlab/github_import/markdown/attachment.rb
@@ -57,7 +57,8 @@ module Gitlab
def github_url?(url, docs: false, media: false)
if media
- url.start_with?(::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN)
+ url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url,
+ ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN)
elsif docs
url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url)
end
@@ -65,6 +66,9 @@ module Gitlab
def whitelisted_type?(url, docs: false, media: false)
if media
+ # We do not know the file extension type from the /assets markdown
+ return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url)
+
MEDIA_TYPES.any? { |type| url.end_with?(type) }
elsif docs
DOC_TYPES.any? { |type| url.end_with?(type) }
@@ -91,8 +95,11 @@ module Gitlab
)
end
- def media?
- url.start_with?(::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN)
+ def media?(import_source)
+ url.start_with?(
+ "#{::Gitlab::GithubImport::MarkdownText.github_url}/#{import_source}/assets",
+ ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN
+ )
end
def inspect
diff --git a/lib/gitlab/github_import/object_counter.rb b/lib/gitlab/github_import/object_counter.rb
index 7ee64b2abac..88e91800cee 100644
--- a/lib/gitlab/github_import/object_counter.rb
+++ b/lib/gitlab/github_import/object_counter.rb
@@ -14,6 +14,8 @@ module Gitlab
CACHING = Gitlab::Cache::Import::Caching
+ IMPORT_CACHING_TIMEOUT = 2.weeks.to_i
+
class << self
# Increments the project and the global counters if the given value is >= 1
def increment(project, object_type, operation, value: 1)
@@ -36,7 +38,7 @@ module Gitlab
# After import is completed we store this information in project's import_checksums
return cached_summary if cached_summary != EMPTY_SUMMARY || project.import_state.blank?
- project.import_state.in_progress? ? cached_summary : project.import_checksums
+ project.import_state.completed? ? project.import_checksums : cached_summary
end
private
@@ -50,7 +52,7 @@ module Gitlab
.sort
.each do |counter|
object_type = counter.split('/').last
- result[operation][object_type] = CACHING.read_integer(counter)
+ result[operation][object_type] = CACHING.read_integer(counter) || 0
end
end
end
@@ -84,11 +86,11 @@ module Gitlab
add_counter_to_list(project, operation, counter_key)
- CACHING.increment_by(counter_key, value)
+ CACHING.increment_by(counter_key, value, timeout: IMPORT_CACHING_TIMEOUT)
end
def add_counter_to_list(project, operation, key)
- CACHING.set_add(counter_list_key(project, operation), key)
+ CACHING.set_add(counter_list_key(project, operation), key, timeout: IMPORT_CACHING_TIMEOUT)
end
def counter_list_key(project, operation)
diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb
index 57365ebe206..1832f071a44 100644
--- a/lib/gitlab/github_import/user_finder.rb
+++ b/lib/gitlab/github_import/user_finder.rb
@@ -28,6 +28,17 @@ module Gitlab
EMAIL_FOR_USERNAME_CACHE_KEY =
'github-import/user-finder/email-for-username/%s'
+ # The base cache key to use for caching the user ETAG response headers
+ USERNAME_ETAG_CACHE_KEY = 'github-import/user-finder/user-etag/%s'
+
+ # The base cache key to store whether an email has been fetched for a project
+ EMAIL_FETCHED_FOR_PROJECT_CACHE_KEY = 'github-import/user-finder/%{project}/email-fetched/%{username}'
+
+ EMAIL_API_CALL_LOGGING_MESSAGE = {
+ true => 'Fetching email from GitHub with ETAG header',
+ false => 'Fetching email from GitHub'
+ }.freeze
+
# project - An instance of `Project`
# client - An instance of `Gitlab::GithubImport::Client`
def initialize(project, client)
@@ -109,24 +120,39 @@ module Gitlab
id_for_github_id(id) || id_for_github_email(email)
end
- # Find the public email of a given username in GitHub. The public email is cached to avoid multiple calls to
- # GitHub. In case the username does not exist or the public email is nil, a blank value is cached to also prevent
- # multiple calls to GitHub.
+ # Find the public email of a given username in GitHub.
+ # The email is cached to avoid multiple calls to GitHub. The cache is shared among all projects.
+ # If the email was not found, a blank email is cached.
+ # If the email is blank, we attempt to fetch it from GitHub using an ETAG request once for every project.
+
+ # @param username [String] The username of the GitHub user.
#
# @return [String] If public email is found
# @return [Nil] If public email or username does not exist
def email_for_github_username(username)
- cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username
- email = Gitlab::Cache::Import::Caching.read(cache_key)
+ email = read_email_from_cache(username)
+
+ if email.blank? && !email_fetched_for_project?(username)
+ # If an ETAG is available, make an API call with the ETAG.
+ # Only make a rate-limited API call if the ETAG is not available and the email is nil.
+ etag = read_etag_from_cache(username)
+ email = fetch_email_from_github(username, etag: etag) || email
- if email.nil?
- user = client.user(username)
- email = Gitlab::Cache::Import::Caching.write(cache_key, user[:email].to_s, timeout: timeout(user[:email]))
+ cache_email!(username, email)
+ cache_etag!(username) if email.blank? && etag.nil?
+
+ # If a non-blank email is cached, we don't need the ETAG or project check caches.
+ # Otherwise, indicate that the project has been checked.
+ if email.present?
+ clear_caches!(username)
+ else
+ set_project_as_checked!(username)
+ end
end
email.presence
rescue ::Octokit::NotFound
- Gitlab::Cache::Import::Caching.write(cache_key, '')
+ cache_email!(username, '')
nil
end
@@ -192,12 +218,66 @@ module Gitlab
private
- def timeout(email)
- if email
- Gitlab::Cache::Import::Caching::TIMEOUT
- else
- Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT
- end
+ def read_email_from_cache(username)
+ Gitlab::Cache::Import::Caching.read(email_cache_key(username))
+ end
+
+ def read_etag_from_cache(username)
+ Gitlab::Cache::Import::Caching.read(etag_cache_key(username))
+ end
+
+ def email_fetched_for_project?(username)
+ email_fetched_for_project_cache_key = email_fetched_for_project_cache_key(username)
+ Gitlab::Cache::Import::Caching.read(email_fetched_for_project_cache_key)
+ end
+
+ def fetch_email_from_github(username, etag: nil)
+ log(EMAIL_API_CALL_LOGGING_MESSAGE[etag.present?], username: username)
+ user = client.user(username, { headers: { 'If-None-Match' => etag }.compact })
+
+ user[:email] || '' if user
+ end
+
+ def cache_email!(username, email)
+ return unless email
+
+ Gitlab::Cache::Import::Caching.write(email_cache_key(username), email)
+ end
+
+ def cache_etag!(username)
+ etag = client.octokit.last_response.headers[:etag]
+ Gitlab::Cache::Import::Caching.write(etag_cache_key(username), etag)
+ end
+
+ def set_project_as_checked!(username)
+ Gitlab::Cache::Import::Caching.write(email_fetched_for_project_cache_key(username), 1)
+ end
+
+ def clear_caches!(username)
+ Gitlab::Cache::Import::Caching.expire(etag_cache_key(username), 0)
+ Gitlab::Cache::Import::Caching.expire(email_fetched_for_project_cache_key(username), 0)
+ end
+
+ def email_cache_key(username)
+ EMAIL_FOR_USERNAME_CACHE_KEY % username
+ end
+
+ def etag_cache_key(username)
+ USERNAME_ETAG_CACHE_KEY % username
+ end
+
+ def email_fetched_for_project_cache_key(username)
+ format(EMAIL_FETCHED_FOR_PROJECT_CACHE_KEY, project: project.id, username: username)
+ end
+
+ def log(message, username: nil)
+ Gitlab::Import::Logger.info(
+ import_type: :github,
+ project_id: project.id,
+ class: self.class.name,
+ username: username,
+ message: message
+ )
end
end
end