Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab')
-rw-r--r-- lib/gitlab/cache/import/caching.rb 2
-rw-r--r-- lib/gitlab/github_import.rb 9
-rw-r--r-- lib/gitlab/github_import/client.rb 4
-rw-r--r-- lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer.rb 54
-rw-r--r-- lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer.rb 54
-rw-r--r-- lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer.rb 54
-rw-r--r-- lib/gitlab/github_import/issuable_finder.rb 12
-rw-r--r-- lib/gitlab/github_import/single_endpoint_notes_importing.rb 85
8 files changed, 271 insertions, 3 deletions
diff --git a/lib/gitlab/cache/import/caching.rb b/lib/gitlab/cache/import/caching.rb
index 89c85cb50be..8cc4f213fc1 100644
--- a/lib/gitlab/cache/import/caching.rb
+++ b/lib/gitlab/cache/import/caching.rb
@@ -7,6 +7,8 @@ module Gitlab
# The default timeout of the cache keys.
TIMEOUT = 24.hours.to_i
+ LONGER_TIMEOUT = 72.hours.to_i
+
WRITE_IF_GREATER_SCRIPT = <<-EOF.strip_heredoc.freeze
local key, value, ttl = KEYS[1], tonumber(ARGV[1]), ARGV[2]
local existing = tonumber(redis.call("get", key))
diff --git a/lib/gitlab/github_import.rb b/lib/gitlab/github_import.rb
index c3cc15e10f7..7ac0d875512 100644
--- a/lib/gitlab/github_import.rb
+++ b/lib/gitlab/github_import.rb
@@ -11,6 +11,7 @@ module Gitlab
Client.new(
token_to_use,
host: host.presence || self.formatted_import_url(project),
+ per_page: self.per_page(project),
parallel: parallel
)
end
@@ -33,5 +34,13 @@ module Gitlab
url.to_s
end
end
+
+ def self.per_page(project)
+   # Picks the page size for GitHub API requests: the lower size applies only
+   # to projects in a group that has the ops feature flag enabled.
+   lower = project.group.present? &&
+     Feature.enabled?(:github_importer_lower_per_page_limit, project.group, type: :ops, default_enabled: :yaml)
+   lower ? Gitlab::GithubImport::Client::LOWER_PER_PAGE : Gitlab::GithubImport::Client::DEFAULT_PER_PAGE
+ end
end
end
diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb
index 138716b1b53..efa816c5eb0 100644
--- a/lib/gitlab/github_import/client.rb
+++ b/lib/gitlab/github_import/client.rb
@@ -19,6 +19,8 @@ module Gitlab
attr_reader :octokit
SEARCH_MAX_REQUESTS_PER_MINUTE = 30
+ DEFAULT_PER_PAGE = 100
+ LOWER_PER_PAGE = 50
# A single page of data and the corresponding page number.
Page = Struct.new(:objects, :number)
@@ -44,7 +46,7 @@ module Gitlab
# this value to `true` for parallel importing is crucial as
# otherwise hitting the rate limit will result in a thread
# being blocked in a `sleep()` call for up to an hour.
- def initialize(token, host: nil, per_page: 100, parallel: true)
+ def initialize(token, host: nil, per_page: DEFAULT_PER_PAGE, parallel: true)
@host = host
@octokit = ::Octokit::Client.new(
access_token: token,
diff --git a/lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer.rb b/lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer.rb
new file mode 100644
index 00000000000..a2c3d1bd057
--- /dev/null
+++ b/lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+# Imports pull request diff notes one merge request at a time.
+#
+# This importer is used when `github_importer_single_endpoint_notes_import`
+# feature flag is on and replaces `DiffNotesImporter`.
+#
+# Notes are fetched with the `pull_request_comments` endpoint, 1 PR at a
+# time. That is slower than `NotesImporter`, but it makes sure all notes are
+# imported: the `issues_comments` endpoint used by `NotesImporter` can be
+# limited by GitHub API to not return all available pages.
+module Gitlab
+  module GithubImport
+    module Importer
+      class SingleEndpointDiffNotesImporter
+        include ParallelScheduling
+        include SingleEndpointNotesImporting
+
+        # Client method used to fetch the notes of a single pull request.
+        def collection_method
+          :pull_request_comments
+        end
+
+        # Object type reported to `ObjectCounter` for every fetched note.
+        def object_type
+          :diff_note
+        end
+
+        # NOTE(review): the three hooks below are not called by
+        # SingleEndpointNotesImporting; presumably ParallelScheduling reads them - confirm.
+        def representation_class
+          Representation::DiffNote
+        end
+
+        def importer_class
+          DiffNoteImporter
+        end
+
+        def sidekiq_worker_class
+          ImportDiffNoteWorker
+        end
+
+        private
+
+        # Merge requests whose iid is not yet in the already-imported set.
+        def noteables
+          imported_iids = already_imported_noteables
+
+          project.merge_requests.where.not(iid: imported_iids) # rubocop: disable CodeReuse/ActiveRecord
+        end
+
+        # One page counter per merge request and endpoint.
+        def page_counter_id(noteable)
+          "merge_request/#{noteable.id}/#{collection_method}"
+        end
+
+        # Key of the cached set of merge request iids whose diff notes were fetched.
+        def notes_imported_cache_key
+          "github-importer/merge_request/diff_notes/already-imported/#{project.id}"
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer.rb b/lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer.rb
new file mode 100644
index 00000000000..49569ed52d8
--- /dev/null
+++ b/lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+# Imports issue comments one issue at a time.
+#
+# This importer is used when `github_importer_single_endpoint_notes_import`
+# feature flag is on and replaces `IssuesImporter` issue notes import.
+#
+# Notes are fetched with the `issue_comments` endpoint, 1 issue at a time.
+# That is slower than `NotesImporter`, but it makes sure all notes are
+# imported: the `issues_comments` endpoint used by `NotesImporter` can be
+# limited by GitHub API to not return all available pages.
+module Gitlab
+  module GithubImport
+    module Importer
+      class SingleEndpointIssueNotesImporter
+        include ParallelScheduling
+        include SingleEndpointNotesImporting
+
+        # Client method used to fetch the comments of a single issue.
+        def collection_method
+          :issue_comments
+        end
+
+        # Object type reported to `ObjectCounter` for every fetched note.
+        def object_type
+          :note
+        end
+
+        # NOTE(review): the three hooks below are not called by
+        # SingleEndpointNotesImporting; presumably ParallelScheduling reads them - confirm.
+        def representation_class
+          Representation::Note
+        end
+
+        def importer_class
+          NoteImporter
+        end
+
+        def sidekiq_worker_class
+          ImportNoteWorker
+        end
+
+        private
+
+        # Issues whose iid is not yet in the already-imported set.
+        def noteables
+          imported_iids = already_imported_noteables
+
+          project.issues.where.not(iid: imported_iids) # rubocop: disable CodeReuse/ActiveRecord
+        end
+
+        # One page counter per issue and endpoint.
+        def page_counter_id(noteable)
+          "issue/#{noteable.id}/#{collection_method}"
+        end
+
+        # Key of the cached set of issue iids whose notes were fetched.
+        def notes_imported_cache_key
+          "github-importer/issue/notes/already-imported/#{project.id}"
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer.rb b/lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer.rb
new file mode 100644
index 00000000000..d837639c14d
--- /dev/null
+++ b/lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+# Imports merge request comments one merge request at a time.
+#
+# This importer is used when `github_importer_single_endpoint_notes_import`
+# feature flag is on and replaces `NotesImporter` MR notes import.
+#
+# Notes are fetched with the `issue_comments` endpoint, 1 PR at a time.
+# That is slower than `NotesImporter`, but it makes sure all notes are
+# imported: the `issues_comments` endpoint used by `NotesImporter` can be
+# limited by GitHub API to not return all available pages.
+module Gitlab
+  module GithubImport
+    module Importer
+      class SingleEndpointMergeRequestNotesImporter
+        include ParallelScheduling
+        include SingleEndpointNotesImporting
+
+        # Client method used to fetch the comments of a single pull request.
+        def collection_method
+          :issue_comments
+        end
+
+        # Object type reported to `ObjectCounter` for every fetched note.
+        def object_type
+          :note
+        end
+
+        # NOTE(review): the three hooks below are not called by
+        # SingleEndpointNotesImporting; presumably ParallelScheduling reads them - confirm.
+        def representation_class
+          Representation::Note
+        end
+
+        def importer_class
+          NoteImporter
+        end
+
+        def sidekiq_worker_class
+          ImportNoteWorker
+        end
+
+        private
+
+        # Merge requests whose iid is not yet in the already-imported set.
+        def noteables
+          imported_iids = already_imported_noteables
+
+          project.merge_requests.where.not(iid: imported_iids) # rubocop: disable CodeReuse/ActiveRecord
+        end
+
+        # One page counter per merge request and endpoint.
+        def page_counter_id(noteable)
+          "merge_request/#{noteable.id}/#{collection_method}"
+        end
+
+        # Key of the cached set of merge request iids whose notes were fetched.
+        def notes_imported_cache_key
+          "github-importer/merge_request/notes/already-imported/#{project.id}"
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/issuable_finder.rb b/lib/gitlab/github_import/issuable_finder.rb
index 136531505ea..5298a3d81ea 100644
--- a/lib/gitlab/github_import/issuable_finder.rb
+++ b/lib/gitlab/github_import/issuable_finder.rb
@@ -23,7 +23,7 @@ module Gitlab
#
# This method will return `nil` if no ID could be found.
def database_id
- val = Gitlab::Cache::Import::Caching.read(cache_key)
+ val = Gitlab::Cache::Import::Caching.read(cache_key, timeout: timeout)
val.to_i if val.present?
end
@@ -32,7 +32,7 @@ module Gitlab
#
# database_id - The ID of the corresponding database row.
def cache_database_id(database_id)
- Gitlab::Cache::Import::Caching.write(cache_key, database_id)
+ Gitlab::Cache::Import::Caching.write(cache_key, database_id, timeout: timeout)
end
private
@@ -76,6 +76,14 @@ module Gitlab
)
end
end
+
+ def timeout
+   # Cache timeout for the issuable id: the longer one applies only to projects
+   # in a group with the single endpoint notes import ops flag enabled.
+   longer = project.group.present? &&
+     ::Feature.enabled?(:github_importer_single_endpoint_notes_import, project.group, type: :ops, default_enabled: :yaml)
+   longer ? Gitlab::Cache::Import::Caching::LONGER_TIMEOUT : Gitlab::Cache::Import::Caching::TIMEOUT
+ end
end
end
end
diff --git a/lib/gitlab/github_import/single_endpoint_notes_importing.rb b/lib/gitlab/github_import/single_endpoint_notes_importing.rb
new file mode 100644
index 00000000000..43402ecd165
--- /dev/null
+++ b/lib/gitlab/github_import/single_endpoint_notes_importing.rb
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+# This module is used in:
+# - SingleEndpointDiffNotesImporter
+# - SingleEndpointIssueNotesImporter
+# - SingleEndpointMergeRequestNotesImporter
+#
+# Those importers are used when the
+# `github_importer_single_endpoint_notes_import` feature flag is on.
+#
+# It fetches 1 noteable's notes at a time using `issue_comments` or
+# `pull_request_comments` endpoint, which is slower than `NotesImporter`
+# but it makes sure all notes are imported, as it can sometimes not be
+# the case for `NotesImporter`, because `issues_comments` endpoint
+# it uses can be limited by GitHub API to not return all available pages.
+#
+# Including classes must define `noteables`, `notes_imported_cache_key` and
+# `page_counter_id`; the defaults here raise NotImplementedError.
+#
+# NOTE(review): `project`, `client`, `object_type`, `collection_method`,
+# `collection_options`, `already_imported?` and `mark_as_imported` are called
+# but not defined here; presumably provided by the including class or
+# `ParallelScheduling` - confirm.
+module Gitlab
+  module GithubImport
+    module SingleEndpointNotesImporting
+      # Number of noteables loaded per `each_batch` iteration.
+      BATCH_SIZE = 100
+
+      # Yields each fetched note that `already_imported?` does not reject.
+      # The note is counted as fetched before the yield and marked as
+      # imported once the block returns.
+      def each_object_to_import
+        each_notes_page do |page|
+          page.objects.each do |note|
+            next if already_imported?(note)
+
+            Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
+
+            yield(note)
+
+            mark_as_imported(note)
+          end
+        end
+      end
+
+      # Returns the note's own id.
+      # NOTE(review): presumably the key used by `already_imported?` and
+      # `mark_as_imported` - confirm against their definitions.
+      def id_for_already_imported_cache(note)
+        note.id
+      end
+
+      private
+
+      # Yields every page of notes for every noteable, then records the
+      # noteable's iid so `already_imported_noteables` reports it afterwards.
+      def each_notes_page
+        noteables.each_batch(of: BATCH_SIZE, column: :iid) do |batch|
+          batch.each do |noteable|
+            # The page counter needs to be scoped by noteable to avoid skipping
+            # pages of notes from already imported noteables.
+            page_counter = PageCounter.new(project, page_counter_id(noteable))
+            repo = project.import_source
+            options = collection_options.merge(page: page_counter.current)
+
+            client.each_page(collection_method, repo, noteable.iid, options) do |page|
+              # Pages the counter declines to record are not yielded.
+              next unless page_counter.set(page.number)
+
+              yield page
+            end
+
+            mark_notes_imported(noteable)
+          end
+        end
+      end
+
+      # Adds the noteable's iid to the cached set of processed noteables.
+      def mark_notes_imported(noteable)
+        Gitlab::Cache::Import::Caching.set_add(
+          notes_imported_cache_key,
+          noteable.iid
+        )
+      end
+
+      # Returns the iids recorded by `mark_notes_imported`.
+      def already_imported_noteables
+        Gitlab::Cache::Import::Caching.values_from_set(notes_imported_cache_key)
+      end
+
+      # Hook: the collection of noteables to fetch notes for. It must
+      # respond to `each_batch`.
+      def noteables
+        raise NotImplementedError
+      end
+
+      # Hook: the cache key of the set holding processed noteable iids.
+      def notes_imported_cache_key
+        raise NotImplementedError
+      end
+
+      # Hook: the identifier handed to `PageCounter` for the given noteable.
+      def page_counter_id(noteable)
+        raise NotImplementedError
+      end
+    end
+  end
+end