Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/github_import')
-rw-r--r-- lib/gitlab/github_import/bulk_importing.rb 43
-rw-r--r-- lib/gitlab/github_import/importer/diff_note_importer.rb 2
-rw-r--r-- lib/gitlab/github_import/importer/issue_importer.rb 2
-rw-r--r-- lib/gitlab/github_import/importer/label_links_importer.rb 2
-rw-r--r-- lib/gitlab/github_import/importer/labels_importer.rb 14
-rw-r--r-- lib/gitlab/github_import/importer/lfs_objects_importer.rb 6
-rw-r--r-- lib/gitlab/github_import/importer/milestones_importer.rb 14
-rw-r--r-- lib/gitlab/github_import/importer/note_importer.rb 2
-rw-r--r-- lib/gitlab/github_import/importer/pull_requests_importer.rb 6
-rw-r--r-- lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb 43
-rw-r--r-- lib/gitlab/github_import/importer/releases_importer.rb 14
-rw-r--r-- lib/gitlab/github_import/importer/repository_importer.rb 12
-rw-r--r-- lib/gitlab/github_import/logger.rb 11
-rw-r--r-- lib/gitlab/github_import/object_counter.rb 31
-rw-r--r-- lib/gitlab/github_import/parallel_scheduling.rb 35
-rw-r--r-- lib/gitlab/github_import/user_finder.rb 14
16 files changed, 132 insertions, 119 deletions
diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb
index 0d448b55104..80f8f8bfbe2 100644
--- a/lib/gitlab/github_import/bulk_importing.rb
+++ b/lib/gitlab/github_import/bulk_importing.rb
@@ -3,23 +3,60 @@
module Gitlab
module GithubImport
module BulkImporting
+ attr_reader :project, :client
+
+ # project - An instance of `Project`.
+ # client - An instance of `Gitlab::GithubImport::Client`.
+ def initialize(project, client)
+ @project = project
+ @client = client
+ end
+
# Builds and returns an Array of objects to bulk insert into the
# database.
#
# enum - An Enumerable that returns the objects to turn into database
# rows.
def build_database_rows(enum)
- enum.each_with_object([]) do |(object, _), rows|
- rows << build(object) unless already_imported?(object)
+ rows = enum.each_with_object([]) do |(object, _), result|
+ result << build(object) unless already_imported?(object)
end
+
+ log_and_increment_counter(rows.size, :fetched)
+
+ rows
end
# Bulk inserts the given rows into the database.
def bulk_insert(model, rows, batch_size: 100)
rows.each_slice(batch_size) do |slice|
- Gitlab::Database.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert
+ Gitlab::Database.main.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert
+
+ log_and_increment_counter(slice.size, :imported)
end
end
+
+ def object_type
+ raise NotImplementedError
+ end
+
+ private
+
+ def log_and_increment_counter(value, operation)
+ Gitlab::Import::Logger.info(
+ import_type: :github,
+ project_id: project.id,
+ importer: self.class.name,
+ message: "#{value} #{object_type.to_s.pluralize} #{operation}"
+ )
+
+ Gitlab::GithubImport::ObjectCounter.increment(
+ project,
+ object_type,
+ operation,
+ value: value
+ )
+ end
end
end
end
diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb
index d2f5af63621..9bda066efcc 100644
--- a/lib/gitlab/github_import/importer/diff_note_importer.rb
+++ b/lib/gitlab/github_import/importer/diff_note_importer.rb
@@ -46,7 +46,7 @@ module Gitlab
# To work around this we're using bulk_insert with a single row. This
# allows us to efficiently insert data (even if it's just 1 row)
# without having to use all sorts of hacks to disable callbacks.
- Gitlab::Database.bulk_insert(LegacyDiffNote.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert
+ Gitlab::Database.main.bulk_insert(LegacyDiffNote.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert
rescue ActiveRecord::InvalidForeignKey
# It's possible the project and the issue have been deleted since
# scheduling this job. In this case we'll just skip creating the note.
diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb
index 13061d2c9df..f8665676ccf 100644
--- a/lib/gitlab/github_import/importer/issue_importer.rb
+++ b/lib/gitlab/github_import/importer/issue_importer.rb
@@ -75,7 +75,7 @@ module Gitlab
end
end
- Gitlab::Database.bulk_insert(IssueAssignee.table_name, assignees) # rubocop:disable Gitlab/BulkInsert
+ Gitlab::Database.main.bulk_insert(IssueAssignee.table_name, assignees) # rubocop:disable Gitlab/BulkInsert
end
end
end
diff --git a/lib/gitlab/github_import/importer/label_links_importer.rb b/lib/gitlab/github_import/importer/label_links_importer.rb
index 77eb4542195..b608bb48e38 100644
--- a/lib/gitlab/github_import/importer/label_links_importer.rb
+++ b/lib/gitlab/github_import/importer/label_links_importer.rb
@@ -40,7 +40,7 @@ module Gitlab
}
end
- Gitlab::Database.bulk_insert(LabelLink.table_name, rows) # rubocop:disable Gitlab/BulkInsert
+ Gitlab::Database.main.bulk_insert(LabelLink.table_name, rows) # rubocop:disable Gitlab/BulkInsert
end
def find_target_id
diff --git a/lib/gitlab/github_import/importer/labels_importer.rb b/lib/gitlab/github_import/importer/labels_importer.rb
index 80246fa1b77..7293de56a9a 100644
--- a/lib/gitlab/github_import/importer/labels_importer.rb
+++ b/lib/gitlab/github_import/importer/labels_importer.rb
@@ -6,15 +6,9 @@ module Gitlab
class LabelsImporter
include BulkImporting
- attr_reader :project, :client, :existing_labels
-
- # project - An instance of `Project`.
- # client - An instance of `Gitlab::GithubImport::Client`.
# rubocop: disable CodeReuse/ActiveRecord
- def initialize(project, client)
- @project = project
- @client = client
- @existing_labels = project.labels.pluck(:title).to_set
+ def existing_labels
+ @existing_labels ||= project.labels.pluck(:title).to_set
end
# rubocop: enable CodeReuse/ActiveRecord
@@ -51,6 +45,10 @@ module Gitlab
def each_label
client.labels(project.import_source)
end
+
+ def object_type
+ :label
+ end
end
end
end
diff --git a/lib/gitlab/github_import/importer/lfs_objects_importer.rb b/lib/gitlab/github_import/importer/lfs_objects_importer.rb
index 40248ecbd31..775afd5f53a 100644
--- a/lib/gitlab/github_import/importer/lfs_objects_importer.rb
+++ b/lib/gitlab/github_import/importer/lfs_objects_importer.rb
@@ -35,7 +35,11 @@ module Gitlab
yield object
end
rescue StandardError => e
- error(project.id, e)
+ Gitlab::Import::ImportFailureService.track(
+ project_id: project.id,
+ error_source: importer_class.name,
+ exception: e
+ )
end
end
end
diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb
index 71ff7465d9b..d11b151bbe2 100644
--- a/lib/gitlab/github_import/importer/milestones_importer.rb
+++ b/lib/gitlab/github_import/importer/milestones_importer.rb
@@ -6,15 +6,9 @@ module Gitlab
class MilestonesImporter
include BulkImporting
- attr_reader :project, :client, :existing_milestones
-
- # project - An instance of `Project`
- # client - An instance of `Gitlab::GithubImport::Client`
# rubocop: disable CodeReuse/ActiveRecord
- def initialize(project, client)
- @project = project
- @client = client
- @existing_milestones = project.milestones.pluck(:iid).to_set
+ def existing_milestones
+ @existing_milestones ||= project.milestones.pluck(:iid).to_set
end
# rubocop: enable CodeReuse/ActiveRecord
@@ -55,6 +49,10 @@ module Gitlab
def each_milestone
client.milestones(project.import_source, state: 'all')
end
+
+ def object_type
+ :milestone
+ end
end
end
end
diff --git a/lib/gitlab/github_import/importer/note_importer.rb b/lib/gitlab/github_import/importer/note_importer.rb
index ae9996d81ef..1fd42a69fac 100644
--- a/lib/gitlab/github_import/importer/note_importer.rb
+++ b/lib/gitlab/github_import/importer/note_importer.rb
@@ -37,7 +37,7 @@ module Gitlab
# We're using bulk_insert here so we can bypass any validations and
# callbacks. Running these would result in a lot of unnecessary SQL
# queries being executed when importing large projects.
- Gitlab::Database.bulk_insert(Note.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert
+ Gitlab::Database.main.bulk_insert(Note.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert
rescue ActiveRecord::InvalidForeignKey
# It's possible the project and the issue have been deleted since
# scheduling this job. In this case we'll just skip creating the note.
diff --git a/lib/gitlab/github_import/importer/pull_requests_importer.rb b/lib/gitlab/github_import/importer/pull_requests_importer.rb
index b2f099761b1..2812fbd3dfe 100644
--- a/lib/gitlab/github_import/importer/pull_requests_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_requests_importer.rb
@@ -40,11 +40,7 @@ module Gitlab
# updating the timestamp.
project.update_column(:last_repository_updated_at, Time.zone.now)
- if Feature.enabled?(:fetch_remote_params, project, default_enabled: :yaml)
- project.repository.fetch_remote('github', url: project.import_url, refmap: Gitlab::GithubImport.refmap, forced: false)
- else
- project.repository.fetch_remote('github', forced: false)
- end
+ project.repository.fetch_remote(project.import_url, refmap: Gitlab::GithubImport.refmap, forced: false)
pname = project.path_with_namespace
diff --git a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
index e389acbf877..bd65eb5899c 100644
--- a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
@@ -37,43 +37,6 @@ module Gitlab
review.id
end
- def each_object_to_import(&block)
- if use_github_review_importer_query_only_unimported_merge_requests?
- each_merge_request_to_import(&block)
- else
- each_merge_request_skipping_imported(&block)
- end
- end
-
- private
-
- attr_reader :merge_requests_already_imported_cache_key
-
- # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/62036#note_587181108
- def use_github_review_importer_query_only_unimported_merge_requests?
- Feature.enabled?(
- :github_review_importer_query_only_unimported_merge_requests,
- default_enabled: :yaml
- )
- end
-
- def each_merge_request_skipping_imported
- project.merge_requests.find_each do |merge_request|
- next if already_imported?(merge_request)
-
- Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
-
- client
- .pull_request_reviews(project.import_source, merge_request.iid)
- .each do |review|
- review.merge_request_id = merge_request.id
- yield(review)
- end
-
- mark_as_imported(merge_request)
- end
- end
-
# The worker can be interrupted, by rate limit for instance,
# in different situations. To avoid requesting already imported data,
# if the worker is interrupted:
@@ -82,7 +45,7 @@ module Gitlab
# - before importing all merge requests reviews
# Merge requests that had all the reviews imported are cached with
# `mark_merge_request_reviews_imported`
- def each_merge_request_to_import
+ def each_object_to_import(&block)
each_review_page do |page, merge_request|
page.objects.each do |review|
next if already_imported?(review)
@@ -97,6 +60,10 @@ module Gitlab
end
end
+ private
+
+ attr_reader :merge_requests_already_imported_cache_key
+
def each_review_page
merge_requests_to_import.find_each do |merge_request|
# The page counter needs to be scoped by merge request to avoid skipping
diff --git a/lib/gitlab/github_import/importer/releases_importer.rb b/lib/gitlab/github_import/importer/releases_importer.rb
index a3734ccf069..c1fbd868800 100644
--- a/lib/gitlab/github_import/importer/releases_importer.rb
+++ b/lib/gitlab/github_import/importer/releases_importer.rb
@@ -6,15 +6,9 @@ module Gitlab
class ReleasesImporter
include BulkImporting
- attr_reader :project, :client, :existing_tags
-
- # project - An instance of `Project`
- # client - An instance of `Gitlab::GithubImport::Client`
# rubocop: disable CodeReuse/ActiveRecord
- def initialize(project, client)
- @project = project
- @client = client
- @existing_tags = project.releases.pluck(:tag).to_set
+ def existing_tags
+ @existing_tags ||= project.releases.pluck(:tag).to_set
end
# rubocop: enable CodeReuse/ActiveRecord
@@ -50,6 +44,10 @@ module Gitlab
def description_for(release)
release.body.presence || "Release for tag #{release.tag_name}"
end
+
+ def object_type
+ :release
+ end
end
end
end
diff --git a/lib/gitlab/github_import/importer/repository_importer.rb b/lib/gitlab/github_import/importer/repository_importer.rb
index 1401c92a44e..20068a33019 100644
--- a/lib/gitlab/github_import/importer/repository_importer.rb
+++ b/lib/gitlab/github_import/importer/repository_importer.rb
@@ -50,7 +50,7 @@ module Gitlab
project.ensure_repository
refmap = Gitlab::GithubImport.refmap
- project.repository.fetch_as_mirror(project.import_url, refmap: refmap, forced: true, remote_name: 'github')
+ project.repository.fetch_as_mirror(project.import_url, refmap: refmap, forced: true)
project.change_head(default_branch) if default_branch
@@ -59,8 +59,6 @@ module Gitlab
Repositories::HousekeepingService.new(project, :gc).execute
true
- rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e
- fail_import("Failed to import the repository: #{e.message}")
end
def import_wiki_repository
@@ -70,7 +68,8 @@ module Gitlab
rescue ::Gitlab::Git::CommandError => e
if e.message !~ /repository not exported/
project.create_wiki
- fail_import("Failed to import the wiki: #{e.message}")
+
+ raise e
else
true
end
@@ -84,11 +83,6 @@ module Gitlab
project.update_column(:last_repository_updated_at, Time.zone.now)
end
- def fail_import(message)
- project.import_state.mark_as_failed(message)
- false
- end
-
private
def default_branch
diff --git a/lib/gitlab/github_import/logger.rb b/lib/gitlab/github_import/logger.rb
new file mode 100644
index 00000000000..980aa0a7982
--- /dev/null
+++ b/lib/gitlab/github_import/logger.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module GithubImport
+ class Logger < ::Gitlab::Import::Logger
+ def default_attributes
+ super.merge(import_type: :github)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/github_import/object_counter.rb b/lib/gitlab/github_import/object_counter.rb
index e4835504c2d..4c9a8da601f 100644
--- a/lib/gitlab/github_import/object_counter.rb
+++ b/lib/gitlab/github_import/object_counter.rb
@@ -14,11 +14,16 @@ module Gitlab
CACHING = Gitlab::Cache::Import::Caching
class << self
- def increment(project, object_type, operation)
+ # Increments the project and the global counters if the given value is >= 1
+ def increment(project, object_type, operation, value: 1)
+ integer = value.to_i
+
+ return if integer <= 0
+
validate_operation!(operation)
- increment_project_counter(project, object_type, operation)
- increment_global_counter(object_type, operation)
+ increment_project_counter(project, object_type, operation, integer)
+ increment_global_counter(object_type, operation, integer)
end
def summary(project)
@@ -41,7 +46,7 @@ module Gitlab
# and it's used to report the health of the Github Importer
# in the Grafana Dashboard
# https://dashboards.gitlab.net/d/2zgM_rImz/github-importer?orgId=1
- def increment_global_counter(object_type, operation)
+ def increment_global_counter(object_type, operation, value)
key = GLOBAL_COUNTER_KEY % {
operation: operation,
object_type: object_type
@@ -51,18 +56,26 @@ module Gitlab
object_type: object_type.to_s.humanize
}
- Gitlab::Metrics.counter(key.to_sym, description).increment
+ Gitlab::Metrics.counter(key.to_sym, description).increment(by: value)
end
# Project counters are short lived, in Redis,
# and it's used to report how successful a project
# import was with the #summary method.
- def increment_project_counter(project, object_type, operation)
- counter_key = PROJECT_COUNTER_KEY % { project: project.id, operation: operation, object_type: object_type }
+ def increment_project_counter(project, object_type, operation, value)
+ counter_key = PROJECT_COUNTER_KEY % {
+ project: project.id,
+ operation: operation,
+ object_type: object_type
+ }
add_counter_to_list(project, operation, counter_key)
- CACHING.increment(counter_key)
+ if Feature.disabled?(:import_redis_increment_by, default_enabled: :yaml)
+ CACHING.increment(counter_key)
+ else
+ CACHING.increment_by(counter_key, value)
+ end
end
def add_counter_to_list(project, operation, key)
@@ -75,7 +88,7 @@ module Gitlab
def validate_operation!(operation)
unless operation.to_s.presence_in(OPERATIONS)
- raise ArgumentError, "Operation must be #{OPERATIONS.join(' or ')}"
+ raise ArgumentError, "operation must be #{OPERATIONS.join(' or ')}"
end
end
end
diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb
index 4598429d568..8c76f5a9d94 100644
--- a/lib/gitlab/github_import/parallel_scheduling.rb
+++ b/lib/gitlab/github_import/parallel_scheduling.rb
@@ -49,9 +49,14 @@ module Gitlab
retval
rescue StandardError => e
- error(project.id, e)
+ Gitlab::Import::ImportFailureService.track(
+ project_id: project.id,
+ error_source: self.class.name,
+ exception: e,
+ fail_import: abort_on_failure
+ )
- raise e
+ raise(e)
end
# Imports all the objects in sequence in the current thread.
@@ -165,6 +170,10 @@ module Gitlab
raise NotImplementedError
end
+ def abort_on_failure
+ false
+ end
+
# Any options to be passed to the method used for retrieving the data to
# import.
def collection_options
@@ -174,36 +183,16 @@ module Gitlab
private
def info(project_id, extra = {})
- logger.info(log_attributes(project_id, extra))
- end
-
- def error(project_id, exception)
- logger.error(
- log_attributes(
- project_id,
- message: 'importer failed',
- 'error.message': exception.message
- )
- )
-
- Gitlab::ErrorTracking.track_exception(
- exception,
- log_attributes(project_id)
- )
+ Logger.info(log_attributes(project_id, extra))
end
def log_attributes(project_id, extra = {})
extra.merge(
- import_source: :github,
project_id: project_id,
importer: importer_class.name,
parallel: parallel?
)
end
-
- def logger
- @logger ||= Gitlab::Import::Logger.build
- end
end
end
end
diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb
index 058cd1ebd57..f583ef39d13 100644
--- a/lib/gitlab/github_import/user_finder.rb
+++ b/lib/gitlab/github_import/user_finder.rb
@@ -120,10 +120,18 @@ module Gitlab
read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email)
end
- # Queries and caches the GitLab user ID for a GitHub user ID, if one was
- # found.
+ # If importing from github.com, queries and caches the GitLab user ID for
+ # a GitHub user ID, if one was found.
+ #
+ # When importing from Github Enterprise, do not query user by Github ID
+ # since we only have users' Github ID from github.com.
def id_for_github_id(id)
- gitlab_id = query_id_for_github_id(id) || nil
+ gitlab_id =
+ if project.github_enterprise_import?
+ nil
+ else
+ query_id_for_github_id(id)
+ end
Gitlab::Cache::Import::Caching.write(ID_CACHE_KEY % id, gitlab_id)
end