diff options
Diffstat (limited to 'lib/gitlab/github_import')
16 files changed, 132 insertions, 119 deletions
diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb index 0d448b55104..80f8f8bfbe2 100644 --- a/lib/gitlab/github_import/bulk_importing.rb +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -3,23 +3,60 @@ module Gitlab module GithubImport module BulkImporting + attr_reader :project, :client + + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(project, client) + @project = project + @client = client + end + # Builds and returns an Array of objects to bulk insert into the # database. # # enum - An Enumerable that returns the objects to turn into database # rows. def build_database_rows(enum) - enum.each_with_object([]) do |(object, _), rows| - rows << build(object) unless already_imported?(object) + rows = enum.each_with_object([]) do |(object, _), result| + result << build(object) unless already_imported?(object) end + + log_and_increment_counter(rows.size, :fetched) + + rows end # Bulk inserts the given rows into the database. def bulk_insert(model, rows, batch_size: 100) rows.each_slice(batch_size) do |slice| - Gitlab::Database.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert + Gitlab::Database.main.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert + + log_and_increment_counter(slice.size, :imported) end end + + def object_type + raise NotImplementedError + end + + private + + def log_and_increment_counter(value, operation) + Gitlab::Import::Logger.info( + import_type: :github, + project_id: project.id, + importer: self.class.name, + message: "#{value} #{object_type.to_s.pluralize} #{operation}" + ) + + Gitlab::GithubImport::ObjectCounter.increment( + project, + object_type, + operation, + value: value + ) + end end end end diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb index d2f5af63621..9bda066efcc 100644 --- a/lib/gitlab/github_import/importer/diff_note_importer.rb +++ b/lib/gitlab/github_import/importer/diff_note_importer.rb @@ -46,7 +46,7 @@ module Gitlab # To work around this we're using bulk_insert with a single row. This # allows us to efficiently insert data (even if it's just 1 row) # without having to use all sorts of hacks to disable callbacks. - Gitlab::Database.bulk_insert(LegacyDiffNote.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert + Gitlab::Database.main.bulk_insert(LegacyDiffNote.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert rescue ActiveRecord::InvalidForeignKey # It's possible the project and the issue have been deleted since # scheduling this job. In this case we'll just skip creating the note. diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb index 13061d2c9df..f8665676ccf 100644 --- a/lib/gitlab/github_import/importer/issue_importer.rb +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -75,7 +75,7 @@ module Gitlab end end - Gitlab::Database.bulk_insert(IssueAssignee.table_name, assignees) # rubocop:disable Gitlab/BulkInsert + Gitlab::Database.main.bulk_insert(IssueAssignee.table_name, assignees) # rubocop:disable Gitlab/BulkInsert end end end diff --git a/lib/gitlab/github_import/importer/label_links_importer.rb b/lib/gitlab/github_import/importer/label_links_importer.rb index 77eb4542195..b608bb48e38 100644 --- a/lib/gitlab/github_import/importer/label_links_importer.rb +++ b/lib/gitlab/github_import/importer/label_links_importer.rb @@ -40,7 +40,7 @@ module Gitlab } end - Gitlab::Database.bulk_insert(LabelLink.table_name, rows) # rubocop:disable Gitlab/BulkInsert + Gitlab::Database.main.bulk_insert(LabelLink.table_name, rows) # rubocop:disable Gitlab/BulkInsert end def find_target_id diff --git a/lib/gitlab/github_import/importer/labels_importer.rb b/lib/gitlab/github_import/importer/labels_importer.rb index 80246fa1b77..7293de56a9a 100644 --- a/lib/gitlab/github_import/importer/labels_importer.rb +++ b/lib/gitlab/github_import/importer/labels_importer.rb @@ -6,15 +6,9 @@ module Gitlab class LabelsImporter include BulkImporting - attr_reader :project, :client, :existing_labels - - # project - An instance of `Project`. - # client - An instance of `Gitlab::GithubImport::Client`. # rubocop: disable CodeReuse/ActiveRecord - def initialize(project, client) - @project = project - @client = client - @existing_labels = project.labels.pluck(:title).to_set + def existing_labels + @existing_labels ||= project.labels.pluck(:title).to_set end # rubocop: enable CodeReuse/ActiveRecord @@ -51,6 +45,10 @@ module Gitlab def each_label client.labels(project.import_source) end + + def object_type + :label + end end end end diff --git a/lib/gitlab/github_import/importer/lfs_objects_importer.rb b/lib/gitlab/github_import/importer/lfs_objects_importer.rb index 40248ecbd31..775afd5f53a 100644 --- a/lib/gitlab/github_import/importer/lfs_objects_importer.rb +++ b/lib/gitlab/github_import/importer/lfs_objects_importer.rb @@ -35,7 +35,11 @@ module Gitlab yield object end rescue StandardError => e - error(project.id, e) + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: importer_class.name, + exception: e + ) end end end diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb index 71ff7465d9b..d11b151bbe2 100644 --- a/lib/gitlab/github_import/importer/milestones_importer.rb +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -6,15 +6,9 @@ module Gitlab class MilestonesImporter include BulkImporting - attr_reader :project, :client, :existing_milestones - - # project - An instance of `Project` - # client - An instance of `Gitlab::GithubImport::Client` # rubocop: disable CodeReuse/ActiveRecord - def initialize(project, client) - @project = project - @client = client - @existing_milestones = project.milestones.pluck(:iid).to_set + def existing_milestones + @existing_milestones ||= project.milestones.pluck(:iid).to_set end # rubocop: enable CodeReuse/ActiveRecord @@ -55,6 +49,10 @@ module Gitlab def each_milestone client.milestones(project.import_source, state: 'all') end + + def object_type + :milestone + end end end end diff --git a/lib/gitlab/github_import/importer/note_importer.rb b/lib/gitlab/github_import/importer/note_importer.rb index ae9996d81ef..1fd42a69fac 100644 --- a/lib/gitlab/github_import/importer/note_importer.rb +++ b/lib/gitlab/github_import/importer/note_importer.rb @@ -37,7 +37,7 @@ module Gitlab # We're using bulk_insert here so we can bypass any validations and # callbacks. Running these would result in a lot of unnecessary SQL # queries being executed when importing large projects. - Gitlab::Database.bulk_insert(Note.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert + Gitlab::Database.main.bulk_insert(Note.table_name, [attributes]) # rubocop:disable Gitlab/BulkInsert rescue ActiveRecord::InvalidForeignKey # It's possible the project and the issue have been deleted since # scheduling this job. In this case we'll just skip creating the note. diff --git a/lib/gitlab/github_import/importer/pull_requests_importer.rb b/lib/gitlab/github_import/importer/pull_requests_importer.rb index b2f099761b1..2812fbd3dfe 100644 --- a/lib/gitlab/github_import/importer/pull_requests_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_importer.rb @@ -40,11 +40,7 @@ module Gitlab # updating the timestamp. project.update_column(:last_repository_updated_at, Time.zone.now) - if Feature.enabled?(:fetch_remote_params, project, default_enabled: :yaml) - project.repository.fetch_remote('github', url: project.import_url, refmap: Gitlab::GithubImport.refmap, forced: false) - else - project.repository.fetch_remote('github', forced: false) - end + project.repository.fetch_remote(project.import_url, refmap: Gitlab::GithubImport.refmap, forced: false) pname = project.path_with_namespace diff --git a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb index e389acbf877..bd65eb5899c 100644 --- a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb @@ -37,43 +37,6 @@ module Gitlab review.id end - def each_object_to_import(&block) - if use_github_review_importer_query_only_unimported_merge_requests? - each_merge_request_to_import(&block) - else - each_merge_request_skipping_imported(&block) - end - end - - private - - attr_reader :merge_requests_already_imported_cache_key - - # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/62036#note_587181108 - def use_github_review_importer_query_only_unimported_merge_requests? - Feature.enabled?( - :github_review_importer_query_only_unimported_merge_requests, - default_enabled: :yaml - ) - end - - def each_merge_request_skipping_imported - project.merge_requests.find_each do |merge_request| - next if already_imported?(merge_request) - - Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) - - client - .pull_request_reviews(project.import_source, merge_request.iid) - .each do |review| - review.merge_request_id = merge_request.id - yield(review) - end - - mark_as_imported(merge_request) - end - end - # The worker can be interrupted, by rate limit for instance, # in different situations. To avoid requesting already imported data, # if the worker is interrupted: @@ -82,7 +45,7 @@ module Gitlab # - before importing all merge requests reviews # Merge requests that had all the reviews imported are cached with # `mark_merge_request_reviews_imported` - def each_merge_request_to_import + def each_object_to_import(&block) each_review_page do |page, merge_request| page.objects.each do |review| next if already_imported?(review) @@ -97,6 +60,10 @@ module Gitlab end end + private + + attr_reader :merge_requests_already_imported_cache_key + def each_review_page merge_requests_to_import.find_each do |merge_request| # The page counter needs to be scoped by merge request to avoid skipping diff --git a/lib/gitlab/github_import/importer/releases_importer.rb b/lib/gitlab/github_import/importer/releases_importer.rb index a3734ccf069..c1fbd868800 100644 --- a/lib/gitlab/github_import/importer/releases_importer.rb +++ b/lib/gitlab/github_import/importer/releases_importer.rb @@ -6,15 +6,9 @@ module Gitlab class ReleasesImporter include BulkImporting - attr_reader :project, :client, :existing_tags - - # project - An instance of `Project` - # client - An instance of `Gitlab::GithubImport::Client` # rubocop: disable CodeReuse/ActiveRecord - def initialize(project, client) - @project = project - @client = client - @existing_tags = project.releases.pluck(:tag).to_set + def existing_tags + @existing_tags ||= project.releases.pluck(:tag).to_set end # rubocop: enable CodeReuse/ActiveRecord @@ -50,6 +44,10 @@ module Gitlab def description_for(release) release.body.presence || "Release for tag #{release.tag_name}" end + + def object_type + :release + end end end end diff --git a/lib/gitlab/github_import/importer/repository_importer.rb b/lib/gitlab/github_import/importer/repository_importer.rb index 1401c92a44e..20068a33019 100644 --- a/lib/gitlab/github_import/importer/repository_importer.rb +++ b/lib/gitlab/github_import/importer/repository_importer.rb @@ -50,7 +50,7 @@ module Gitlab project.ensure_repository refmap = Gitlab::GithubImport.refmap - project.repository.fetch_as_mirror(project.import_url, refmap: refmap, forced: true, remote_name: 'github') + project.repository.fetch_as_mirror(project.import_url, refmap: refmap, forced: true) project.change_head(default_branch) if default_branch @@ -59,8 +59,6 @@ module Gitlab Repositories::HousekeepingService.new(project, :gc).execute true - rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e - fail_import("Failed to import the repository: #{e.message}") end def import_wiki_repository @@ -70,7 +68,8 @@ module Gitlab rescue ::Gitlab::Git::CommandError => e if e.message !~ /repository not exported/ project.create_wiki - fail_import("Failed to import the wiki: #{e.message}") + + raise e else true end @@ -84,11 +83,6 @@ module Gitlab project.update_column(:last_repository_updated_at, Time.zone.now) end - def fail_import(message) - project.import_state.mark_as_failed(message) - false - end - private def default_branch diff --git a/lib/gitlab/github_import/logger.rb b/lib/gitlab/github_import/logger.rb new file mode 100644 index 00000000000..980aa0a7982 --- /dev/null +++ b/lib/gitlab/github_import/logger.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class Logger < ::Gitlab::Import::Logger + def default_attributes + super.merge(import_type: :github) + end + end + end +end diff --git a/lib/gitlab/github_import/object_counter.rb b/lib/gitlab/github_import/object_counter.rb index e4835504c2d..4c9a8da601f 100644 --- a/lib/gitlab/github_import/object_counter.rb +++ b/lib/gitlab/github_import/object_counter.rb @@ -14,11 +14,16 @@ module Gitlab CACHING = Gitlab::Cache::Import::Caching class << self - def increment(project, object_type, operation) + # Increments the project and the global counters if the given value is >= 1 + def increment(project, object_type, operation, value: 1) + integer = value.to_i + + return if integer <= 0 + validate_operation!(operation) - increment_project_counter(project, object_type, operation) - increment_global_counter(object_type, operation) + increment_project_counter(project, object_type, operation, integer) + increment_global_counter(object_type, operation, integer) end def summary(project) @@ -41,7 +46,7 @@ module Gitlab # and it's used to report the health of the Github Importer # in the Grafana Dashboard # https://dashboards.gitlab.net/d/2zgM_rImz/github-importer?orgId=1 - def increment_global_counter(object_type, operation) + def increment_global_counter(object_type, operation, value) key = GLOBAL_COUNTER_KEY % { operation: operation, object_type: object_type @@ -51,18 +56,26 @@ module Gitlab object_type: object_type.to_s.humanize } - Gitlab::Metrics.counter(key.to_sym, description).increment + Gitlab::Metrics.counter(key.to_sym, description).increment(by: value) end # Project counters are short lived, in Redis, # and it's used to report how successful a project # import was with the #summary method. - def increment_project_counter(project, object_type, operation) - counter_key = PROJECT_COUNTER_KEY % { project: project.id, operation: operation, object_type: object_type } + def increment_project_counter(project, object_type, operation, value) + counter_key = PROJECT_COUNTER_KEY % { + project: project.id, + operation: operation, + object_type: object_type + } add_counter_to_list(project, operation, counter_key) - CACHING.increment(counter_key) + if Feature.disabled?(:import_redis_increment_by, default_enabled: :yaml) + CACHING.increment(counter_key) + else + CACHING.increment_by(counter_key, value) + end end def add_counter_to_list(project, operation, key) @@ -75,7 +88,7 @@ module Gitlab def validate_operation!(operation) unless operation.to_s.presence_in(OPERATIONS) - raise ArgumentError, "Operation must be #{OPERATIONS.join(' or ')}" + raise ArgumentError, "operation must be #{OPERATIONS.join(' or ')}" end end end diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb index 4598429d568..8c76f5a9d94 100644 --- a/lib/gitlab/github_import/parallel_scheduling.rb +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -49,9 +49,14 @@ module Gitlab retval rescue StandardError => e - error(project.id, e) + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: self.class.name, + exception: e, + fail_import: abort_on_failure + ) - raise e + raise(e) end # Imports all the objects in sequence in the current thread. @@ -165,6 +170,10 @@ module Gitlab raise NotImplementedError end + def abort_on_failure + false + end + # Any options to be passed to the method used for retrieving the data to # import. def collection_options @@ -174,36 +183,16 @@ module Gitlab private def info(project_id, extra = {}) - logger.info(log_attributes(project_id, extra)) - end - - def error(project_id, exception) - logger.error( - log_attributes( - project_id, - message: 'importer failed', - 'error.message': exception.message - ) - ) - - Gitlab::ErrorTracking.track_exception( - exception, - log_attributes(project_id) - ) + Logger.info(log_attributes(project_id, extra)) end def log_attributes(project_id, extra = {}) extra.merge( - import_source: :github, project_id: project_id, importer: importer_class.name, parallel: parallel? ) end - - def logger - @logger ||= Gitlab::Import::Logger.build - end end end end diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb index 058cd1ebd57..f583ef39d13 100644 --- a/lib/gitlab/github_import/user_finder.rb +++ b/lib/gitlab/github_import/user_finder.rb @@ -120,10 +120,18 @@ module Gitlab read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email) end - # Queries and caches the GitLab user ID for a GitHub user ID, if one was - # found. + # If importing from github.com, queries and caches the GitLab user ID for + # a GitHub user ID, if one was found. + # + # When importing from Github Enterprise, do not query user by Github ID + # since we only have users' Github ID from github.com. def id_for_github_id(id) - gitlab_id = query_id_for_github_id(id) || nil + gitlab_id = + if project.github_enterprise_import? + nil + else + query_id_for_github_id(id) + end Gitlab::Cache::Import::Caching.write(ID_CACHE_KEY % id, gitlab_id) end |