Welcome to mirror list, hosted at ThFree Co, Russian Federation.

bulk_importing.rb « github_import « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d16f4d7587b571a181b00c5c3d1b9826c4b6d839 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# frozen_string_literal: true

module Gitlab
  module GithubImport
    # Mixin with shared helpers for importers that insert rows in bulk.
    #
    # The methods below call the following methods without defining them, so
    # the including class is expected to provide:
    #
    # model               - The class used to build and validate a record; it
    #                       must respond to `new(attrs)` and `table_name`.
    # already_imported?   - Whether the given object should be skipped.
    # build_attributes    - The attributes Hash to insert for an object.
    # object_type         - The name used for log messages and counters.
    # github_identifiers  - Identifiers describing the source object, stored
    #                       alongside validation failures.
    module BulkImporting
      attr_reader :project, :client

      # project - An instance of `Project`.
      # client - An instance of `Gitlab::GithubImport::Client`.
      def initialize(project, client)
        @project = project
        @client = client
        # Not read anywhere in this module; kept in case including classes
        # rely on it.
        @validation_errors = []
      end

      # Builds and returns an Array of objects to bulk insert into the
      # database and array of validation errors if object is invalid.
      #
      # Objects for which `already_imported?` is true are skipped. The number
      # of valid rows is logged and counted as `:fetched`.
      #
      # enum - An Enumerable that returns the objects to turn into database
      #        rows.
      #
      # Returns a two-element Array `[rows, errors]`. `rows` holds the
      # attributes of every valid object; `errors` holds one Hash per invalid
      # object with the keys `:validation_errors` and `:github_identifiers`.
      def build_database_rows(enum)
        errors = []
        rows = enum.each_with_object([]) do |(object, _), result|
          next if already_imported?(object)

          attrs = build_attributes(object)
          build_record = model.new(attrs)

          if build_record.invalid?
            # Named differently from the `github_identifiers` method so the
            # call below is not shadowed by a same-named local variable.
            identifiers = github_identifiers(object)

            log_error(identifiers, build_record.errors.full_messages)
            errors << {
              validation_errors: build_record.errors,
              github_identifiers: identifiers
            }
            next
          end

          result << attrs
        end

        log_and_increment_counter(rows.size, :fetched)

        [rows, errors]
      end

      # Bulk inserts the given rows into the database.
      #
      # rows - An Array of attribute Hashes to insert into `model.table_name`.
      # batch_size - The maximum number of rows inserted per statement.
      #
      # Each inserted slice is logged and counted as `:imported`.
      def bulk_insert(rows, batch_size: 100)
        rows.each_slice(batch_size) do |slice|
          ApplicationRecord.legacy_bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert

          log_and_increment_counter(slice.size, :imported)
        end
      end

      # The type of object being imported, used in log messages and counters.
      # Must be implemented by the including class.
      def object_type
        raise NotImplementedError
      end

      # Records the given validation errors as import failures of the project.
      #
      # errors - An Array of Hashes as returned by `build_database_rows`, each
      #          with the keys `:validation_errors` and `:github_identifiers`.
      #
      # Returns nil without touching the database when `errors` is empty.
      def bulk_insert_failures(errors)
        # `insert_all` rejects an empty list of rows on some Rails versions,
        # so there is nothing to do (and nothing safe to call) in that case.
        return if errors.empty?

        rows = errors.map do |error|
          correlation_id_value = Labkit::Correlation::CorrelationId.current_or_new_id

          {
            source: self.class.name,
            exception_class: 'ActiveRecord::RecordInvalid',
            # Only the first validation message is stored, cut to 255 characters.
            exception_message: error[:validation_errors].full_messages.first.truncate(255),
            correlation_id_value: correlation_id_value,
            retry_count: nil,
            created_at: Time.zone.now,
            external_identifiers: error[:github_identifiers]
          }
        end

        project.import_failures.insert_all(rows)
      end

      private

      # Logs how many objects went through `operation` and increments the
      # matching import counter by the same amount.
      #
      # value - The number of objects processed.
      # operation - The name of the operation, e.g. `:fetched` or `:imported`.
      def log_and_increment_counter(value, operation)
        Gitlab::Import::Logger.info(
          import_type: :github,
          project_id: project.id,
          importer: self.class.name,
          message: "#{value} #{object_type.to_s.pluralize} #{operation}"
        )

        Gitlab::GithubImport::ObjectCounter.increment(
          project,
          object_type,
          operation,
          value: value
        )
      end

      # Logs the validation messages of an object that could not be imported.
      #
      # github_identifiers - The identifiers of the source object.
      # messages - The validation error messages to log.
      def log_error(github_identifiers, messages)
        Gitlab::Import::Logger.error(
          import_type: :github,
          project_id: project.id,
          importer: self.class.name,
          message: messages,
          github_identifiers: github_identifiers
        )
      end

      # The identifiers of the given source object, stored with validation
      # failures. Must be implemented by the including class.
      def github_identifiers(object)
        raise NotImplementedError
      end
    end
  end
end