Welcome to mirror list, hosted at ThFree Co, Russian Federation.

import.rake « import_export « gitlab « tasks « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c832cba0287cb1006ca50dff6f511fb05a6b29a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# frozen_string_literal: true

# Import large project archives
#
# This task:
#   1. Disables ObjectStorage for archive upload
#   2. Performs Sidekiq job synchronously
#
# @example
#   bundle exec rake "gitlab:import_export:import[root, root, imported_project, /path/to/file.tar.gz, true]"
#
namespace :gitlab do
  namespace :import_export do
    desc 'GitLab | Import/Export | EXPERIMENTAL | Import large project archives'
    task :import, %i[username namespace_path project_path archive_path measurement_enabled] => :gitlab_environment do |_task, args|
      # Required lazily, inside the task body, so that merely loading the
      # Rake tasks does not emit Sidekiq's testing-mode warnings.
      require 'sidekiq/testing'

      warn_user_is_not_gitlab

      # A non-blank IMPORT_DEBUG sends ActiveRecord logging to standard output.
      ActiveRecord::Base.logger = Logger.new(STDOUT) if ENV['IMPORT_DEBUG'].present?

      # Rake hands every task argument over as a string, so the measurement
      # flag is only enabled by the literal value 'true'.
      import_options = {
        namespace_path: args.namespace_path,
        project_path: args.project_path,
        username: args.username,
        file_path: args.archive_path,
        measurement_enabled: args.measurement_enabled == 'true'
      }

      GitlabProjectImport.new(import_options).import
    end
  end
end

# Imports a GitLab project export archive in-process on behalf of the
# `gitlab:import_export:import` Rake task.
#
# Progress and errors are reported on standard output. Every failure path ends
# with `exit 1`, so callers should expect SystemExit when the import fails.
class GitlabProjectImport
  # @param opts [Hash] options; every key below is required
  # @option opts [String] :project_path path for the project being created
  # @option opts [String] :file_path location of the export archive to import
  # @option opts [String] :namespace_path full path used to look up the target namespace
  # @option opts [String] :username username used to look up the importing user
  # @option opts [Boolean] :measurement_enabled statistics are printed unless this is `false`
  # @raise [KeyError] if any of the keys is missing
  def initialize(opts)
    @project_path   = opts.fetch(:project_path)
    @file_path      = opts.fetch(:file_path)
    # The raw lookup keys are kept so that a failed lookup can be reported
    # with the value the caller actually supplied.
    @namespace_path = opts.fetch(:namespace_path)
    @namespace      = Namespace.find_by_full_path(@namespace_path)
    @username       = opts.fetch(:username)
    @current_user   = User.find_by_username(@username)
    @measurement_enabled = opts.fetch(:measurement_enabled)
  end

  # Runs the import and prints the outcome.
  #
  # Prints 'Done!' on success. On failure prints an "ERROR: ..." line, or an
  # "Exception: ..." line followed by the backtrace, and exits with status 1.
  #
  # @return [void]
  def import
    ensure_namespace_and_user_found

    show_import_start_message

    run_isolated_sidekiq_job

    show_import_failures_count

    if @project&.import_state&.last_error
      abort_with_error(@project.import_state.last_error)
    elsif @project.errors.any?
      abort_with_error(@project.errors.full_messages.join(', '))
    else
      puts 'Done!'
    end
  rescue StandardError => e
    # SystemExit raised by `exit` is not a StandardError, so the exits above
    # pass straight through this handler.
    puts "Exception: #{e.message}"
    puts e.backtrace
    exit 1
  end

  private

  # Prints "ERROR: <message>" and terminates the process with status 1.
  def abort_with_error(message)
    puts "ERROR: #{message}"
    exit 1
  end

  # Stops early with a readable message when either lookup performed in
  # #initialize came back empty, instead of failing later on a nil receiver.
  def ensure_namespace_and_user_found
    abort_with_error("Namespace '#{@namespace_path}' not found") unless @namespace
    abort_with_error("User '#{@username}' not found") unless @current_user
  end

  # Wraps the block in a RequestStore begin!/end!/clear! cycle.
  def with_request_store
    RequestStore.begin!
    yield
  ensure
    RequestStore.end!
    RequestStore.clear!
  end

  # Runs the block while counting "sql.active_record" notifications, skipping
  # those named CACHE or SCHEMA, then prints the total.
  def with_count_queries(&block)
    count = 0

    counter_f = ->(_name, _started, _finished, _unique_id, payload) {
      count += 1 unless %w[CACHE SCHEMA].include?(payload[:name])
    }

    ActiveSupport::Notifications.subscribed(counter_f, "sql.active_record", &block)

    puts "Number of sql calls: #{count}"
  end

  # Runs the block and prints how much the total, minor and major GC counters
  # reported by GC.stat grew while it was running.
  def with_gc_counter
    gc_counts_before = GC.stat.select { |k, _v| k.match?(/count/) }
    yield
    gc_counts_after = GC.stat.select { |k, _v| k.match?(/count/) }
    stats = gc_counts_before.merge(gc_counts_after) { |_k, vb, va| va - vb }
    puts "Total GC count: #{stats[:count]}"
    puts "Minor GC count: #{stats[:minor_gc_count]}"
    puts "Major GC count: #{stats[:major_gc_count]}"
  end

  # Runs the block and prints its wall-clock duration as HH:MM:SS.
  def with_measure_time
    timing = Benchmark.realtime { yield }

    puts "Time to finish: #{format_duration(timing)}"
  end

  # Formats a duration in seconds as HH:MM:SS, dropping fractional seconds.
  # Hours are computed arithmetically rather than through Time#strftime so a
  # run of 24 hours or more does not wrap back to 00.
  def format_duration(seconds)
    hours, remainder = seconds.to_i.divmod(3600)
    minutes, secs = remainder.divmod(60)

    format('%02d:%02d:%02d', hours, minutes, secs)
  end

  # Runs the block inside the GC, SQL-count and timing measurements.
  def with_measuring
    puts "Measuring enabled..."
    with_gc_counter do
      with_count_queries do
        with_measure_time do
          yield
        end
      end
    end
  end

  # Measurement is on for every value except an explicit `false`.
  def measurement_enabled?
    @measurement_enabled != false
  end

  # We want to ensure that all Sidekiq jobs are executed
  # synchronously as part of that process.
  # This ensures that all expensive operations do not escape
  # to general Sidekiq clusters/nodes.
  def with_isolated_sidekiq_job
    Sidekiq::Testing.fake! do
      with_request_store do
        # If you are attempting to import a large project into a development environment,
        # you may see Gitaly throw an error about too many calls or invocations.
        # This is due to a n+1 calls limit being set for development setups (not enforced in production)
        # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/24475#note_283090635
        # For development setups, this code-path will be excluded from n+1 detection.
        ::Gitlab::GitalyClient.allow_n_plus_1_calls do
          measurement_enabled? ? with_measuring { yield } : yield
        end
      end

      true
    end
  end

  # Creates the project and then drains the queued Sidekiq jobs.
  def run_isolated_sidekiq_job
    with_isolated_sidekiq_job do
      # Block form: the archive handle stays open while the project is created
      # and the jobs are drained, and is closed afterwards even on failure.
      File.open(@file_path) do |archive|
        @project = create_project(archive)

        execute_sidekiq_job
      end
    end
  end

  # Hands the open archive to Projects::GitlabProjectsImportService and
  # returns whatever the service's #execute returns.
  #
  # @param archive [File] open handle on the export archive
  def create_project(archive)
    # We are disabling ObjectStorage for `import`
    # as it is too slow to handle big archives:
    # 1. DB transaction timeouts on upload
    # 2. Download of archive before unpacking
    disable_upload_object_storage do
      service = Projects::GitlabProjectsImportService.new(
        @current_user,
        {
          namespace_id: @namespace.id,
          path:         @project_path,
          file:         archive
        }
      )

      service.execute
    end
  end

  # Runs every job queued while Sidekiq was in fake mode.
  def execute_sidekiq_job
    Sidekiq::Worker.drain_all
  end

  # Runs the block with both the background and direct upload settings of the
  # uploads object store switched off.
  def disable_upload_object_storage
    overwrite_uploads_setting('background_upload', false) do
      overwrite_uploads_setting('direct_upload', false) do
        yield
      end
    end
  end

  # Temporarily sets Settings.uploads.object_store[key] to `value` for the
  # duration of the block and returns the block's result.
  def overwrite_uploads_setting(key, value)
    old_value = Settings.uploads.object_store[key]
    Settings.uploads.object_store[key] = value

    # The restore is scoped to after the override has been applied, so a
    # failure while reading the old value cannot write nil into the setting.
    begin
      yield
    ensure
      Settings.uploads.object_store[key] = old_value
    end
  end

  # Full path of the project being created: "<namespace full path>/<project path>".
  def full_path
    "#{@namespace.full_path}/#{@project_path}"
  end

  def show_import_start_message
    puts "Importing GitLab export: #{@file_path} into GitLab" \
      " #{full_path}" \
      " as #{@current_user.name}"
  end

  # Prints the number of recorded import failures, if there are any.
  def show_import_failures_count
    return unless @project.import_failures.exists?

    puts "Total number of not imported relations: #{@project.import_failures.count}"
  end
end