Welcome to mirror list, hosted at ThFree Co, Russian Federation.

elastic_repo_indexer « bin - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3dfe0c4164b71e18824a6915c5eb7b31cdd4b3ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env ruby

require 'rubygems'
require 'bundler/setup'
require 'json'
require 'active_model'
require 'active_support'
require 'active_support/core_ext'
require 'benchmark'
require 'charlock_holmes'

$: << File.expand_path('../lib', __dir__)
$: << File.expand_path('../ee/lib', __dir__)

require 'open3'
require 'rugged'

require 'gitlab/blob_helper'
require 'gitlab/elastic/client'
require 'elasticsearch/model'
require 'elasticsearch/git'
require 'elasticsearch/git/encoder_helper'
require 'elasticsearch/git/lite_blob'
require 'elasticsearch/git/model'
require 'elasticsearch/git/repository'

# An unhandled exception in any thread (this script indexes commits on a
# background thread) must abort the whole process rather than go unnoticed.
Thread.abort_on_exception = true

path_to_log_file = File.expand_path('../log/es-indexer.log', __dir__)
LOGGER = Logger.new(path_to_log_file)

# Positional command-line arguments: the project ID first, then the
# repository path. Both are nil when the argument is not supplied.
PROJECT_ID = ARGV.shift
REPO_PATH = ARGV.shift

# Optional settings taken from the environment; each is nil when unset.
FROM_SHA = ENV['FROM_SHA']
TO_SHA = ENV['TO_SHA']
RAILS_ENV = ENV['RAILS_ENV']

# Symbols get stringified when passed through JSON, so the top-level keys are
# converted back to symbols here (nested hashes keep their string keys).
# ENV.fetch is used so that a missing variable fails with a KeyError naming
# ELASTIC_CONNECTION_INFO instead of an opaque TypeError from JSON.parse(nil).
ELASTIC_CONFIG = JSON.parse(ENV.fetch('ELASTIC_CONNECTION_INFO')).transform_keys(&:to_sym)

LOGGER.info("Has been scheduled for project #{REPO_PATH} with SHA range #{FROM_SHA}:#{TO_SHA}")

# Script-local repository object. It mixes in Elasticsearch::Git::Repository
# and exposes the identifiers and path this process was launched with
# (presumably read back by the mixin -- its code is not visible here).
class Repository
  include Elasticsearch::Git::Repository

  # "gitlab-<RAILS_ENV>", or just "gitlab" when RAILS_ENV is nil.
  index_name ['gitlab', RAILS_ENV].compact.join('-')

  # Gives this instance's Elasticsearch proxy a client built from
  # ELASTIC_CONFIG.
  def initialize
    __elasticsearch__.client = ::Gitlab::Elastic::Client.build(ELASTIC_CONFIG)
  end

  # Returns the client that was assigned in #initialize.
  def client_for_indexing
    __elasticsearch__.client
  end

  # Both identifiers resolve to the project ID given on the command line.
  def repository_id
    PROJECT_ID
  end
  alias_method :project_id, :repository_id

  # Returns the repository path given on the command line.
  def path_to_repo
    REPO_PATH
  end
end

indexer = Repository.new

# Only the revision bounds that were actually provided are passed along.
rev_range = { from_rev: FROM_SHA, to_rev: TO_SHA }.compact

# Runs one indexing pass and logs its start, per-batch progress and elapsed
# wall-clock time. `noun` is the plural item name used in the log lines; the
# given block receives the progress callback and must hand it to the indexer.
# Every local below is created per call, so the two concurrent passes never
# share a counter.
track_pass = lambda do |noun, &run|
  LOGGER.info("Indexing #{noun} started")

  elapsed = Benchmark.realtime do
    done = 0

    # A proc rather than a lambda, so it takes yielded arguments exactly the
    # way a literal block would.
    on_batch = proc do |batch, total_count|
      done += batch.length
      LOGGER.info("Indexed #{done}/#{total_count} #{noun}")
    end

    run.call(on_batch)
  end

  LOGGER.info("#{noun.capitalize} for #{REPO_PATH} are indexed. Time elapsed: #{elapsed}")
end

# Commits are indexed on a background thread while blobs are indexed on the
# main thread.
commit_thr = Thread.new do
  track_pass.call('commits') { |on_batch| indexer.index_commits(rev_range, &on_batch) }
end

track_pass.call('blobs') { |on_batch| indexer.index_blobs(rev_range, &on_batch) }

# Do not exit before the commit pass has finished.
commit_thr.join