1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
|
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'optparse'
require 'json'
require 'fileutils'
require 'erb'
require_relative '../tooling/quality/test_level'
# Class to generate RSpec test child pipeline with dynamically parallelized jobs.
#
# The RSpec files to run are bucketed by test level (see `TEST_LEVELS`), a node count is computed
# for each level from the average duration of its test files, and the result is rendered through
# an ERB pipeline template which receives `rspec_files_per_test_level` and `test_suite_prefix`.
class GenerateRspecPipeline
  SKIP_PIPELINE_YML_FILE = ".gitlab/ci/_skip.yml"
  TEST_LEVELS = %i[migration background_migration unit integration system].freeze
  MAX_NODES_COUNT = 50 # Maximum parallelization allowed by GitLab

  OPTIMAL_TEST_JOB_DURATION_IN_SECONDS = 600 # 10 MINUTES
  SETUP_DURATION_IN_SECONDS = 180.0 # 3 MINUTES
  OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS = OPTIMAL_TEST_JOB_DURATION_IN_SECONDS - SETUP_DURATION_IN_SECONDS

  # As of 2022-09-01:
  # $ find spec -type f | wc -l
  #  12825
  # and
  # $ find ee/spec -type f | wc -l
  #  5610
  # which gives a total of 18435 test files (`NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE`).
  #
  # Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/)
  # is 170183 seconds (`DURATION_OF_THE_TEST_SUITE_IN_SECONDS`).
  #
  # This gives an approximate 170183 / 18435 = 9.2 seconds per test file
  # (`DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS`).
  #
  # If we want each test job to finish in 10 minutes, given we have 3 minutes of setup (`SETUP_DURATION_IN_SECONDS`),
  # then we need to give 7 minutes of testing to each test node (`OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS`).
  # (7 * 60) / 9.2 = 45.6
  #
  # So if we'd want to run the full test suites in 10 minutes (`OPTIMAL_TEST_JOB_DURATION_IN_SECONDS`),
  # we'd need to run at max 45 test files per node (`#optimal_test_file_count_per_node_per_test_level`).
  NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE = 18_435
  DURATION_OF_THE_TEST_SUITE_IN_SECONDS = 170_183
  # `fdiv` keeps the fractional part: a plain Integer `/` would truncate the documented 9.2 down to 9.
  DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS =
    DURATION_OF_THE_TEST_SUITE_IN_SECONDS.fdiv(NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE)

  # pipeline_template_path: A YAML pipeline configuration template to generate the final pipeline config from
  # rspec_files_path: A file containing RSpec files to run, separated by a space
  # knapsack_report_path: A file containing a Knapsack report
  # test_suite_prefix: An optional test suite folder prefix (e.g. `ee/` or `jh/`)
  # generated_pipeline_path: An optional filename where to write the pipeline config (defaults to
  #                          `"#{pipeline_template_path}.yml"`)
  #
  # Raises ArgumentError when the pipeline template file does not exist.
  def initialize(
    pipeline_template_path:, rspec_files_path: nil, knapsack_report_path: nil, test_suite_prefix: nil,
    generated_pipeline_path: nil)
    # `to_s` turns a missing (nil) path into "", which `File.exist?` reports as absent.
    @pipeline_template_path = pipeline_template_path.to_s
    @rspec_files_path = rspec_files_path.to_s
    @knapsack_report_path = knapsack_report_path.to_s
    @test_suite_prefix = test_suite_prefix
    @generated_pipeline_path = generated_pipeline_path || "#{pipeline_template_path}.yml"

    return if File.exist?(@pipeline_template_path)

    raise ArgumentError, "Pipeline template not found: #{@pipeline_template_path.inspect}"
  end

  # Writes the pipeline configuration to `generated_pipeline_path`.
  #
  # When there are no RSpec files to run, `SKIP_PIPELINE_YML_FILE` is copied instead of rendering
  # the template.
  def generate!
    if all_rspec_files.empty?
      info "Using #{SKIP_PIPELINE_YML_FILE} due to no RSpec files to run"
      FileUtils.cp(SKIP_PIPELINE_YML_FILE, generated_pipeline_path)
      return
    end

    info "pipeline_template_path: #{pipeline_template_path}"
    info "generated_pipeline_path: #{generated_pipeline_path}"

    # Render before touching the output file, so a template error does not leave an empty file behind.
    pipeline_yaml = ERB.new(File.read(pipeline_template_path)).result_with_hash(erb_binding)
    # Collapse runs of newlines and trim the leading/trailing whitespace of the rendered config.
    File.write(generated_pipeline_path, pipeline_yaml.squeeze("\n").strip)
  end

  private

  attr_reader :pipeline_template_path, :rspec_files_path, :knapsack_report_path, :test_suite_prefix,
    :generated_pipeline_path

  # Prints a message to stdout, prefixed with the class name.
  def info(text)
    $stdout.puts "[#{self.class.name}] #{text}"
  end

  # RSpec files listed in `rspec_files_path`, or an empty array when that file does not exist.
  def all_rspec_files
    @all_rspec_files ||= File.exist?(rspec_files_path) ? File.read(rspec_files_path).split(' ') : []
  end

  # Local variables made available to the ERB pipeline template.
  def erb_binding
    {
      rspec_files_per_test_level: rspec_files_per_test_level,
      test_suite_prefix: test_suite_prefix
    }
  end

  # Returns `{ test_level => { files: [...], parallelization: Integer } }` for every level of `TEST_LEVELS`.
  #
  # Levels are processed in `TEST_LEVELS` order and each file is assigned to the first level whose
  # regexp matches it; files matching no level are left out.
  def rspec_files_per_test_level
    @rspec_files_per_test_level ||= begin
      remaining_files = all_rspec_files

      TEST_LEVELS.each_with_object(Hash.new { |h, k| h[k] = {} }) do |test_level, memo| # rubocop:disable Rails/IndexWith
        level_regexp = test_level_service.regexp(test_level, true)
        level_files, remaining_files = remaining_files.partition { |file| file.match?(level_regexp) }

        memo[test_level][:files] = level_files
        memo[test_level][:parallelization] = optimal_nodes_count(test_level, level_files)
      end
    end
  end

  # Number of parallel nodes to use for `rspec_files` of the given `test_level`, capped at `MAX_NODES_COUNT`.
  def optimal_nodes_count(test_level, rspec_files)
    nodes_count = rspec_files.size.fdiv(optimal_test_file_count_per_node_per_test_level(test_level)).ceil
    info "Optimal node count for #{rspec_files.size} #{test_level} RSpec files is #{nodes_count}."

    if nodes_count > MAX_NODES_COUNT
      info "We don't want to parallelize to more than #{MAX_NODES_COUNT} jobs for now! " \
           "Decreasing the parallelization to #{MAX_NODES_COUNT}."

      MAX_NODES_COUNT
    else
      nodes_count
    end
  end

  # How many test files of `test_level` fit in `OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS`, never less than 1.
  def optimal_test_file_count_per_node_per_test_level(test_level)
    [
      (OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS / average_test_file_duration_in_seconds_per_test_level[test_level]),
      1
    ].max
  end

  # Returns `{ test_level => average duration of a test file in seconds }`.
  #
  # The average comes from the Knapsack report entries matching the level (each entry counts for the
  # first matching level, in `TEST_LEVELS` order). Levels without any entry, including every level
  # when the report is empty, fall back to `DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS`.
  def average_test_file_duration_in_seconds_per_test_level
    @average_test_file_duration_in_seconds_per_test_level ||= begin
      remaining_durations = knapsack_report.to_a

      TEST_LEVELS.each_with_object({}) do |test_level, memo|
        level_regexp = test_level_service.regexp(test_level, true)
        level_durations, remaining_durations =
          remaining_durations.partition { |test_file, _| test_file.match?(level_regexp) }

        memo[test_level] =
          if level_durations.empty?
            DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS
          else
            # `fdiv` so that Integer durations in the report are not truncated by the division.
            level_durations.sum { |_, duration| duration }.fdiv(level_durations.size)
          end
      end
    end
  end

  # Parsed Knapsack report (test file => duration), or `{}` when it is missing or unusable.
  def knapsack_report
    @knapsack_report ||= load_knapsack_report
  end

  # Reads and parses `knapsack_report_path`. Problems are logged and result in an empty report.
  def load_knapsack_report
    return {} unless File.exist?(knapsack_report_path)

    report = JSON.parse(File.read(knapsack_report_path))
    return report if report.is_a?(Hash)

    # Valid JSON that is not an object (e.g. `null` or an array) cannot map test files to durations.
    info "[ERROR] Knapsack report at #{knapsack_report_path} is not a JSON object! Ignoring it."
    {}
  rescue JSON::ParserError => e
    info "[ERROR] Knapsack report at #{knapsack_report_path} couldn't be parsed! Error:\n#{e}"
    {}
  end

  # Service providing the regexp that matches the test files of a given test level.
  def test_level_service
    @test_level_service ||= Quality::TestLevel.new(test_suite_prefix)
  end
end
if $PROGRAM_NAME == __FILE__
  # Keyword arguments collected from the command line, forwarded as-is to GenerateRspecPipeline.new.
  cli_options = {}

  option_parser = OptionParser.new do |parser|
    parser.on(
      "-f", "--rspec-files-path path", String,
      "Path to a file containing RSpec files to run, separated by a space"
    ) { |path| cli_options[:rspec_files_path] = path }

    parser.on(
      "-t", "--pipeline-template-path PATH", String,
      "Path to a YAML pipeline configuration template to generate the final pipeline config from"
    ) { |path| cli_options[:pipeline_template_path] = path }

    parser.on(
      "-k", "--knapsack-report-path path", String,
      "Path to a Knapsack report"
    ) { |path| cli_options[:knapsack_report_path] = path }

    parser.on(
      "-p", "--test-suite-prefix test_suite_prefix", String,
      "Test suite folder prefix"
    ) { |prefix| cli_options[:test_suite_prefix] = prefix }

    parser.on(
      "-o", "--generated-pipeline-path generated_pipeline_path", String,
      "Path where to write the pipeline config"
    ) { |path| cli_options[:generated_pipeline_path] = path }

    # Print the usage summary and stop without generating anything.
    parser.on("-h", "--help", "Prints this help") do
      puts parser
      exit
    end
  end

  # `parse!` consumes the recognized switches from ARGV, filling `cli_options` through the handlers above.
  option_parser.parse!

  GenerateRspecPipeline.new(**cli_options).generate!
end
|