1 files changed, 66 insertions, 0 deletions
diff --git a/scripts/generate-rspec-foss-impact-pipeline b/scripts/generate-rspec-foss-impact-pipeline
new file mode 100755
index 00000000000..3277f38ebe1
--- /dev/null
+++ b/scripts/generate-rspec-foss-impact-pipeline
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Script to generate `rspec foss-impact` test child pipeline with dynamically parallelized jobs.
+
+source scripts/utils.sh
+
+rspec_matching_tests_foss_path="${1}"
+pipeline_yml="${2}"
+
+test_file_count=$(wc -w "${rspec_matching_tests_foss_path}" | awk '{ print $1 }')
+echoinfo "test_file_count: ${test_file_count}"
+
+if [[ "${test_file_count}" -eq 0 ]]; then
+  skip_pipeline=".gitlab/ci/_skip.yml"
+
+  echo "Using ${skip_pipeline} due to no impacted FOSS rspec tests to run"
+  cp $skip_pipeline "$pipeline_yml"
+  exit
+fi
+
+# As of 2022-09-01:
+# $ find spec -type f | wc -l
+#  12825
+# and
+# $ find ee/spec -type f | wc -l
+#  5610
+# which gives a total of 18435 test files (`number_of_tests_in_total_in_the_test_suite`).
+#
+# Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/) is 170183 seconds (`duration_of_the_test_suite_in_seconds`).
+#
+# This gives an approximate 170183 / 18435 = 9.2 seconds per test file (`average_test_file_duration_in_seconds`).
+#
+# If we want each test job to finish in 10 minutes, given we have 3 minutes of setup (`setup_duration_in_seconds`), then we need to give 7 minutes of testing to each test node (`optimal_test_runtime_duration_in_seconds`).
+# (7 * 60) / 9.2 = 45.6
+#
+# So if we'd want to run the full test suites in 10 minutes (`optimal_test_job_duration_in_seconds`), we'd need to run at max 45 test file per nodes (`optimal_test_file_count_per_node`).
+number_of_tests_in_total_in_the_test_suite=18435
+duration_of_the_test_suite_in_seconds=170183
+optimal_test_job_duration_in_seconds=600 # 10 minutes
+setup_duration_in_seconds=180 # 3 minutes
+
+optimal_test_runtime_duration_in_seconds=$(( optimal_test_job_duration_in_seconds - setup_duration_in_seconds ))
+echoinfo "optimal_test_runtime_duration_in_seconds: ${optimal_test_runtime_duration_in_seconds}"
+
+average_test_file_duration_in_seconds=$(( duration_of_the_test_suite_in_seconds / number_of_tests_in_total_in_the_test_suite ))
+echoinfo "average_test_file_duration_in_seconds: ${average_test_file_duration_in_seconds}"
+
+optimal_test_file_count_per_node=$(( optimal_test_runtime_duration_in_seconds / average_test_file_duration_in_seconds ))
+echoinfo "optimal_test_file_count_per_node: ${optimal_test_file_count_per_node}"
+
+node_count=$(( test_file_count / optimal_test_file_count_per_node ))
+echoinfo "node_count: ${node_count}"
+
+echoinfo "Optimal node count for 'rspec foss-impact' jobs is ${node_count}."
+
+MAX_NODES_COUNT=50 # Maximum parallelization allowed by GitLab
+if [[ "${node_count}" -gt "${MAX_NODES_COUNT}" ]]; then
+  echoinfo "We don't want to parallelize 'rspec foss-impact' to more than ${MAX_NODES_COUNT} jobs for now! Decreasing the parallelization to ${MAX_NODES_COUNT}."
+  node_count=${MAX_NODES_COUNT}
+fi
+
+ruby -rerb -e "puts ERB.new(File.read('.gitlab/ci/rails/rspec-foss-impact.gitlab-ci.yml.erb')).result_with_hash(parallel_value: ${node_count})" > "${pipeline_yml}"
+
+echosuccess "Generated ${pipeline_yml} pipeline with following content:"
+cat "${pipeline_yml}"