Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'qa/qa/service/praefect_manager.rb')
-rw-r--r--qa/qa/service/praefect_manager.rb102
1 files changed, 89 insertions, 13 deletions
diff --git a/qa/qa/service/praefect_manager.rb b/qa/qa/service/praefect_manager.rb
index 7e47049d446..8ffb7c47652 100644
--- a/qa/qa/service/praefect_manager.rb
+++ b/qa/qa/service/praefect_manager.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+require 'digest'
+
module QA
module Service
class PraefectManager
@@ -50,6 +52,7 @@ module QA
def stop_primary_node
stop_node(@primary_node)
+ wait_until_node_is_removed_from_healthy_storages(@primary_node)
end
def start_primary_node
@@ -67,6 +70,7 @@ module QA
def stop_secondary_node
stop_node(@secondary_node)
+ wait_until_node_is_removed_from_healthy_storages(@secondary_node)
end
def start_secondary_node
@@ -75,6 +79,7 @@ module QA
def stop_tertiary_node
stop_node(@tertiary_node)
+ wait_until_node_is_removed_from_healthy_storages(@tertiary_node)
end
def start_tertiary_node
@@ -82,20 +87,41 @@ module QA
end
def start_node(name)
- shell "docker start #{name}"
- end
+ state = node_state(name)
+ return if state == "running"
+
+ if state == "paused"
+ shell "docker unpause #{name}"
+ end
+
+ if state == "stopped"
+ shell "docker start #{name}"
+ end
- def stop_node(name)
- shell "docker stop #{name}"
wait_until_shell_command_matches(
"docker inspect -f {{.State.Running}} #{name}",
- /false/,
+ /true/,
sleep_interval: 3,
max_duration: 180,
retry_on_exception: true
)
end
+ def stop_node(name)
+ return if node_state(name) == 'paused'
+
+ shell "docker pause #{name}"
+ end
+
+ def node_state(name)
+ state = "stopped"
+ wait_until_shell_command("docker inspect -f {{.State.Status}} #{name}") do |line|
+ QA::Runtime::Logger.debug(line)
+ break state = "running" if line.include?("running")
+ break state = "paused" if line.include?("paused")
+ end
+ end
+
def clear_replication_queue
QA::Runtime::Logger.info("Clearing the replication queue")
shell sql_to_docker_exec_cmd(
@@ -174,15 +200,25 @@ module QA
end
def start_all_nodes
- start_node(@postgres)
+ start_postgres
start_node(@primary_node)
start_node(@secondary_node)
start_node(@tertiary_node)
- start_node(@praefect)
+ start_praefect
wait_for_health_check_all_nodes
end
+ def start_postgres
+ start_node(@postgres)
+
+ Support::Waiter.repeat_until(max_attempts: 60, sleep_interval: 1) do
+ shell(sql_to_docker_exec_cmd("SELECT 1 as healthy_database"), fail_on_exception: false) do |line|
+ break true if line.include?("healthy_database")
+ end
+ end
+ end
+
def verify_storage_move(source_storage, destination_storage, repo_type: :project)
return if Specs::Helpers::ContextSelector.dot_com?
@@ -194,9 +230,8 @@ module QA
def wait_for_praefect
QA::Runtime::Logger.info("Waiting for health check on praefect")
Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
- # praefect runs a grpc server on port 2305, which will return an error 'Connection refused' until such time it is ready
- wait_until_shell_command("docker exec #{@gitaly_cluster} bash -c 'curl #{@praefect}:2305'") do |line|
- break if line.include?('curl: (1) Received HTTP/0.9 when not allowed')
+ wait_until_shell_command("docker exec #{@praefect} gitlab-ctl status praefect") do |line|
+ break true if line.include?('run: praefect: ')
QA::Runtime::Logger.debug(line.chomp)
end
@@ -250,6 +285,48 @@ module QA
end
end
+ def praefect_dataloss_information(project_id)
+ dataloss_info = []
+ cmd = "docker exec #{@praefect} praefect -config /var/opt/gitlab/praefect/config.toml dataloss --partially-unavailable=true"
+ shell(cmd) { |line| dataloss_info << line.strip }
+
+ # Expected will have a record for each repository in the storage, in the following format
+ # @hashed/bc/52/bc52dd634277c4a34a2d6210994a9a5e2ab6d33bb4a3a8963410e00ca6c15a02.git:
+ # Primary: gitaly1
+ # In-Sync Storages:
+ # gitaly1, assigned host
+ # gitaly3, assigned host
+ # Outdated Storages:
+ # gitaly2 is behind by 1 change or less, assigned host
+ #
+ # Alternatively, if all repositories are in sync, a concise message is returned
+ # Virtual storage: default
+ # All repositories are fully available on all assigned storages!
+
+ # extract the relevant project under test info if it is identified
+ start_index = dataloss_info.index { |line| line.include?("#{Digest::SHA256.hexdigest(project_id.to_s)}.git") }
+ unless start_index.nil?
+ dataloss_info = dataloss_info[start_index, 7]
+ end
+
+ dataloss_info&.each { |info| QA::Runtime::Logger.debug(info) }
+ dataloss_info
+ end
+
+ def praefect_dataloss_info_for_project(project_id)
+ dataloss_info = []
+ Support::Retrier.retry_until(max_duration: 60) do
+ dataloss_info = praefect_dataloss_information(project_id)
+ dataloss_info.include?("#{Digest::SHA256.hexdigest(project_id.to_s)}.git")
+ end
+ end
+
+ def wait_for_project_synced_across_all_storages(project_id)
+ Support::Retrier.retry_until(max_duration: 60) do
+ praefect_dataloss_information(project_id).include?('All repositories are fully available on all assigned storages!')
+ end
+ end
+
def wait_for_health_check_all_nodes
wait_for_gitaly_health_check(@primary_node)
wait_for_gitaly_health_check(@secondary_node)
@@ -259,9 +336,8 @@ module QA
def wait_for_gitaly_health_check(node)
QA::Runtime::Logger.info("Waiting for health check on #{node}")
Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
- # gitaly runs a grpc server on port 8075, which will return an error 'Connection refused' until such time it is ready
- wait_until_shell_command("docker exec #{@praefect} bash -c 'curl #{node}:8075'") do |line|
- break if line.include?('curl: (1) Received HTTP/0.9 when not allowed')
+ wait_until_shell_command("docker exec #{node} gitlab-ctl status gitaly") do |line|
+ break true if line.include?('run: gitaly: ')
QA::Runtime::Logger.debug(line.chomp)
end