Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaweł Chojnacki <pawel@chojnacki.ws>2017-04-07 13:27:15 +0300
committerDouwe Maan <douwe@gitlab.com>2017-04-07 13:27:15 +0300
commitc3e43c9ba38f8cc0f2fd4a918d052c3977a93421 (patch)
tree3c5e09e7955663f69573ee9f47e9344011993532 /lib/gitlab/health_checks
parent15e87cea3a374e8d929c28f6843170b182fe159a (diff)
Add /-/readiness /-/liveness and /-/health_metrics endpoints to track application readiness
Diffstat (limited to 'lib/gitlab/health_checks')
-rw-r--r--lib/gitlab/health_checks/base_abstract_check.rb45
-rw-r--r--lib/gitlab/health_checks/db_check.rb29
-rw-r--r--lib/gitlab/health_checks/fs_shards_check.rb117
-rw-r--r--lib/gitlab/health_checks/metric.rb3
-rw-r--r--lib/gitlab/health_checks/redis_check.rb25
-rw-r--r--lib/gitlab/health_checks/result.rb3
-rw-r--r--lib/gitlab/health_checks/simple_abstract_check.rb43
7 files changed, 265 insertions, 0 deletions
diff --git a/lib/gitlab/health_checks/base_abstract_check.rb b/lib/gitlab/health_checks/base_abstract_check.rb
new file mode 100644
index 00000000000..7de6d4d9367
--- /dev/null
+++ b/lib/gitlab/health_checks/base_abstract_check.rb
@@ -0,0 +1,45 @@
+module Gitlab
+ module HealthChecks
+ module BaseAbstractCheck
+ def name
+ super.demodulize.underscore
+ end
+
+ def human_name
+ name.sub(/_check$/, '').capitalize
+ end
+
+ def readiness
+ raise NotImplementedError
+ end
+
+ def liveness
+ HealthChecks::Result.new(true)
+ end
+
+ def metrics
+ []
+ end
+
+ protected
+
+ def metric(name, value, **labels)
+ Metric.new(name, value, labels)
+ end
+
+ def with_timing(proc)
+ start = Time.now
+ result = proc.call
+ yield result, Time.now.to_f - start.to_f
+ end
+
+ def catch_timeout(seconds, &block)
+ begin
+ Timeout.timeout(seconds.to_i, &block)
+ rescue Timeout::Error => ex
+ ex
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/health_checks/db_check.rb b/lib/gitlab/health_checks/db_check.rb
new file mode 100644
index 00000000000..fd94984f8a2
--- /dev/null
+++ b/lib/gitlab/health_checks/db_check.rb
@@ -0,0 +1,29 @@
+module Gitlab
+ module HealthChecks
+ class DbCheck
+ extend SimpleAbstractCheck
+
+ class << self
+ private
+
+ def metric_prefix
+ 'db_ping'
+ end
+
+ def is_successful?(result)
+ result == '1'
+ end
+
+ def check
+ catch_timeout 10.seconds do
+ if Gitlab::Database.postgresql?
+ ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.[]('ping')
+ else
+ ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.first&.to_s
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb
new file mode 100644
index 00000000000..df962d203b7
--- /dev/null
+++ b/lib/gitlab/health_checks/fs_shards_check.rb
@@ -0,0 +1,117 @@
+module Gitlab
+ module HealthChecks
+ class FsShardsCheck
+ extend BaseAbstractCheck
+
+ class << self
+ def readiness
+ repository_storages.map do |storage_name|
+ begin
+ tmp_file_path = tmp_file_path(storage_name)
+
+ if !storage_stat_test(storage_name)
+ HealthChecks::Result.new(false, 'cannot stat storage', shard: storage_name)
+ elsif !storage_write_test(tmp_file_path)
+ HealthChecks::Result.new(false, 'cannot write to storage', shard: storage_name)
+ elsif !storage_read_test(tmp_file_path)
+ HealthChecks::Result.new(false, 'cannot read from storage', shard: storage_name)
+ else
+ HealthChecks::Result.new(true, nil, shard: storage_name)
+ end
+ rescue RuntimeError => ex
+ message = "unexpected error #{ex} when checking storage #{storage_name}"
+ Rails.logger.error(message)
+ HealthChecks::Result.new(false, message, shard: storage_name)
+ ensure
+ delete_test_file(tmp_file_path)
+ end
+ end
+ end
+
+ def metrics
+ repository_storages.flat_map do |storage_name|
+ tmp_file_path = tmp_file_path(storage_name)
+ [
+ operation_metrics(:filesystem_accessible, :filesystem_access_latency, -> { storage_stat_test(storage_name) }, shard: storage_name),
+ operation_metrics(:filesystem_writable, :filesystem_write_latency, -> { storage_write_test(tmp_file_path) }, shard: storage_name),
+ operation_metrics(:filesystem_readable, :filesystem_read_latency, -> { storage_read_test(tmp_file_path) }, shard: storage_name)
+ ].flatten
+ end
+ end
+
+ private
+
+ RANDOM_STRING = SecureRandom.hex(1000).freeze
+
+ def operation_metrics(ok_metric, latency_metric, operation, **labels)
+ with_timing operation do |result, elapsed|
+ [
+ metric(latency_metric, elapsed, **labels),
+ metric(ok_metric, result ? 1 : 0, **labels)
+ ]
+ end
+ rescue RuntimeError => ex
+ Rails.logger("unexpected error #{ex} when checking #{ok_metric}")
+ [metric(ok_metric, 0, **labels)]
+ end
+
+ def repository_storages
+ @repository_storage ||= Gitlab::CurrentSettings.current_application_settings.repository_storages
+ end
+
+ def storages_paths
+ @storage_paths ||= Gitlab.config.repositories.storages
+ end
+
+ def with_timeout(args)
+ %w{timeout 1}.concat(args)
+ end
+
+ def tmp_file_path(storage_name)
+ Dir::Tmpname.create(%w(fs_shards_check +deleted), path(storage_name)) { |path| path }
+ end
+
+ def path(storage_name)
+ storages_paths&.dig(storage_name, 'path')
+ end
+
+ def storage_stat_test(storage_name)
+ stat_path = File.join(path(storage_name), '.')
+ begin
+ _, status = Gitlab::Popen.popen(with_timeout(%W{ stat #{stat_path} }))
+ status == 0
+ rescue Errno::ENOENT
+ File.exist?(stat_path) && File::Stat.new(stat_path).readable?
+ end
+ end
+
+ def storage_write_test(tmp_path)
+ _, status = Gitlab::Popen.popen(with_timeout(%W{ tee #{tmp_path} })) do |stdin|
+ stdin.write(RANDOM_STRING)
+ end
+ status == 0
+ rescue Errno::ENOENT
+ written_bytes = File.write(tmp_path, RANDOM_STRING) rescue Errno::ENOENT
+ written_bytes == RANDOM_STRING.length
+ end
+
+ def storage_read_test(tmp_path)
+ _, status = Gitlab::Popen.popen(with_timeout(%W{ diff #{tmp_path} - })) do |stdin|
+ stdin.write(RANDOM_STRING)
+ end
+ status == 0
+ rescue Errno::ENOENT
+ file_contents = File.read(tmp_path) rescue Errno::ENOENT
+ file_contents == RANDOM_STRING
+ end
+
+ def delete_test_file(tmp_path)
+ _, status = Gitlab::Popen.popen(with_timeout(%W{ rm -f #{tmp_path} }))
+ status == 0
+ rescue Errno::ENOENT
+ File.delete(tmp_path) rescue Errno::ENOENT
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/health_checks/metric.rb b/lib/gitlab/health_checks/metric.rb
new file mode 100644
index 00000000000..1a2eab0b005
--- /dev/null
+++ b/lib/gitlab/health_checks/metric.rb
@@ -0,0 +1,3 @@
+module Gitlab::HealthChecks
+ Metric = Struct.new(:name, :value, :labels)
+end
diff --git a/lib/gitlab/health_checks/redis_check.rb b/lib/gitlab/health_checks/redis_check.rb
new file mode 100644
index 00000000000..57bbe5b3ad0
--- /dev/null
+++ b/lib/gitlab/health_checks/redis_check.rb
@@ -0,0 +1,25 @@
+module Gitlab
+ module HealthChecks
+ class RedisCheck
+ extend SimpleAbstractCheck
+
+ class << self
+ private
+
+ def metric_prefix
+ 'redis_ping'
+ end
+
+ def is_successful?(result)
+ result == 'PONG'
+ end
+
+ def check
+ catch_timeout 10.seconds do
+ Gitlab::Redis.with(&:ping)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/health_checks/result.rb b/lib/gitlab/health_checks/result.rb
new file mode 100644
index 00000000000..8086760023e
--- /dev/null
+++ b/lib/gitlab/health_checks/result.rb
@@ -0,0 +1,3 @@
+module Gitlab::HealthChecks
+ Result = Struct.new(:success, :message, :labels)
+end
diff --git a/lib/gitlab/health_checks/simple_abstract_check.rb b/lib/gitlab/health_checks/simple_abstract_check.rb
new file mode 100644
index 00000000000..fbe1645c1b1
--- /dev/null
+++ b/lib/gitlab/health_checks/simple_abstract_check.rb
@@ -0,0 +1,43 @@
+module Gitlab
+ module HealthChecks
+ module SimpleAbstractCheck
+ include BaseAbstractCheck
+
+ def readiness
+ check_result = check
+ if is_successful?(check_result)
+ HealthChecks::Result.new(true)
+ elsif check_result.is_a?(Timeout::Error)
+ HealthChecks::Result.new(false, "#{human_name} check timed out")
+ else
+ HealthChecks::Result.new(false, "unexpected #{human_name} check result: #{check_result}")
+ end
+ end
+
+ def metrics
+ with_timing method(:check) do |result, elapsed|
+ Rails.logger.error("#{human_name} check returned unexpected result #{result}") unless is_successful?(result)
+ [
+ metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0),
+ metric("#{metric_prefix}_success", is_successful?(result) ? 1 : 0),
+ metric("#{metric_prefix}_latency", elapsed)
+ ]
+ end
+ end
+
+ private
+
+ def metric_prefix
+ raise NotImplementedError
+ end
+
+ def is_successful?(result)
+ raise NotImplementedError
+ end
+
+ def check
+ raise NotImplementedError
+ end
+ end
+ end
+end