diff options
Diffstat (limited to 'lib/gitlab/metrics')
41 files changed, 854 insertions, 340 deletions
diff --git a/lib/gitlab/metrics/dashboard/cache.rb b/lib/gitlab/metrics/dashboard/cache.rb index a9ccf0fea9b..54b5250d209 100644 --- a/lib/gitlab/metrics/dashboard/cache.rb +++ b/lib/gitlab/metrics/dashboard/cache.rb @@ -9,34 +9,53 @@ module Gitlab CACHE_KEYS = 'all_cached_metric_dashboards' class << self - # Stores a dashboard in the cache, documenting the key - # so the cached can be cleared in bulk at another time. - def fetch(key) - register_key(key) + # This class method (Gitlab::Metrics::Dashboard::Cache.fetch) can be used + # when the key does not need to be deleted by `delete_all!`. + # For example, out of the box dashboard caches do not need to be deleted. + delegate :fetch, to: :"Rails.cache" - Rails.cache.fetch(key) { yield } - end + alias_method :for, :new + end + + def initialize(project) + @project = project + end + + # Stores a dashboard in the cache, documenting the key + # so the cache can be cleared in bulk at another time. + def fetch(key) + register_key(key) + + Rails.cache.fetch(key) { yield } + end - # Resets all dashboard caches, such that all - # dashboard content will be loaded from source on - # subsequent dashboard calls. - def delete_all! - all_keys.each { |key| Rails.cache.delete(key) } + # Resets all dashboard caches, such that all + # dashboard content will be loaded from source on + # subsequent dashboard calls. + def delete_all! + all_keys.each { |key| Rails.cache.delete(key) } - Rails.cache.delete(CACHE_KEYS) - end + Rails.cache.delete(catalog_key) + end - private + private - def register_key(key) - new_keys = all_keys.add(key).to_a.join('|') + def register_key(key) + new_keys = all_keys.add(key).to_a.join('|') - Rails.cache.write(CACHE_KEYS, new_keys) - end + Rails.cache.write(catalog_key, new_keys) + end + + def all_keys + keys = Rails.cache.read(catalog_key)&.split('|') + Set.new(keys) + end - def all_keys - Set.new(Rails.cache.read(CACHE_KEYS)&.split('|')) - end + # One key to store them all... + # This key is used to store the names of all the keys that contain this + # project's dashboards. + def catalog_key + "#{CACHE_KEYS}_#{@project.id}" end end end diff --git a/lib/gitlab/metrics/dashboard/defaults.rb b/lib/gitlab/metrics/dashboard/defaults.rb index 3c39a7c6911..6a5f98a18c8 100644 --- a/lib/gitlab/metrics/dashboard/defaults.rb +++ b/lib/gitlab/metrics/dashboard/defaults.rb @@ -7,7 +7,6 @@ module Gitlab module Dashboard module Defaults DEFAULT_PANEL_TYPE = 'area-chart' - DEFAULT_PANEL_WEIGHT = 0 end end end diff --git a/lib/gitlab/metrics/dashboard/finder.rb b/lib/gitlab/metrics/dashboard/finder.rb index 5e2d78e10a4..2c4793eb75f 100644 --- a/lib/gitlab/metrics/dashboard/finder.rb +++ b/lib/gitlab/metrics/dashboard/finder.rb @@ -14,10 +14,7 @@ module Gitlab ::Metrics::Dashboard::SelfMonitoringDashboardService, # This dashboard is displayed on the K8s cluster settings health page. - ::Metrics::Dashboard::ClusterDashboardService, - - # This dashboard is not yet ready for the world. - ::Metrics::Dashboard::PodDashboardService + ::Metrics::Dashboard::ClusterDashboardService ].freeze class << self @@ -72,17 +69,11 @@ module Gitlab # display_name: String, # default: Boolean }] def find_all_paths(project) - project.repository.metrics_dashboard_paths - end - - # Summary of all known dashboards. Used to populate repo cache. - # Prefer #find_all_paths. - def find_all_paths_from_source(project) - Gitlab::Metrics::Dashboard::Cache.delete_all! - - user_facing_dashboard_services(project).flat_map do |service| + dashboards = user_facing_dashboard_services(project).flat_map do |service| service.all_dashboard_paths(project) end + + Gitlab::Utils.stable_sort_by(dashboards) { |dashboard| dashboard[:display_name].downcase } end private diff --git a/lib/gitlab/metrics/dashboard/repo_dashboard_finder.rb b/lib/gitlab/metrics/dashboard/repo_dashboard_finder.rb new file mode 100644 index 00000000000..8b791e110ba --- /dev/null +++ b/lib/gitlab/metrics/dashboard/repo_dashboard_finder.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +# Provides methods to list and read dashboard yaml files from a project's repository. +module Gitlab + module Metrics + module Dashboard + class RepoDashboardFinder + DASHBOARD_ROOT = ".gitlab/dashboards" + DASHBOARD_EXTENSION = '.yml' + + class << self + # Returns list of all user-defined dashboard paths. Used to populate + # Repository model cache (Repository#user_defined_metrics_dashboard_paths). + # Also deletes all dashboard cache entries. + # @return [Array] ex) ['.gitlab/dashboards/dashboard1.yml'] + def list_dashboards(project) + Gitlab::Metrics::Dashboard::Cache.for(project).delete_all! + + file_finder(project).list_files_for(DASHBOARD_ROOT) + end + + # Reads the given dashboard from repository, and returns the content as a string. + # @return [String] + def read_dashboard(project, dashboard_path) + file_finder(project).read(dashboard_path) + end + + private + + def file_finder(project) + Gitlab::Template::Finders::RepoTemplateFinder.new(project, DASHBOARD_ROOT, DASHBOARD_EXTENSION) + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/dashboard/stages/custom_metrics_inserter.rb b/lib/gitlab/metrics/dashboard/stages/custom_metrics_inserter.rb index 3444a01bccd..3b49eb1c837 100644 --- a/lib/gitlab/metrics/dashboard/stages/custom_metrics_inserter.rb +++ b/lib/gitlab/metrics/dashboard/stages/custom_metrics_inserter.rb @@ -9,7 +9,10 @@ module Gitlab # config. If there are no project-specific metrics, # this will have no effect. def transform! - PrometheusMetricsFinder.new(project: project).execute.each do |project_metric| + custom_metrics = PrometheusMetricsFinder.new(project: project, ordered: true).execute + custom_metrics = Gitlab::Utils.stable_sort_by(custom_metrics) { |metric| -metric.priority } + + custom_metrics.each do |project_metric| group = find_or_create_panel_group(dashboard[:panel_groups], project_metric) panel = find_or_create_panel(group[:panels], project_metric) find_or_create_metric(panel[:metrics], project_metric) @@ -83,7 +86,6 @@ module Gitlab def new_panel_group(metric) { group: metric.group_title, - priority: metric.priority, panels: [] } end diff --git a/lib/gitlab/metrics/dashboard/stages/metric_endpoint_inserter.rb b/lib/gitlab/metrics/dashboard/stages/metric_endpoint_inserter.rb index c48a7ff25a5..dd85bd0beb1 100644 --- a/lib/gitlab/metrics/dashboard/stages/metric_endpoint_inserter.rb +++ b/lib/gitlab/metrics/dashboard/stages/metric_endpoint_inserter.rb @@ -45,7 +45,9 @@ module Gitlab raise Errors::MissingQueryError.new('Each "metric" must define one of :query or :query_range') unless query - query + # We need to remove any newlines since our UrlBlocker does not allow + # multiline URLs. + query.to_s.squish end end end diff --git a/lib/gitlab/metrics/dashboard/stages/sorter.rb b/lib/gitlab/metrics/dashboard/stages/sorter.rb deleted file mode 100644 index 882211e1441..00000000000 --- a/lib/gitlab/metrics/dashboard/stages/sorter.rb +++ /dev/null @@ -1,34 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Metrics - module Dashboard - module Stages - class Sorter < BaseStage - def transform! - missing_panel_groups! unless dashboard[:panel_groups].is_a? Array - - sort_groups! - sort_panels! - end - - private - - # Sorts the groups in the dashboard by the :priority key - def sort_groups! - dashboard[:panel_groups] = Gitlab::Utils.stable_sort_by(dashboard[:panel_groups]) { |group| -group[:priority].to_i } - end - - # Sorts the panels in the dashboard by the :weight key - def sort_panels! - dashboard[:panel_groups].each do |group| - missing_panels! unless group[:panels].is_a? Array - - group[:panels] = Gitlab::Utils.stable_sort_by(group[:panels]) { |panel| -panel[:weight].to_i } - end - end - end - end - end - end -end diff --git a/lib/gitlab/metrics/dashboard/stages/track_panel_type.rb b/lib/gitlab/metrics/dashboard/stages/track_panel_type.rb new file mode 100644 index 00000000000..71da779d16c --- /dev/null +++ b/lib/gitlab/metrics/dashboard/stages/track_panel_type.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + module Dashboard + module Stages + class TrackPanelType < BaseStage + def transform! + for_panel_groups do |panel_group| + for_panels_in(panel_group) do |panel| + track_panel_type(panel) + end + end + end + + private + + def track_panel_type(panel) + panel_type = panel[:type] + + Gitlab::Tracking.event('MetricsDashboard::Chart', 'chart_rendered', label: panel_type) + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/dashboard/url.rb b/lib/gitlab/metrics/dashboard/url.rb index 10a2f3c2397..160ecfb85c9 100644 --- a/lib/gitlab/metrics/dashboard/url.rb +++ b/lib/gitlab/metrics/dashboard/url.rb @@ -43,6 +43,39 @@ module Gitlab end end + # Matches dashboard urls for a metric chart embed + # for cluster metrics + # + # EX - https://<host>/<namespace>/<project>/-/clusters/<cluster_id>/?group=Cluster%20Health&title=Memory%20Usage&y_label=Memory%20(GiB) + def clusters_regex + strong_memoize(:clusters_regex) do + regex_for_project_metrics( + %r{ + /clusters + /(?<cluster_id>\d+) + /? + }x + ) + end + end + + # Matches dashboard urls for a metric chart embed + # for a specifc firing GitLab alert + # + # EX - https://<host>/<namespace>/<project>/prometheus/alerts/<alert_id>/metrics_dashboard + def alert_regex + strong_memoize(:alert_regex) do + regex_for_project_metrics( + %r{ + /prometheus + /alerts + /(?<alert>\d+) + /metrics_dashboard + }x + ) + end + end + # Parses query params out from full url string into hash. # # Ex) 'https://<root>/<project>/<environment>/metrics?title=Title&group=Group' @@ -60,22 +93,6 @@ module Gitlab Gitlab::Routing.url_helpers.metrics_dashboard_namespace_project_environment_url(*args) end - # Matches dashboard urls for a metric chart embed - # for cluster metrics - # - # EX - https://<host>/<namespace>/<project>/-/clusters/<cluster_id>/?group=Cluster%20Health&title=Memory%20Usage&y_label=Memory%20(GiB) - def clusters_regex - strong_memoize(:clusters_regex) do - regex_for_project_metrics( - %r{ - /clusters - /(?<cluster_id>\d+) - /? - }x - ) - end - end - private def regex_for_project_metrics(path_suffix_pattern) @@ -92,16 +109,18 @@ module Gitlab end def gitlab_host_pattern - Regexp.escape(Gitlab.config.gitlab.url) + Regexp.escape(gitlab_domain) end def project_path_pattern "\/#{Project.reference_pattern}" end + + def gitlab_domain + Gitlab.config.gitlab.url + end end end end end end - -Gitlab::Metrics::Dashboard::Url.extend_if_ee('::EE::Gitlab::Metrics::Dashboard::Url') diff --git a/lib/gitlab/metrics/dashboard/validator.rb b/lib/gitlab/metrics/dashboard/validator.rb new file mode 100644 index 00000000000..8edd9c397f9 --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + module Dashboard + module Validator + DASHBOARD_SCHEMA_PATH = 'lib/gitlab/metrics/dashboard/validator/schemas/dashboard.json'.freeze + + class << self + def validate(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil) + errors = _validate(content, schema_path, dashboard_path: dashboard_path, project: project) + errors.empty? + end + + def validate!(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil) + errors = _validate(content, schema_path, dashboard_path: dashboard_path, project: project) + errors.empty? || raise(errors.first) + end + + private + + def _validate(content, schema_path, dashboard_path: nil, project: nil) + client = Validator::Client.new(content, schema_path, dashboard_path: dashboard_path, project: project) + client.execute + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/dashboard/validator/client.rb b/lib/gitlab/metrics/dashboard/validator/client.rb new file mode 100644 index 00000000000..c63415abcfc --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/client.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + module Dashboard + module Validator + class Client + # @param content [Hash] Representing a raw, unprocessed + # dashboard object + # @param schema_path [String] Representing path to dashboard schema file + # @param dashboard_path[String] Representing path to dashboard content file + # @param project [Project] Project to validate dashboard against + def initialize(content, schema_path, dashboard_path: nil, project: nil) + @content = content + @schema_path = schema_path + @dashboard_path = dashboard_path + @project = project + end + + def execute + errors = validate_against_schema + errors += post_schema_validator.validate + + errors.compact + end + + private + + attr_reader :content, :schema_path, :project, :dashboard_path + + def custom_formats + @custom_formats ||= CustomFormats.new + end + + def post_schema_validator + PostSchemaValidator.new( + project: project, + metric_ids: custom_formats.metric_ids_cache, + dashboard_path: dashboard_path + ) + end + + def schemer + @schemer ||= ::JSONSchemer.schema(Pathname.new(schema_path), formats: custom_formats.format_handlers) + end + + def validate_against_schema + schemer.validate(content).map do |error| + Errors::SchemaValidationError.new(error) + end + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/dashboard/validator/custom_formats.rb b/lib/gitlab/metrics/dashboard/validator/custom_formats.rb new file mode 100644 index 00000000000..485e80ad1b7 --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/custom_formats.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + module Dashboard + module Validator + class CustomFormats + def format_handlers + # Key is custom JSON Schema format name. Value is a proc that takes data and schema and handles + # validations. + @format_handlers ||= { + "add_to_metric_id_cache" => ->(data, schema) { metric_ids_cache << data } + } + end + + def metric_ids_cache + @metric_ids_cache ||= [] + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/dashboard/validator/errors.rb b/lib/gitlab/metrics/dashboard/validator/errors.rb new file mode 100644 index 00000000000..0f6e687d291 --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/errors.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + module Dashboard + module Validator + module Errors + InvalidDashboardError = Class.new(StandardError) + + class SchemaValidationError < InvalidDashboardError + def initialize(error = {}) + super(error_message(error)) + end + + private + + def error_message(error) + if error.is_a?(Hash) && error.present? + pretty(error) + else + "Dashboard failed schema validation" + end + end + + # based on https://github.com/davishmcclurg/json_schemer/blob/master/lib/json_schemer/errors.rb + # with addition ability to translate error messages + def pretty(error) + data, data_pointer, type, schema = error.values_at('data', 'data_pointer', 'type', 'schema') + location = data_pointer.empty? ? 'root' : data_pointer + + case type + when 'required' + keys = error.fetch('details').fetch('missing_keys').join(', ') + _("%{location} is missing required keys: %{keys}") % { location: location, keys: keys } + when 'null', 'string', 'boolean', 'integer', 'number', 'array', 'object' + _("'%{data}' at %{location} is not of type: %{type}") % { data: data, location: location, type: type } + when 'pattern' + _("'%{data}' at %{location} does not match pattern: %{pattern}") % { data: data, location: location, pattern: schema.fetch('pattern') } + when 'format' + _("'%{data}' at %{location} does not match format: %{format}") % { data: data, location: location, format: schema.fetch('format') } + when 'const' + _("'%{data}' at %{location} is not: %{const}") % { data: data, location: location, const: schema.fetch('const').inspect } + when 'enum' + _("'%{data}' at %{location} is not one of: %{enum}") % { data: data, location: location, enum: schema.fetch('enum') } + else + _("'%{data}' at %{location} is invalid: error_type=%{type}") % { data: data, location: location, type: type } + end + end + end + + class DuplicateMetricIds < InvalidDashboardError + def initialize + super(_("metric_id must be unique across a project")) + end + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/dashboard/validator/post_schema_validator.rb b/lib/gitlab/metrics/dashboard/validator/post_schema_validator.rb new file mode 100644 index 00000000000..73bfc5a6294 --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/post_schema_validator.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + module Dashboard + module Validator + class PostSchemaValidator + def initialize(metric_ids:, project: nil, dashboard_path: nil) + @metric_ids = metric_ids + @project = project + @dashboard_path = dashboard_path + end + + def validate + errors = [] + errors << uniq_metric_ids + errors.compact + end + + private + + attr_reader :project, :metric_ids, :dashboard_path + + def uniq_metric_ids + return Validator::Errors::DuplicateMetricIds.new if metric_ids.uniq! + + uniq_metric_ids_across_project if project.present? || dashboard_path.present? + end + + # rubocop: disable CodeReuse/ActiveRecord + def uniq_metric_ids_across_project + return ArgumentError.new(_('Both project and dashboard_path are required')) unless + dashboard_path.present? && project.present? + + # If PrometheusMetric identifier is not unique across project and dashboard_path, + # we need to error because we don't know if the user is trying to create a new metric + # or update an existing one. + identifier_on_other_dashboard = PrometheusMetric.where( + project: project, + identifier: metric_ids + ).where.not( + dashboard_path: dashboard_path + ).exists? + + Validator::Errors::DuplicateMetricIds.new if identifier_on_other_dashboard + end + # rubocop: enable CodeReuse/ActiveRecord + end + end + end + end +end diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/axis.json b/lib/gitlab/metrics/dashboard/validator/schemas/axis.json new file mode 100644 index 00000000000..54334022426 --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/schemas/axis.json @@ -0,0 +1,14 @@ +{ + "type": "object", + "properties": { + "name": { "type": "string" }, + "format": { + "type": "string", + "default": "engineering" + }, + "precision": { + "type": "number", + "default": 2 + } + } +} diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/dashboard.json b/lib/gitlab/metrics/dashboard/validator/schemas/dashboard.json new file mode 100644 index 00000000000..313f03be7dc --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/schemas/dashboard.json @@ -0,0 +1,18 @@ +{ + "type": "object", + "required": ["dashboard", "panel_groups"], + "properties": { + "dashboard": { "type": "string" }, + "panel_groups": { + "type": "array", + "items": { "$ref": "./panel_group.json" } + }, + "templating": { + "$ref": "./templating.json" + }, + "links": { + "type": "array", + "items": { "$ref": "./link.json" } + } + } +} diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/link.json b/lib/gitlab/metrics/dashboard/validator/schemas/link.json new file mode 100644 index 00000000000..4ea7b5dd324 --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/schemas/link.json @@ -0,0 +1,12 @@ +{ + "type": "object", + "required": ["url"], + "properties": { + "url": { "type": "string" }, + "title": { "type": "string" }, + "type": { + "type": "string", + "enum": ["grafana"] + } + } +} diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/metric.json b/lib/gitlab/metrics/dashboard/validator/schemas/metric.json new file mode 100644 index 00000000000..13831b77e3e --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/schemas/metric.json @@ -0,0 +1,16 @@ +{ + "type": "object", + "required": ["unit"], + "oneOf": [{ "required": ["query"] }, { "required": ["query_range"] }], + "properties": { + "id": { + "type": "string", + "format": "add_to_metric_id_cache" + }, + "unit": { "type": "string" }, + "label": { "type": "string" }, + "query": { "type": ["string", "number"] }, + "query_range": { "type": ["string", "number"] }, + "step": { "type": "number" } + } +} diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/panel.json b/lib/gitlab/metrics/dashboard/validator/schemas/panel.json new file mode 100644 index 00000000000..011eef53e40 --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/schemas/panel.json @@ -0,0 +1,24 @@ +{ + "type": "object", + "required": ["title", "metrics"], + "properties": { + "type": { + "type": "string", + "enum": ["area-chart", "anomaly-chart", "bar", "column", "stacked-column", "single-stat", "heatmap"], + "default": "area-chart" + }, + "title": { "type": "string" }, + "y_label": { "type": "string" }, + "y_axis": { "$ref": "./axis.json" }, + "max_value": { "type": "number" }, + "weight": { "type": "number" }, + "metrics": { + "type": "array", + "items": { "$ref": "./metric.json" } + }, + "links": { + "type": "array", + "items": { "$ref": "./link.json" } + } + } +} diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/panel_group.json b/lib/gitlab/metrics/dashboard/validator/schemas/panel_group.json new file mode 100644 index 00000000000..1306fc475db --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/schemas/panel_group.json @@ -0,0 +1,12 @@ +{ + "type": "object", + "required": ["group", "panels"], + "properties": { + "group": { "type": "string" }, + "priority": { "type": "number" }, + "panels": { + "type": "array", + "items": { "$ref": "./panel.json" } + } + } +} diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/templating.json b/lib/gitlab/metrics/dashboard/validator/schemas/templating.json new file mode 100644 index 00000000000..6f8664c89af --- /dev/null +++ b/lib/gitlab/metrics/dashboard/validator/schemas/templating.json @@ -0,0 +1,7 @@ +{ + "type": "object", + "required": ["variables"], + "properties": { + "variables": { "type": "object" } + } +} diff --git a/lib/gitlab/metrics/elasticsearch_rack_middleware.rb b/lib/gitlab/metrics/elasticsearch_rack_middleware.rb index 6830eed68d5..870ab148004 100644 --- a/lib/gitlab/metrics/elasticsearch_rack_middleware.rb +++ b/lib/gitlab/metrics/elasticsearch_rack_middleware.rb @@ -4,18 +4,10 @@ module Gitlab module Metrics # Rack middleware for tracking Elasticsearch metrics from Grape and Web requests. class ElasticsearchRackMiddleware - HISTOGRAM_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60].freeze + HISTOGRAM_BUCKETS = [0.1, 0.5, 1, 10, 50].freeze def initialize(app) @app = app - - @requests_total_counter = Gitlab::Metrics.counter(:http_elasticsearch_requests_total, - 'Amount of calls to Elasticsearch servers during web requests', - Gitlab::Metrics::Transaction::BASE_LABELS) - @requests_duration_histogram = Gitlab::Metrics.histogram(:http_elasticsearch_requests_duration_seconds, - 'Query time for Elasticsearch servers during web requests', - Gitlab::Metrics::Transaction::BASE_LABELS, - HISTOGRAM_BUCKETS) end def call(env) @@ -29,12 +21,19 @@ module Gitlab private def record_metrics(transaction) - labels = transaction.labels query_time = ::Gitlab::Instrumentation::ElasticsearchTransport.query_time request_count = ::Gitlab::Instrumentation::ElasticsearchTransport.get_request_count - @requests_total_counter.increment(labels, request_count) - @requests_duration_histogram.observe(labels, query_time) + return unless request_count > 0 + + transaction.increment(:http_elasticsearch_requests_total, request_count) do + docstring 'Amount of calls to Elasticsearch servers during web requests' + end + + transaction.observe(:http_elasticsearch_requests_duration_seconds, query_time) do + docstring 'Query time for Elasticsearch servers during web requests' + buckets HISTOGRAM_BUCKETS + end end end end diff --git a/lib/gitlab/metrics/method_call.rb b/lib/gitlab/metrics/method_call.rb index fbeda3b75e0..c6b0a0c5e76 100644 --- a/lib/gitlab/metrics/method_call.rb +++ b/lib/gitlab/metrics/method_call.rb @@ -4,16 +4,7 @@ module Gitlab module Metrics # Class for tracking timing information about method calls class MethodCall - include Gitlab::Metrics::Methods - BASE_LABELS = { module: nil, method: nil }.freeze - attr_reader :real_time, :cpu_time, :call_count, :labels - - define_histogram :gitlab_method_call_duration_seconds do - docstring 'Method calls real duration' - base_labels Transaction::BASE_LABELS.merge(BASE_LABELS) - buckets [0.01, 0.05, 0.1, 0.5, 1] - with_feature :prometheus_metrics_method_instrumentation - end + attr_reader :real_time, :cpu_time, :call_count # name - The full name of the method (including namespace) such as # `User#sign_in`. @@ -42,8 +33,14 @@ module Gitlab @cpu_time += cpu_time @call_count += 1 - if above_threshold? - self.class.gitlab_method_call_duration_seconds.observe(@transaction.labels.merge(labels), real_time) + if above_threshold? && transaction + label_keys = labels.keys + transaction.observe(:gitlab_method_call_duration_seconds, real_time, labels) do + docstring 'Method calls real duration' + label_keys label_keys + buckets [0.01, 0.05, 0.1, 0.5, 1] + with_feature :prometheus_metrics_method_instrumentation + end end retval @@ -54,6 +51,10 @@ module Gitlab def above_threshold? real_time.in_milliseconds >= ::Gitlab::Metrics.method_call_threshold end + + private + + attr_reader :labels, :transaction end end end diff --git a/lib/gitlab/metrics/methods.rb b/lib/gitlab/metrics/methods.rb index 83a7b925392..2b5d1c710f6 100644 --- a/lib/gitlab/metrics/methods.rb +++ b/lib/gitlab/metrics/methods.rb @@ -69,62 +69,6 @@ module Gitlab raise ArgumentError, "uknown metric type #{type}" end end - - # Fetch and/or initialize counter metric - # @param [Symbol] name - # @param [Hash] opts - def fetch_counter(name, opts = {}, &block) - fetch_metric(:counter, name, opts, &block) - end - - # Fetch and/or initialize gauge metric - # @param [Symbol] name - # @param [Hash] opts - def fetch_gauge(name, opts = {}, &block) - fetch_metric(:gauge, name, opts, &block) - end - - # Fetch and/or initialize histogram metric - # @param [Symbol] name - # @param [Hash] opts - def fetch_histogram(name, opts = {}, &block) - fetch_metric(:histogram, name, opts, &block) - end - - # Fetch and/or initialize summary metric - # @param [Symbol] name - # @param [Hash] opts - def fetch_summary(name, opts = {}, &block) - fetch_metric(:summary, name, opts, &block) - end - - # Define metric accessor method for a Counter - # @param [Symbol] name - # @param [Hash] opts - def define_counter(name, opts = {}, &block) - define_metric(:counter, name, opts, &block) - end - - # Define metric accessor method for a Gauge - # @param [Symbol] name - # @param [Hash] opts - def define_gauge(name, opts = {}, &block) - define_metric(:gauge, name, opts, &block) - end - - # Define metric accessor method for a Histogram - # @param [Symbol] name - # @param [Hash] opts - def define_histogram(name, opts = {}, &block) - define_metric(:histogram, name, opts, &block) - end - - # Define metric accessor method for a Summary - # @param [Symbol] name - # @param [Hash] opts - def define_summary(name, opts = {}, &block) - define_metric(:summary, name, opts, &block) - end end end end diff --git a/lib/gitlab/metrics/methods/metric_options.rb b/lib/gitlab/metrics/methods/metric_options.rb index 8e6ceb74c09..1e488df3e99 100644 --- a/lib/gitlab/metrics/methods/metric_options.rb +++ b/lib/gitlab/metrics/methods/metric_options.rb @@ -4,14 +4,12 @@ module Gitlab module Metrics module Methods class MetricOptions - SMALL_NETWORK_BUCKETS = [0.005, 0.01, 0.1, 1, 10].freeze - def initialize(options = {}) @multiprocess_mode = options[:multiprocess_mode] || :all - @buckets = options[:buckets] || SMALL_NETWORK_BUCKETS - @base_labels = options[:base_labels] || {} + @buckets = options[:buckets] || ::Prometheus::Client::Histogram::DEFAULT_BUCKETS @docstring = options[:docstring] @with_feature = options[:with_feature] + @label_keys = options[:label_keys] || [] end # Documentation describing metric in metrics endpoint '/-/metrics' @@ -40,12 +38,21 @@ module Gitlab end # Base labels are merged with per metric labels - def base_labels(base_labels = nil) - @base_labels = base_labels unless base_labels.nil? + def base_labels + @base_labels ||= @label_keys.product([nil]).to_h @base_labels end + def label_keys(label_keys = nil) + unless label_keys.nil? + @label_keys = label_keys + @base_labels = nil + end + + @label_keys + end + # Use feature toggle to control whether certain metric is enabled/disabled def with_feature(name = nil) @with_feature = name unless name.nil? @@ -55,6 +62,7 @@ module Gitlab def evaluate(&block) instance_eval(&block) if block_given? + self end end diff --git a/lib/gitlab/metrics/rack_middleware.rb b/lib/gitlab/metrics/rack_middleware.rb index c6a0457ffe5..a6884ea6983 100644 --- a/lib/gitlab/metrics/rack_middleware.rb +++ b/lib/gitlab/metrics/rack_middleware.rb @@ -10,8 +10,7 @@ module Gitlab # env - A Hash containing Rack environment details. def call(env) - trans = transaction_from_env(env) - retval = nil + trans = WebTransaction.new(env) begin retval = trans.run { @app.call(env) } @@ -24,21 +23,6 @@ module Gitlab retval end - - def transaction_from_env(env) - trans = WebTransaction.new(env) - - trans.set(:request_uri, filtered_path(env), false) - trans.set(:request_method, env['REQUEST_METHOD'], false) - - trans - end - - private - - def filtered_path(env) - ActionDispatch::Request.new(env).filtered_path.presence || env['REQUEST_URI'] - end end end end diff --git a/lib/gitlab/metrics/redis_rack_middleware.rb b/lib/gitlab/metrics/redis_rack_middleware.rb deleted file mode 100644 index f0f99c5f45d..00000000000 --- a/lib/gitlab/metrics/redis_rack_middleware.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Metrics - # Rack middleware for tracking Redis metrics from Grape and Web requests. - class RedisRackMiddleware - def initialize(app) - @app = app - - @requests_total_counter = Gitlab::Metrics.counter(:http_redis_requests_total, - 'Amount of calls to Redis servers during web requests', - Gitlab::Metrics::Transaction::BASE_LABELS) - @requests_duration_histogram = Gitlab::Metrics.histogram(:http_redis_requests_duration_seconds, - 'Query time for Redis servers during web requests', - Gitlab::Metrics::Transaction::BASE_LABELS, - Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS) - end - - def call(env) - transaction = Gitlab::Metrics.current_transaction - - @app.call(env) - ensure - record_metrics(transaction) - end - - private - - def record_metrics(transaction) - labels = transaction.labels - query_time = Gitlab::Instrumentation::Redis.query_time - request_count = Gitlab::Instrumentation::Redis.get_request_count - - @requests_total_counter.increment(labels, request_count) - @requests_duration_histogram.observe(labels, query_time) - end - end - end -end diff --git a/lib/gitlab/metrics/samplers/threads_sampler.rb b/lib/gitlab/metrics/samplers/threads_sampler.rb new file mode 100644 index 00000000000..05acef7ce0c --- /dev/null +++ b/lib/gitlab/metrics/samplers/threads_sampler.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + module Samplers + class ThreadsSampler < BaseSampler + SAMPLING_INTERVAL_SECONDS = 5 + KNOWN_PUMA_THREAD_NAMES = ['puma worker check pipe', 'puma server', + 'puma threadpool reaper', 'puma threadpool trimmer', + 'puma worker check pipe', 'puma stat payload'].freeze + + SIDEKIQ_WORKER_THREAD_NAME = 'sidekiq_worker_thread' + + METRIC_PREFIX = "gitlab_ruby_threads_" + + METRIC_DESCRIPTIONS = { + max_expected_threads: "Maximum number of threads expected to be running and performing application work", + running_threads: "Number of running Ruby threads by name" + }.freeze + + def metrics + @metrics ||= METRIC_DESCRIPTIONS.each_with_object({}) do |(name, description), result| + result[name] = ::Gitlab::Metrics.gauge(:"#{METRIC_PREFIX}#{name}", description) + end + end + + def sample + metrics[:max_expected_threads].set({}, Gitlab::Runtime.max_threads) + + threads_by_name.each do |name, threads| + uses_db, not_using_db = threads.partition { |thread| thread[:uses_db_connection] } + + set_running_threads(name, uses_db_connection: "yes", size: uses_db.size) + set_running_threads(name, uses_db_connection: "no", size: not_using_db.size) + end + end + + private + + def set_running_threads(name, uses_db_connection:, size:) + metrics[:running_threads].set({ thread_name: name, uses_db_connection: uses_db_connection }, size) + end + + def threads_by_name + Thread.list.group_by { |thread| name_for_thread(thread) } + end + + def uses_db_connection(thread) + thread[:uses_db_connection] ? "yes" : "no" + end + + def name_for_thread(thread) + thread_name = thread.name.to_s.presence + + if thread_name.presence.nil? + 'unnamed' + elsif thread_name =~ /puma threadpool \d+/ + # These are the puma workers processing requests + 'puma threadpool' + elsif use_thread_name?(thread_name) + thread_name + else + 'unrecognized' + end + end + + def use_thread_name?(thread_name) + thread_name == SIDEKIQ_WORKER_THREAD_NAME || + # Samplers defined in `lib/gitlab/metrics/samplers` + thread_name.ends_with?('sampler') || + # Exporters from `lib/gitlab/metrics/exporter` + thread_name.ends_with?('exporter') || + KNOWN_PUMA_THREAD_NAMES.include?(thread_name) + end + end + end + end +end diff --git a/lib/gitlab/metrics/sidekiq_middleware.rb b/lib/gitlab/metrics/sidekiq_middleware.rb index 1c99e1e730c..8c4e5a8d70c 100644 --- a/lib/gitlab/metrics/sidekiq_middleware.rb +++ b/lib/gitlab/metrics/sidekiq_middleware.rb @@ -12,7 +12,9 @@ module Gitlab begin # Old gitlad-shell messages don't provide enqueued_at/created_at attributes enqueued_at = payload['enqueued_at'] || payload['created_at'] || 0 - trans.set(:sidekiq_queue_duration, Time.current.to_f - enqueued_at) + trans.set(:gitlab_transaction_sidekiq_queue_duration_total, Time.current.to_f - enqueued_at) do + multiprocess_mode :livesum + end trans.run { yield } rescue Exception => error # rubocop: disable Lint/RescueException trans.add_event(:sidekiq_exception) diff --git a/lib/gitlab/metrics/subscribers/action_view.rb b/lib/gitlab/metrics/subscribers/action_view.rb index 24107e42aa9..e1f1f37c905 100644 --- a/lib/gitlab/metrics/subscribers/action_view.rb +++ b/lib/gitlab/metrics/subscribers/action_view.rb @@ -5,14 +5,6 @@ module Gitlab module Subscribers # Class for tracking the rendering timings of views. class ActionView < ActiveSupport::Subscriber - include Gitlab::Metrics::Methods - define_histogram :gitlab_view_rendering_duration_seconds do - docstring 'View rendering time' - base_labels Transaction::BASE_LABELS.merge({ path: nil }) - buckets [0.001, 0.01, 0.1, 1, 10.0] - with_feature :prometheus_metrics_view_instrumentation - end - attach_to :action_view SERIES = 'views' @@ -27,10 +19,14 @@ module Gitlab def track(event) tags = tags_for(event) - - self.class.gitlab_view_rendering_duration_seconds.observe(current_transaction.labels.merge(tags), event.duration) - - current_transaction.increment(:view_duration, event.duration) + current_transaction.observe(:gitlab_view_rendering_duration_seconds, event.duration, tags) do + docstring 'View rendering time' + label_keys %i(view) + buckets [0.001, 0.01, 0.1, 1, 10.0] + with_feature :prometheus_metrics_view_instrumentation + end + + current_transaction.increment(:gitlab_transaction_view_duration_total, event.duration) end def relative_path(path) diff --git a/lib/gitlab/metrics/subscribers/active_record.rb b/lib/gitlab/metrics/subscribers/active_record.rb index d2736882432..e53ac00e77f 100644 --- a/lib/gitlab/metrics/subscribers/active_record.rb +++ b/lib/gitlab/metrics/subscribers/active_record.rb @@ -5,20 +5,25 @@ module Gitlab module Subscribers # Class for tracking the total query duration of a transaction. class ActiveRecord < ActiveSupport::Subscriber - include Gitlab::Metrics::Methods attach_to :active_record IGNORABLE_SQL = %w{BEGIN COMMIT}.freeze DB_COUNTERS = %i{db_count db_write_count db_cached_count}.freeze def sql(event) + # Mark this thread as requiring a database connection. This is used + # by the Gitlab::Metrics::Samplers::ThreadsSampler to count threads + # using a connection. + Thread.current[:uses_db_connection] = true + return unless current_transaction payload = event.payload - return if payload[:name] == 'SCHEMA' || IGNORABLE_SQL.include?(payload[:sql]) - self.class.gitlab_sql_duration_seconds.observe(current_transaction.labels, event.duration / 1000.0) + current_transaction.observe(:gitlab_sql_duration_seconds, event.duration / 1000.0) do + buckets [0.05, 0.1, 0.25] + end increment_db_counters(payload) end @@ -33,12 +38,6 @@ module Gitlab private - define_histogram :gitlab_sql_duration_seconds do - docstring 'SQL time' - base_labels Transaction::BASE_LABELS - buckets [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0] - end - def select_sql_command?(payload) payload[:sql].match(/\A((?!(.*[^\w'"](DELETE|UPDATE|INSERT INTO)[^\w'"])))(WITH.*)?(SELECT)((?!(FOR UPDATE|FOR SHARE)).)*$/i) end @@ -54,7 +53,7 @@ module Gitlab end def increment(counter) - current_transaction.increment(counter, 1) + current_transaction.increment("gitlab_transaction_#{counter}_total".to_sym, 1) if Gitlab::SafeRequestStore.active? Gitlab::SafeRequestStore[counter] = Gitlab::SafeRequestStore[counter].to_i + 1 diff --git a/lib/gitlab/metrics/subscribers/rails_cache.rb b/lib/gitlab/metrics/subscribers/rails_cache.rb index 2ee7144fe2f..b274d2b1079 100644 --- a/lib/gitlab/metrics/subscribers/rails_cache.rb +++ b/lib/gitlab/metrics/subscribers/rails_cache.rb @@ -14,11 +14,10 @@ module Gitlab return unless current_transaction return if event.payload[:super_operation] == :fetch - if event.payload[:hit] - current_transaction.increment(:cache_read_hit_count, 1, false) - else - metric_cache_misses_total.increment(current_transaction.labels) - current_transaction.increment(:cache_read_miss_count, 1, false) + unless event.payload[:hit] + current_transaction.increment(:gitlab_cache_misses_total, 1) do + docstring 'Cache read miss' + end end end @@ -37,25 +36,30 @@ module Gitlab def cache_fetch_hit(event) return unless current_transaction - current_transaction.increment(:cache_read_hit_count, 1) + current_transaction.increment(:gitlab_transaction_cache_read_hit_count_total, 1) end def cache_generate(event) return unless current_transaction - metric_cache_misses_total.increment(current_transaction.labels) - current_transaction.increment(:cache_read_miss_count, 1) + current_transaction.increment(:gitlab_cache_misses_total, 1) do + docstring 'Cache read miss' + end + + current_transaction.increment(:gitlab_transaction_cache_read_miss_count_total, 1) end def observe(key, duration) return unless current_transaction - metric_cache_operations_total.increment(current_transaction.labels.merge({ operation: key })) - metric_cache_operation_duration_seconds.observe({ operation: key }, duration / 1000.0) - current_transaction.increment(:cache_duration, duration, false) - current_transaction.increment(:cache_count, 1, false) - current_transaction.increment("cache_#{key}_duration".to_sym, duration, false) - current_transaction.increment("cache_#{key}_count".to_sym, 1, false) + labels = { operation: key } + + current_transaction.increment(:gitlab_cache_operations_total, 1, labels) do + docstring 'Cache operations' + label_keys labels.keys + end + + metric_cache_operation_duration_seconds.observe(labels, duration / 1000.0) end private @@ -64,14 +68,6 @@ module Gitlab Transaction.current end - def metric_cache_operations_total - @metric_cache_operations_total ||= ::Gitlab::Metrics.counter( - :gitlab_cache_operations_total, - 'Cache operations', - Transaction::BASE_LABELS - ) - end - def metric_cache_operation_duration_seconds @metric_cache_operation_duration_seconds ||= ::Gitlab::Metrics.histogram( :gitlab_cache_operation_duration_seconds, @@ -80,14 +76,6 @@ module Gitlab [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0] ) end - - def metric_cache_misses_total - @metric_cache_misses_total ||= ::Gitlab::Metrics.counter( - :gitlab_cache_misses_total, - 'Cache read miss', - Transaction::BASE_LABELS - ) - end end end end diff --git a/lib/gitlab/metrics/templates/Area.metrics-dashboard.yml b/lib/gitlab/metrics/templates/Area.metrics-dashboard.yml new file mode 100644 index 00000000000..1f7dd25aaee --- /dev/null +++ b/lib/gitlab/metrics/templates/Area.metrics-dashboard.yml @@ -0,0 +1,15 @@ +# Only one dashboard should be defined per file +# More info: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html +dashboard: 'Area Panel Example' + +# For more information about the required properties of panel_groups +# please visit: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html#panel-group-panel_groups-properties +panel_groups: + - group: 'Server Statistics' + panels: + - title: Average amount of time spent by the CPU + type: area-chart + metrics: + - query_range: 'rate(node_cpu_seconds_total[15m])' + unit: 'Seconds' + label: "Time in Seconds" diff --git a/lib/gitlab/metrics/templates/Default.metrics-dashboard.yml b/lib/gitlab/metrics/templates/Default.metrics-dashboard.yml new file mode 100644 index 00000000000..b331e792461 --- /dev/null +++ b/lib/gitlab/metrics/templates/Default.metrics-dashboard.yml @@ -0,0 +1,24 @@ +# Only one dashboard should be defined per file +# More info: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html +dashboard: 'Single Stat' + +# This is where all of the variables that can be manipulated via the UI +# are initialized +# Check out: https://docs.gitlab.com/ee/operations/metrics/dashboards/templating_variables.html#templating-variables-for-metrics-dashboards-core +templating: + variables: + job: 'prometheus' + +# For more information about the required properties of panel_groups +# please visit: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html#panel-group-panel_groups-properties +panel_groups: + - group: 'Memory' + panels: + - title: Prometheus + type: single-stat + metrics: + # Queries that make use of variables need to have double curly brackets {} + # set to the variables, per the example below + - query: 'max(go_memstats_alloc_bytes{job="{{job}}"}) / 1024 /1024' + unit: '%' + label: "Max" diff --git a/lib/gitlab/metrics/templates/gauge.metrics-dashboard.yml b/lib/gitlab/metrics/templates/gauge.metrics-dashboard.yml new file mode 100644 index 00000000000..1c17a3a4d40 --- /dev/null +++ b/lib/gitlab/metrics/templates/gauge.metrics-dashboard.yml @@ -0,0 +1,23 @@ +# Only one dashboard should be defined per file +# More info: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html +dashboard: 'Gauge Panel Example' + +# For more information about the required properties of panel_groups +# please visit: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html#panel-group-panel_groups-properties +panel_groups: + - group: 'Server Statistics' + panels: + - title: "Memory usage" + # More information about gauge panel types can be found here: + # https://docs.gitlab.com/ee/operations/metrics/dashboards/panel_types.html#gauge + type: "gauge-chart" + min_value: 0 + max_value: 1024 + split: 10 + thresholds: + mode: "percentage" + values: [60, 90] + format: "megabytes" + metrics: + - query: '(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / 1024 / 1024' + unit: 'MB' diff --git a/lib/gitlab/metrics/templates/index.md b/lib/gitlab/metrics/templates/index.md new file mode 100644 index 00000000000..59fc85899da --- /dev/null +++ b/lib/gitlab/metrics/templates/index.md @@ -0,0 +1,3 @@ +# Development guide for Metrics Dashboard templates + +Please follow [the development guideline](../../../../doc/development/operations/metrics/templates.md) diff --git a/lib/gitlab/metrics/templates/k8s_area.metrics-dashboard.yml b/lib/gitlab/metrics/templates/k8s_area.metrics-dashboard.yml new file mode 100644 index 00000000000..aea816658d0 --- /dev/null +++ b/lib/gitlab/metrics/templates/k8s_area.metrics-dashboard.yml @@ -0,0 +1,15 @@ +# Only one dashboard should be defined per file +# More info: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html +dashboard: 'Area Panel Example' + +# For more information about the required properties of panel_groups +# please visit: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html#panel-group-panel_groups-properties +panel_groups: + - group: 'Server Statistics' + panels: + - title: "Core Usage (Pod Average)" + type: area-chart + metrics: + - query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod)) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))' + unit: 'cores' + label: "Pod Average (in seconds)" diff --git a/lib/gitlab/metrics/templates/k8s_gauge.metrics-dashboard.yml b/lib/gitlab/metrics/templates/k8s_gauge.metrics-dashboard.yml new file mode 100644 index 00000000000..7f97719765b --- /dev/null +++ b/lib/gitlab/metrics/templates/k8s_gauge.metrics-dashboard.yml @@ -0,0 +1,23 @@ +# Only one dashboard should be defined per file +# More info: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html +dashboard: 'Gauge K8s Panel Example' + +# For more information about the required properties of panel_groups +# please visit: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html#panel-group-panel_groups-properties +panel_groups: + - group: 'Server Statistics' + panels: + - title: "Memory usage" + # More information about gauge panel types can be found here: + # https://docs.gitlab.com/ee/operations/metrics/dashboards/panel_types.html#gauge + type: "gauge-chart" + min_value: 0 + max_value: 1024 + split: 10 + thresholds: + mode: "percentage" + values: [60, 90] + format: "megabytes" + metrics: + - query: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024' + unit: 'MB' diff --git a/lib/gitlab/metrics/templates/k8s_single-stat.metrics-dashboard.yml b/lib/gitlab/metrics/templates/k8s_single-stat.metrics-dashboard.yml new file mode 100644 index 00000000000..829e12357ff --- /dev/null +++ b/lib/gitlab/metrics/templates/k8s_single-stat.metrics-dashboard.yml @@ -0,0 +1,17 @@ +# Only one dashboard should be defined per file +# More info: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html +dashboard: 'Single Stat Panel Example' + +# For more information about the required properties of panel_groups +# please visit: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html#panel-group-panel_groups-properties +panel_groups: + - group: 'Server Statistics' + panels: + - title: "Memory usage" + # More information about heatmap panel types can be found here: + # https://docs.gitlab.com/ee/operations/metrics/dashboards/panel_types.html#single-stat + type: "single-stat" + metrics: + - query: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024' + unit: 'MB' + label: "Used memory" diff --git a/lib/gitlab/metrics/templates/single-stat.metrics-dashboard.yml b/lib/gitlab/metrics/templates/single-stat.metrics-dashboard.yml new file mode 100644 index 00000000000..18c27fffc7c --- /dev/null +++ b/lib/gitlab/metrics/templates/single-stat.metrics-dashboard.yml @@ -0,0 +1,17 @@ +# Only one dashboard should be defined per file +# More info: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html +dashboard: 'Heatmap Panel Example' + +# For more information about the required properties of panel_groups +# please visit: https://docs.gitlab.com/ee/operations/metrics/dashboards/yaml.html#panel-group-panel_groups-properties +panel_groups: + - group: 'Server Statistics' + panels: + - title: "Memory usage" + # More information about heatmap panel types can be found here: + # https://docs.gitlab.com/ee/operations/metrics/dashboards/panel_types.html#single-stat + type: "single-stat" + metrics: + - query: '(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / 1024 / 1024' + unit: 'MB' + label: "Used memory" diff --git a/lib/gitlab/metrics/transaction.rb b/lib/gitlab/metrics/transaction.rb index da06be9c79c..95bc90f9dad 100644 --- a/lib/gitlab/metrics/transaction.rb +++ b/lib/gitlab/metrics/transaction.rb @@ -6,20 +6,35 @@ module Gitlab class Transaction include Gitlab::Metrics::Methods - # base labels shared among all transactions - BASE_LABELS = { controller: nil, action: nil, feature_category: nil }.freeze + # base label keys shared among all transactions + BASE_LABEL_KEYS = %i(controller action feature_category).freeze # labels that potentially contain sensitive information and will be filtered - FILTERED_LABELS = [:branch, :path].freeze + FILTERED_LABEL_KEYS = %i(branch path).freeze THREAD_KEY = :_gitlab_metrics_transaction + SMALL_BUCKETS = [0.1, 0.25, 0.5, 1.0, 2.5, 5.0].freeze + # The series to store events (e.g. Git pushes) in. EVENT_SERIES = 'events' attr_reader :method - def self.current - Thread.current[THREAD_KEY] + class << self + def current + Thread.current[THREAD_KEY] + end + + def prometheus_metric(name, type, &block) + fetch_metric(type, name) do + # set default metric options + docstring "#{name.to_s.humanize} #{type}" + + evaluate(&block) + # always filter sensitive labels and merge with base ones + label_keys BASE_LABEL_KEYS | (label_keys - FILTERED_LABEL_KEYS) + end + end end def initialize @@ -27,9 +42,6 @@ module Gitlab @started_at = nil @finished_at = nil - - @memory_before = 0 - @memory_after = 0 end def duration @@ -40,25 +52,22 @@ module Gitlab System.thread_cpu_duration(@thread_cputime_start) end - def allocated_memory - @memory_after - @memory_before - end - def run Thread.current[THREAD_KEY] = self - @memory_before = System.memory_usage_rss @started_at = System.monotonic_time @thread_cputime_start = System.thread_cpu_time yield ensure - @memory_after = System.memory_usage_rss @finished_at = System.monotonic_time - self.class.gitlab_transaction_cputime_seconds.observe(labels, thread_cpu_duration) - self.class.gitlab_transaction_duration_seconds.observe(labels, duration) - self.class.gitlab_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) + observe(:gitlab_transaction_cputime_seconds, thread_cpu_duration) do + buckets SMALL_BUCKETS + end + observe(:gitlab_transaction_duration_seconds, duration) do + buckets SMALL_BUCKETS + end Thread.current[THREAD_KEY] = nil end @@ -71,8 +80,12 @@ module Gitlab # event_name - The name of the event (e.g. "git_push"). # tags - A set of tags to attach to the event. def add_event(event_name, tags = {}) - filtered_tags = filter_tags(tags) - self.class.transaction_metric(event_name, :counter, prefix: 'event_', tags: filtered_tags).increment(filtered_tags.merge(labels)) + event_name = "gitlab_transaction_event_#{event_name}_total".to_sym + metric = self.class.prometheus_metric(event_name, :counter) do + label_keys tags.keys + end + + metric.increment(filter_labels(tags)) end # Returns a MethodCall object for the given name. @@ -84,52 +97,70 @@ module Gitlab method end - def increment(name, value, use_prometheus = true) - self.class.transaction_metric(name, :counter).increment(labels, value) if use_prometheus - end + # Increment counter metric + # + # It will initialize the metric if metric is not found + # + # block - if provided can be used to initialize metric with custom options (docstring, labels, with_feature) + # + # Example: + # ``` + # transaction.increment(:mestric_name, 1, { docstring: 'Custom title', base_labels: {sane: 'yes'} } ) do + # + # transaction.increment(:mestric_name, 1) do + # docstring 'Custom title' + # label_keys %i(sane) + # end + # ``` + def increment(name, value = 1, labels = {}, &block) + counter = self.class.prometheus_metric(name, :counter, &block) - def set(name, value, use_prometheus = true) - self.class.transaction_metric(name, :gauge).set(labels, value) if use_prometheus + counter.increment(filter_labels(labels), value) end - def labels - BASE_LABELS - end + # Set gauge metric + # + # It will initialize the metric if metric is not found + # + # block - if provided, it can be used to initialize metric with custom options (docstring, labels, with_feature, multiprocess_mode) + # - multiprocess_mode is :all by default + # + # Example: + # ``` + # transaction.set(:mestric_name, 1) do + # multiprocess_mode :livesum + # end + # ``` + def set(name, value, labels = {}, &block) + gauge = self.class.prometheus_metric(name, :gauge, &block) - define_histogram :gitlab_transaction_cputime_seconds do - docstring 'Transaction thread cputime' - base_labels BASE_LABELS - buckets [0.1, 0.25, 0.5, 1.0, 2.5, 5.0] + gauge.set(filter_labels(labels), value) end - define_histogram :gitlab_transaction_duration_seconds do - docstring 'Transaction duration' - base_labels BASE_LABELS - buckets [0.1, 0.25, 0.5, 1.0, 2.5, 5.0] - end + # Observe histogram metric + # + # It will initialize the metric if metric is not found + # + # block - if provided, it can be used to initialize metric with custom options (docstring, labels, with_feature, buckets) + # + # Example: + # ``` + # transaction.observe(:mestric_name, 1) do + # buckets [100, 1000, 10000, 100000, 1000000, 10000000] + # end + # ``` + def observe(name, value, labels = {}, &block) + histogram = self.class.prometheus_metric(name, :histogram, &block) - define_histogram :gitlab_transaction_allocated_memory_bytes do - docstring 'Transaction allocated memory bytes' - base_labels BASE_LABELS - buckets [100, 1000, 10000, 100000, 1000000, 10000000] + histogram.observe(filter_labels(labels), value) end - def self.transaction_metric(name, type, prefix: nil, tags: {}) - metric_name = "gitlab_transaction_#{prefix}#{name}_total".to_sym - fetch_metric(type, metric_name) do - docstring "Transaction #{prefix}#{name} #{type}" - base_labels tags.merge(BASE_LABELS) - - if type == :gauge - multiprocess_mode :livesum - end - end + def labels + BASE_LABEL_KEYS.product([nil]).to_h end - private - - def filter_tags(tags) - tags.without(*FILTERED_LABELS) + def filter_labels(labels) + labels.empty? ? self.labels : labels.without(*FILTERED_LABEL_KEYS).merge(self.labels) end end end |