Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2020-07-20 15:26:25 +0300
committerGitLab Bot <gitlab-bot@gitlab.com>2020-07-20 15:26:25 +0300
commita09983ae35713f5a2bbb100981116d31ce99826e (patch)
tree2ee2af7bd104d57086db360a7e6d8c9d5d43667a /config/prometheus
parent18c5ab32b738c0b6ecb4d0df3994000482f34bd8 (diff)
Add latest changes from gitlab-org/gitlab@13-2-stable-ee
Diffstat (limited to 'config/prometheus')
-rw-r--r--config/prometheus/cluster_metrics.yml91
-rw-r--r--config/prometheus/common_metrics.yml24
-rw-r--r--config/prometheus/queries_cluster_metrics.yml65
3 files changed, 117 insertions, 63 deletions
diff --git a/config/prometheus/cluster_metrics.yml b/config/prometheus/cluster_metrics.yml
index f2a41e4c337..1e396f4bbbd 100644
--- a/config/prometheus/cluster_metrics.yml
+++ b/config/prometheus/cluster_metrics.yml
@@ -1,63 +1,40 @@
+dashboard: 'Cluster health'
+priority: 1
+panel_groups:
- group: Cluster Health
- priority: 1
- metrics:
+ priority: 10
+ panels:
- title: "CPU Usage"
+ type: "area-chart"
y_label: "CPU (cores)"
- required_metrics: ['container_cpu_usage_seconds_total']
weight: 1
- queries:
- - query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{id="/"}[15m])) by (job)) without (job)'
- label: Usage (cores)
- unit: "cores"
- appearance:
- line:
- width: 2
- area:
- opacity: 0
- - query_range: 'sum(kube_pod_container_resource_requests_cpu_cores{kubernetes_namespace="gitlab-managed-apps"})'
- label: Requested (cores)
- unit: "cores"
- appearance:
- line:
- width: 2
- area:
- opacity: 0
- - query_range: 'sum(kube_node_status_capacity_cpu_cores{kubernetes_namespace="gitlab-managed-apps"})'
- label: Capacity (cores)
- unit: "cores"
- appearance:
- line:
- type: 'dashed'
- width: 2
- area:
- opacity: 0
- - title: "Memory usage"
+ metrics:
+ - id: cluster_health_cpu_usage
+ query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{id="/"}[15m])) by (job)) without (job)'
+ unit: cores
+ label: Usage (cores)
+ - id: cluster_health_cpu_requested
+ query_range: 'sum(kube_pod_container_resource_requests_cpu_cores{kubernetes_namespace="gitlab-managed-apps"})'
+ unit: cores
+ label: Requested (cores)
+ - id: cluster_health_cpu_capacity
+ query_range: 'sum(kube_node_status_capacity_cpu_cores{kubernetes_namespace="gitlab-managed-apps"})'
+ unit: cores
+ label: Capacity (cores)
+ - title: "Memory Usage"
+ type: "area-chart"
y_label: "Memory (GiB)"
- required_metrics: ['container_memory_usage_bytes']
weight: 1
- queries:
- - query_range: 'avg(sum(container_memory_usage_bytes{id="/"}) by (job)) without (job) / 2^30'
- label: Usage (GiB)
- unit: "GiB"
- appearance:
- line:
- width: 2
- area:
- opacity: 0
- - query_range: 'sum(kube_pod_container_resource_requests_memory_bytes{kubernetes_namespace="gitlab-managed-apps"})/2^30'
- label: Requested (GiB)
- unit: "GiB"
- appearance:
- line:
- width: 2
- area:
- opacity: 0
- - query_range: 'sum(kube_node_status_capacity_memory_bytes{kubernetes_namespace="gitlab-managed-apps"})/2^30'
- label: Capacity (GiB)
- unit: "GiB"
- appearance:
- line:
- type: 'dashed'
- width: 2
- area:
- opacity: 0
+ metrics:
+ - id: cluster_health_memory_usage
+ query_range: 'avg(sum(container_memory_usage_bytes{id="/"}) by (job)) without (job) / 2^30'
+ unit: GiB
+ label: Usage (GiB)
+ - id: cluster_health_memory_requested
+ query_range: 'sum(kube_pod_container_resource_requests_memory_bytes{kubernetes_namespace="gitlab-managed-apps"})/2^30'
+ unit: GiB
+ label: Requested (GiB)
+ - id: cluster_health_memory_capacity
+ query_range: 'sum(kube_node_status_capacity_memory_bytes{kubernetes_namespace="gitlab-managed-apps"})/2^30'
+ unit: GiB
+ label: Capacity (GiB)
diff --git a/config/prometheus/common_metrics.yml b/config/prometheus/common_metrics.yml
index f0491df3db9..d9aaff12a4d 100644
--- a/config/prometheus/common_metrics.yml
+++ b/config/prometheus/common_metrics.yml
@@ -10,7 +10,9 @@ panel_groups:
weight: 4
metrics:
- id: system_metrics_kubernetes_container_memory_total
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024'
+ # Remove the second metric (after OR) when we drop support for K8s 1.13
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/229279
+ query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024'
label: Total (GB)
unit: GB
- title: "Core Usage (Total)"
@@ -19,7 +21,9 @@ panel_groups:
weight: 3
metrics:
- id: system_metrics_kubernetes_container_cores_total
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job)'
+ # Remove the second metric (after OR) when we drop support for K8s 1.13
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/229279
+ query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job)'
label: Total (cores)
unit: "cores"
- title: "Memory Usage (Pod average)"
@@ -28,7 +32,9 @@ panel_groups:
weight: 2
metrics:
- id: system_metrics_kubernetes_container_memory_average
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
+ # Remove the second metric (after OR) when we drop support for K8s 1.13
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/229279
+ query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
label: Pod average (MB)
unit: MB
- title: "Canary: Memory Usage (Pod Average)"
@@ -37,7 +43,9 @@ panel_groups:
weight: 2
metrics:
- id: system_metrics_kubernetes_container_memory_average_canary
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
+ # Remove the second metric (after OR) when we drop support for K8s 1.13
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/229279
+ query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
label: Pod average (MB)
unit: MB
track: canary
@@ -47,7 +55,9 @@ panel_groups:
weight: 1
metrics:
- id: system_metrics_kubernetes_container_core_usage
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
+ # Remove the second metric (after OR) when we drop support for K8s 1.13
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/229279
+ query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod)) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
label: Pod average (cores)
unit: "cores"
- title: "Canary: Core Usage (Pod Average)"
@@ -56,7 +66,9 @@ panel_groups:
weight: 1
metrics:
- id: system_metrics_kubernetes_container_core_usage_canary
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
+ # Remove the second metric (after OR) when we drop support for K8s 1.13
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/229279
+ query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod)) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
label: Pod average (cores)
unit: "cores"
track: canary
diff --git a/config/prometheus/queries_cluster_metrics.yml b/config/prometheus/queries_cluster_metrics.yml
new file mode 100644
index 00000000000..bec3ba22d83
--- /dev/null
+++ b/config/prometheus/queries_cluster_metrics.yml
@@ -0,0 +1,65 @@
+# most likely this file can be removed, but until we are sure and have capacity to tackle that I've
+# only moved it and added https://gitlab.com/gitlab-org/gitlab/-/issues/225869 to track work need to clean up codebase.
+- group: Cluster Health
+ priority: 1
+ metrics:
+ - title: "CPU Usage"
+ y_label: "CPU (cores)"
+ required_metrics: ['container_cpu_usage_seconds_total']
+ weight: 1
+ queries:
+ - query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{id="/"}[15m])) by (job)) without (job)'
+ label: Usage (cores)
+ unit: "cores"
+ appearance:
+ line:
+ width: 2
+ area:
+ opacity: 0
+ - query_range: 'sum(kube_pod_container_resource_requests_cpu_cores{kubernetes_namespace="gitlab-managed-apps"})'
+ label: Requested (cores)
+ unit: "cores"
+ appearance:
+ line:
+ width: 2
+ area:
+ opacity: 0
+ - query_range: 'sum(kube_node_status_capacity_cpu_cores{kubernetes_namespace="gitlab-managed-apps"})'
+ label: Capacity (cores)
+ unit: "cores"
+ appearance:
+ line:
+ type: 'dashed'
+ width: 2
+ area:
+ opacity: 0
+ - title: "Memory usage"
+ y_label: "Memory (GiB)"
+ required_metrics: ['container_memory_usage_bytes']
+ weight: 1
+ queries:
+ - query_range: 'avg(sum(container_memory_usage_bytes{id="/"}) by (job)) without (job) / 2^30'
+ label: Usage (GiB)
+ unit: "GiB"
+ appearance:
+ line:
+ width: 2
+ area:
+ opacity: 0
+ - query_range: 'sum(kube_pod_container_resource_requests_memory_bytes{kubernetes_namespace="gitlab-managed-apps"})/2^30'
+ label: Requested (GiB)
+ unit: "GiB"
+ appearance:
+ line:
+ width: 2
+ area:
+ opacity: 0
+ - query_range: 'sum(kube_node_status_capacity_memory_bytes{kubernetes_namespace="gitlab-managed-apps"})/2^30'
+ label: Capacity (GiB)
+ unit: "GiB"
+ appearance:
+ line:
+ type: 'dashed'
+ width: 2
+ area:
+ opacity: 0