# Zabbix 6.2 template export: "TiDB by HTTP".
#
# Data flow:
#   * tidb.get_metrics (HTTP agent) scrapes <url>:<port>/metrics and converts the
#     Prometheus text to JSON; almost every other item is a DEPENDENT item that
#     extracts one metric from that JSON with a JSONPath filter.
#   * tidb.get_status (HTTP agent) scrapes <url>:<port>/status and feeds the
#     "Status" and "Version" dependent items.
#   * Discovery rules are also dependent on tidb.get_metrics and create one item
#     per value of the metric's "type" label ({#TYPE}).
zabbix_export:
  version: '6.2'
  date: '2022-04-06T20:01:18Z'
  groups:
    - uuid: 748ad4d098d447d492bb935c907f652f
      name: Templates/Databases
  templates:
    - uuid: 8ec72ebbe3204d7789429640abcac610
      template: 'TiDB by HTTP'
      name: 'TiDB by HTTP'
      description: |
        The template to monitor TiDB server of TiDB cluster by Zabbix that works without any external scripts.
        Most of the metrics are collected in one go, thanks to Zabbix bulk data collection.
        Don't forget to change the macros {$TIDB.URL}, {$TIDB.PORT}.

        Template `TiDB by HTTP` — collects metrics by HTTP agent from TiDB /metrics endpoint and from monitoring API.

        You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback

        Template tooling version used: 0.41
      groups:
        - name: Templates/Databases
      items:
        - uuid: f6c68d3961e04d5185ab38c12fa25532
          name: 'TiDB: CPU'
          type: DEPENDENT
          key: tidb.cpu.util
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: '%'
          description: 'Total user and system CPU usage ratio.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="process_cpu_seconds_total")].value.first()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
            # CPU seconds per second -> percent.
            - type: MULTIPLIER
              parameters:
                - '100'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: cpu
        - uuid: e1da33fc95ab46bcbe6bc6a68154d172
          name: 'TiDB: DDL waiting jobs'
          type: DEPENDENT
          key: tidb.ddl_waiting_jobs
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of DDL tasks that are waiting.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_ddl_waiting_jobs")].value.sum()'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: operations
          triggers:
            - uuid: 4125d55d9931455091d2f3a0b25e9678
              expression: 'min(/TiDB by HTTP/tidb.ddl_waiting_jobs,5m)>{$TIDB.DDL.WAITING.MAX.WARN}'
              name: 'TiDB: Too many DDL waiting jobs'
              event_name: 'TiDB: Too many DDL waiting jobs (over {$TIDB.DDL.WAITING.MAX.WARN} for 5m)'
              priority: WARNING
              tags:
                - tag: scope
                  value: performance
        - uuid: 0fe0c1dee67e469d9d38193ef7b040ec
          name: 'TiDB: Load schema failed, rate'
          type: DEPENDENT
          key: tidb.domain_load_schema.failed.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The total number of failures to reload the latest schema information in TiDB per second.'
          preprocessing:
            # The metric name must be closed with its own quote before "&&";
            # otherwise the filter never matches and DISCARD_VALUE hides it.
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_domain_load_schema_total" && @.labels.type == "failed")].value.first()'
              error_handler: DISCARD_VALUE
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: domain
          triggers:
            # Named after the load-schema macro it uses, so it is distinguishable
            # from the schema lease trigger on the "outdate" item.
            - uuid: b4948f37af804c01a2b8887f9e89ec90
              expression: 'min(/TiDB by HTTP/tidb.domain_load_schema.failed.rate,5m)>{$TIDB.SCHEMA_LOAD_ERRORS.MAX.WARN}'
              name: 'TiDB: Too many load schema errors'
              event_name: 'TiDB: Too many load schema errors (over {$TIDB.SCHEMA_LOAD_ERRORS.MAX.WARN} for 5m)'
              priority: AVERAGE
              tags:
                - tag: scope
                  value: availability
        - uuid: b0e960ddac4d43a3860e8a06dfe09e03
          name: 'TiDB: Load schema total, rate'
          type: DEPENDENT
          key: tidb.domain_load_schema.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The statistics of the schemas that TiDB obtains from TiKV per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_domain_load_schema_total")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: domain
        - uuid: 8fffe502bd2f42368e60d2110f1c3319
          name: 'TiDB: Failed Query, rate'
          type: DEPENDENT
          key: tidb.execute_error.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of error occurred when executing SQL statements per second (such as syntax errors and primary key conflicts).'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_server_execute_error_total")].value.sum()'
              error_handler: DISCARD_VALUE
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: sql
        # Master item: one scrape of /metrics feeds all dependent metric items.
        - uuid: 954f5e433a7c44128d7772b87d493270
          name: 'TiDB: Get instance metrics'
          type: HTTP_AGENT
          key: tidb.get_metrics
          history: '0'
          trends: '0'
          value_type: TEXT
          description: 'Get TiDB instance metrics.'
          preprocessing:
            - type: CHECK_NOT_SUPPORTED
              parameters:
                - ''
            - type: PROMETHEUS_TO_JSON
              parameters:
                - ''
          url: '{$TIDB.URL}:{$TIDB.PORT}/metrics'
          tags:
            - tag: component
              value: raw
        # Master item for "Status" and "Version". When the request fails, the
        # value is replaced with {"status": "0"} so that tidb.status reads 0.
        - uuid: e95ebe1050b8404f8274e243203fdecc
          name: 'TiDB: Get instance status'
          type: HTTP_AGENT
          key: tidb.get_status
          history: '0'
          trends: '0'
          value_type: TEXT
          description: 'Get TiDB instance status info.'
          preprocessing:
            - type: CHECK_NOT_SUPPORTED
              parameters:
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '{"status": "0"}'
          url: '{$TIDB.URL}:{$TIDB.PORT}/status'
          tags:
            - tag: component
              value: health
            - tag: component
              value: raw
        - uuid: 14cf8296365048a59fd42f03966b1e1e
          name: 'TiDB: Goroutine count'
          type: DEPENDENT
          key: tidb.goroutines
          delay: '0'
          history: 7d
          description: 'The number of Goroutines on TiDB instance.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="go_goroutines")].value.first()'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: goroutines
        - uuid: 78e60bd44e3e4a4a859705c94ef94420
          name: 'TiDB: Heap memory usage'
          type: DEPENDENT
          key: tidb.heap_bytes
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'Number of heap bytes that are in use.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="go_memstats_heap_inuse_bytes")].value.first()'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: memory
          triggers:
            - uuid: 575a4821ed8c4a1881b7f9bd264b1929
              expression: 'min(/TiDB by HTTP/tidb.heap_bytes,5m)>{$TIDB.HEAP.USAGE.MAX.WARN}'
              name: 'TiDB: Heap memory usage is too high'
              event_name: 'TiDB: Heap memory usage is too high (over {$TIDB.HEAP.USAGE.MAX.WARN} for 5m)'
              priority: WARNING
              tags:
                - tag: scope
                  value: capacity
        - uuid: 104c09cb47f640fb804c136a09aa3bd2
          name: 'TiDB: Keep alive, rate'
          type: DEPENDENT
          key: tidb.monitor_keep_alive.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of times that the metrics are refreshed on TiDB instance per minute.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_monitor_keep_alive_total")].value.first()'
              error_handler: DISCARD_VALUE
            # Difference between consecutive polls (not per-second).
            - type: SIMPLE_CHANGE
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: health
          triggers:
            - uuid: eb3c09904fa843d78401b00eff4f6a08
              expression: 'max(/TiDB by HTTP/tidb.monitor_keep_alive.rate,5m)<{$TIDB.MONITOR_KEEP_ALIVE.MAX.WARN}'
              name: 'TiDB: Too few keep alive operations'
              event_name: 'TiDB: Too few keep alive operations (less {$TIDB.MONITOR_KEEP_ALIVE.MAX.WARN} for 5m)'
              priority: AVERAGE
              description: 'Indicates whether the TiDB process still exists. If the number of times for tidb_monitor_keep_alive_total increases less than 10 per minute, the TiDB process might already exit and an alert is triggered.'
              tags:
                - tag: scope
                  value: availability
        - uuid: d3ef55b9fd8b4a2aa1cbbc48f15203cb
          name: 'TiDB: Time jump back, rate'
          type: DEPENDENT
          key: tidb.monitor_time_jump_back.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of times that the operating system rewinds every second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_monitor_time_jump_back_total")].value.first()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: c3c1f6eab224453b92534d5393aca2b7
              expression: 'min(/TiDB by HTTP/tidb.monitor_time_jump_back.rate,5m)>{$TIDB.TIME_JUMP_BACK.MAX.WARN}'
              name: 'TiDB: Too many time jump backs'
              event_name: 'TiDB: Too many time jump backs (over {$TIDB.TIME_JUMP_BACK.MAX.WARN} for 5m)'
              priority: WARNING
              tags:
                - tag: scope
                  value: performance
        - uuid: dce76be0ccbe44969bb09ad29d599790
          name: 'TiDB: PD TSO commands, rate'
          type: DEPENDENT
          key: tidb.pd_tso_cmd.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of TSO commands that TiDB obtains from PD per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="pd_client_cmd_handle_cmds_duration_seconds_count" && @.labels.type == "tso")].value.first()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: pd-commands
        - uuid: 2bde50849f5541c3ac8aa5b26f5b8b52
          name: 'TiDB: PD TSO requests, rate'
          type: DEPENDENT
          key: tidb.pd_tso_request.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of TSO requests that TiDB obtains from PD per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="pd_client_request_handle_requests_duration_seconds_count" && @.labels.type == "tso")].value.first()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: pd-commands
        - uuid: ad8764623b8e46238efc0a94f0766a5b
          name: 'TiDB: Open file descriptors, max'
          type: DEPENDENT
          key: tidb.process_max_fds
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'Maximum number of open file descriptors.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="process_max_fds")].value.first()'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: fds
        - uuid: 04b3d6bf810c449db1e4b79be6b263a9
          name: 'TiDB: Open file descriptors'
          type: DEPENDENT
          key: tidb.process_open_fds
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'Number of open file descriptors.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="process_open_fds")].value.first()'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: fds
        - uuid: fbee374d0fda4679a693ccccc26e5713
          name: 'TiDB: RSS memory usage'
          type: DEPENDENT
          key: tidb.rss_bytes
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'Resident memory size in bytes.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="process_resident_memory_bytes")].value.first()'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: memory
        - uuid: 651140aae7334994a31d24568c08a9ab
          name: 'TiDB: Total "error" server query, rate'
          type: DEPENDENT
          key: tidb.server_query.error.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Qps
          description: 'The number of queries on TiDB instance per second with failure of command execution results.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name == "tidb_server_query_total" && @.labels.result == "Error")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: queries
        - uuid: 00374eab11a14ab1b4e636996519ab80
          name: 'TiDB: Total "ok" server query, rate'
          type: DEPENDENT
          key: tidb.server_query.ok.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Qps
          description: 'The number of queries on TiDB instance per second with success of command execution results.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name == "tidb_server_query_total" && @.labels.result == "OK")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: queries
        - uuid: 938e7bb83c714e198db55f3bb009daaf
          name: 'TiDB: Total server query, rate'
          type: DEPENDENT
          key: tidb.server_query.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Qps
          description: 'The number of queries per second on TiDB instance.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name == "tidb_server_query_total")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: queries
        - uuid: 84824ff459b74679b33e34a1f1e8cc69
          name: 'TiDB: Schema lease "change" errors, rate'
          type: DEPENDENT
          key: tidb.session_schema_lease_error.change.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: |
            The number of schema lease errors per second.
            "change" means that the schema has changed
          preprocessing:
            # Metric name and label are separate quoted operands of "&&".
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_session_schema_lease_error_total" && @.labels.type == "change")].value.first()'
              error_handler: DISCARD_VALUE
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: sessions
        - uuid: fe35df7cc7de4b0b8d616b042da99d69
          name: 'TiDB: Schema lease "outdate" errors , rate'
          type: DEPENDENT
          key: tidb.session_schema_lease_error.outdate.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: |
            The number of schema lease errors per second.
            "outdate" errors means that the schema cannot be updated, which is a more serious error and triggers an alert.
          preprocessing:
            # Metric name and label are separate quoted operands of "&&".
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_session_schema_lease_error_total" && @.labels.type == "outdate")].value.first()'
              error_handler: DISCARD_VALUE
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: sessions
          triggers:
            - uuid: b077eb1afe6a4da79707987324fb40c8
              expression: 'min(/TiDB by HTTP/tidb.session_schema_lease_error.outdate.rate,5m)>{$TIDB.SCHEMA_LEASE_ERRORS.MAX.WARN}'
              name: 'TiDB: Too many schema lease errors'
              event_name: 'TiDB: Too many schema lease errors (over {$TIDB.SCHEMA_LEASE_ERRORS.MAX.WARN} for 5m)'
              priority: AVERAGE
              description: 'The latest schema information is not reloaded in TiDB within one lease.'
              tags:
                - tag: scope
                  value: availability
        - uuid: fe8cf5e5c7704db797e76fd9d64e6d17
          name: 'TiDB: SQL statements, rate'
          type: DEPENDENT
          key: tidb.statement_total.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The total number of SQL statements executed per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_executor_statement_total")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: sql
        - uuid: ed86585a496b4b438c521c4765d76b11
          name: 'TiDB: Status'
          type: DEPENDENT
          key: tidb.status
          delay: '0'
          history: 7d
          trends: '0'
          value_type: CHAR
          description: 'Status of TiDB instance.'
          valuemap:
            name: 'Service state'
          preprocessing:
            # If $.status cannot be extracted from the response, the value is
            # set to 1 (Up); the master item substitutes {"status": "0"} when
            # the request itself fails, which yields 0 (Down) here.
            - type: JSONPATH
              parameters:
                - $.status
              error_handler: CUSTOM_VALUE
              error_handler_params: '1'
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: tidb.get_status
          tags:
            - tag: component
              value: health
          triggers:
            - uuid: 82a638ac4a3f4b349ee7bb0d53bc1f29
              expression: 'last(/TiDB by HTTP/tidb.status)=0'
              name: 'TiDB: Instance is not responding'
              priority: AVERAGE
              tags:
                - tag: scope
                  value: availability
        - uuid: b5db09e71f2341b8ac1b9e48cbdefc82
          name: 'TiDB: Server connections'
          type: DEPENDENT
          key: tidb.tidb_server_connections
          delay: '0'
          history: 7d
          description: 'The connection number of current TiDB instance.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_server_connections")].value.first()'
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: connections
        - uuid: 73992d4be61e443eafcc03aaa1bbf4a5
          name: 'TiDB: Server critical error, rate'
          type: DEPENDENT
          key: tidb.tidb_server_critical_error_total.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of critical errors occurred in TiDB per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_server_critical_error_total")].value.first()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: server
        - uuid: f1e2436ced3c456a85650e0622715777
          name: 'TiDB: Server panic, rate'
          type: DEPENDENT
          key: tidb.tidb_server_panic_total.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of panics occurred in TiDB per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_server_panic_total")].value.first()'
              error_handler: DISCARD_VALUE
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: server
          triggers:
            - uuid: c457465731c947eab7b477186d8ba876
              expression: 'last(/TiDB by HTTP/tidb.tidb_server_panic_total.rate)>0'
              name: 'TiDB: There are panicked TiDB threads'
              priority: AVERAGE
              description: 'When a panic occurs, an alert is triggered. The thread is often recovered, otherwise, TiDB will frequently restart.'
              tags:
                - tag: scope
                  value: availability
        - uuid: 8a63b326356f4fdbb9cb6e73437348be
          name: 'TiDB: KV backoff, rate'
          type: DEPENDENT
          key: tidb.tikvclient_backoff.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of errors returned by TiKV.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_backoff_total")].value.sum()'
              error_handler: DISCARD_VALUE
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: tikv-commands
        - uuid: aef7cdfd07f1420c970f20c821fed8dd
          name: 'TiDB: Lock resolves, rate'
          type: DEPENDENT
          key: tidb.tikvclient_lock_resolver_action.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of TiDB operations that resolve locks per second. When TiDB''s read or write request encounters a lock, it tries to resolve the lock.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_lock_resolver_actions_total")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: tikv-commands
        - uuid: c116313e63fa45d89bf44772cc9cb3b8
          name: 'TiDB: TiClient region errors, rate'
          type: DEPENDENT
          key: tidb.tikvclient_region_err.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of region related errors returned by TiKV per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_region_err_total")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: regions
          triggers:
            - uuid: d3cb81c46e414ff2a7e411a877b899ef
              expression: 'min(/TiDB by HTTP/tidb.tikvclient_region_err.rate,5m)>{$TIDB.REGION_ERROR.MAX.WARN}'
              name: 'TiDB: Too many region related errors'
              event_name: 'TiDB: Too many region related errors (over {$TIDB.REGION_ERROR.MAX.WARN} for 5m)'
              priority: AVERAGE
              tags:
                - tag: scope
                  value: performance
        - uuid: 8ddf164df8c9404ba9f7c0f87db3bc2f
          name: 'TiDB: KV commands, rate'
          type: DEPENDENT
          key: tidb.tikvclient_txn.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Ops
          description: 'The number of executed KV commands per second.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_txn_cmd_duration_seconds_count")].value.sum()'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: tikv-commands
        - uuid: 20c0fffd66c84a16bb4d8f7882c896bc
          name: 'TiDB: Uptime'
          type: DEPENDENT
          key: tidb.uptime
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: uptime
          description: 'The runtime of each TiDB instance.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="process_start_time_seconds")].value.first()'
            # The script must keep its line break: the first line is a "//"
            # comment and would otherwise swallow the return statement.
            - type: JAVASCRIPT
              parameters:
                - |
                  //use boottime to calculate uptime
                  return (Math.floor(Date.now()/1000)-Number(value));
          master_item:
            key: tidb.get_metrics
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: 91adc4c6b9364693891faf58c8cced75
              expression: 'last(/TiDB by HTTP/tidb.uptime)<10m'
              name: 'TiDB: has been restarted'
              event_name: 'TiDB: has been restarted (uptime < 10m)'
              priority: INFO
              description: 'Uptime is less than 10 minutes'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: 65fa2b077073467387b39bef035b0644
          name: 'TiDB: Version'
          type: DEPENDENT
          key: tidb.version
          delay: '0'
          history: 7d
          trends: '0'
          value_type: CHAR
          description: 'Version of the TiDB instance.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.version
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 3h
          master_item:
            key: tidb.get_status
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: 5f863fc0944848fdad145f42c94dbea3
              expression: 'last(/TiDB by HTTP/tidb.version,#1)<>last(/TiDB by HTTP/tidb.version,#2) and length(last(/TiDB by HTTP/tidb.version))>0'
              name: 'TiDB: Version has changed'
              event_name: 'TiDB: Version has changed (new version: {ITEM.VALUE})'
              priority: INFO
              description: 'TiDB version has changed. Ack to close.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
      discovery_rules:
        - uuid: 4db735b652eb451d911f6dc01de6b1ba
          name: 'KV metrics discovery'
          type: DEPENDENT
          key: tidb.kv_ops.discovery
          delay: '0'
          description: 'Discovery KV specific metrics.'
          item_prototypes:
            - uuid: 79b0a13b25c745d288c313c4197f2e5c
              name: 'TiDB: KV Commands: {#TYPE}, rate'
              type: DEPENDENT
              key: 'tidb.tikvclient_txn.rate[{#TYPE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Ops
              description: 'The number of executed KV commands per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$[?(@.name=="tidb_tikvclient_txn_cmd_duration_seconds_count" && @.labels.type == "{#TYPE}")].value.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: tidb.get_metrics
              tags:
                - tag: component
                  value: tikv-commands
                - tag: type
                  value: '{#TYPE}'
          master_item:
            key: tidb.get_metrics
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_txn_cmd_duration_seconds_count")]'
            # Emits one LLD row per matched sample, keyed by its "type" label.
            - type: JAVASCRIPT
              parameters:
                - |
                  output = JSON.parse(value).map(function(item){
                      return {
                          "{#TYPE}": item.labels.type,
                      }})
                  return JSON.stringify({"data": output})
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
        - uuid: bcaf700328b94b59b35cf5bfaf27d5ac
          name: 'QPS metrics discovery'
          type: DEPENDENT
          key: tidb.qps.discovery
          delay: '0'
          description: 'Discovery QPS specific metrics.'
          item_prototypes:
            - uuid: 3b450f5a853247358a8fe6f31f000575
              name: 'TiDB: Server query "Error": {#TYPE}, rate'
              type: DEPENDENT
              key: 'tidb.server_query.error.rate[{#TYPE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Qps
              description: 'The number of queries on TiDB instance per second with failure of command execution results.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$[?(@.name == "tidb_server_query_total" && @.labels.result == "Error" && @.labels.type == "{#TYPE}")].value.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: tidb.get_metrics
              tags:
                - tag: component
                  value: queries
                - tag: type
                  value: '{#TYPE}'
            - uuid: 125bdd3eb1b643f4ad00d58fbed455f6
              name: 'TiDB: Server query "OK": {#TYPE}, rate'
              type: DEPENDENT
              key: 'tidb.server_query.ok.rate[{#TYPE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Qps
              description: 'The number of queries on TiDB instance per second with success of command execution results.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$[?(@.name == "tidb_server_query_total" && @.labels.result == "OK" && @.labels.type == "{#TYPE}")].value.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: tidb.get_metrics
              tags:
                - tag: component
                  value: queries
                - tag: type
                  value: '{#TYPE}'
          master_item:
            key: tidb.get_metrics
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_server_query_total")]'
            # The same "type" appears once per "result" label, so types are
            # de-duplicated through the lookup table before being emitted.
            - type: JAVASCRIPT
              parameters:
                - |
                  var lookup = {},
                      result = [];

                  JSON.parse(value).forEach(function (item) {
                      var type = item.labels.type;
                      if (!(lookup[type])) {
                          lookup[type] = 1;
                          result.push({ "{#TYPE}": type });
                      }
                  })

                  return JSON.stringify(result);
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
        - uuid: e3c23b94a9514389b37e3911f79db8f8
          name: 'Statement metrics discovery'
          type: DEPENDENT
          key: tidb.statement.discover
          delay: '0'
          description: 'Discovery statement specific metrics.'
          item_prototypes:
            - uuid: 111ffa26edd247eb9325f5ab5f5f3f94
              name: 'TiDB: SQL statements: {#TYPE}, rate'
              type: DEPENDENT
              key: 'tidb.statement.rate[{#TYPE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              description: 'The number of SQL statements executed per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$[?(@.name=="tidb_executor_statement_total" && @.labels.type == "{#TYPE}")].value.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: tidb.get_metrics
              tags:
                - tag: component
                  value: sql
                - tag: type
                  value: '{#TYPE}'
          master_item:
            key: tidb.get_metrics
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_executor_statement_total")]'
            - type: JAVASCRIPT
              parameters:
                - |
                  output = JSON.parse(value).map(function(item){
                      return {
                          "{#TYPE}": item.labels.type,
                      }})
                  return JSON.stringify({"data": output})
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
        - uuid: f9af6d56e612459583a1f5d3b4a7d61b
          name: 'KV backoff discovery'
          type: DEPENDENT
          key: tidb.tikvclient_backoff.discovery
          delay: '0'
          description: 'Discovery KV backoff specific metrics.'
          item_prototypes:
            - uuid: a11cf69675654e7ca70dbab255509bb8
              name: 'TiDB: KV backoff: {#TYPE}, rate'
              type: DEPENDENT
              key: 'tidb.tikvclient_backoff.rate[{#TYPE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Ops
              description: 'The number of errors returned by TiKV.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$[?(@.name=="tidb_tikvclient_backoff_total" && @.labels.type == "{#TYPE}")].value.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: tidb.get_metrics
              tags:
                - tag: component
                  value: tikv-backoff
                - tag: type
                  value: '{#TYPE}'
          master_item:
            key: tidb.get_metrics
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_backoff_total")]'
              error_handler: DISCARD_VALUE
            - type: JAVASCRIPT
              parameters:
                - |
                  output = JSON.parse(value).map(function(item){
                      return {
                          "{#TYPE}": item.labels.type,
                      }})
                  return JSON.stringify({"data": output})
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
        - uuid: 72d1f090ce9c4f89b06e4195989e4b2b
          name: 'GC action results discovery'
          type: DEPENDENT
          key: tidb.tikvclient_gc_action.discovery
          delay: '0'
          description: 'Discovery GC action results metrics.'
          item_prototypes:
            - uuid: e3c19674dcdf4fdea1f8cd82e3df9f72
              name: 'TiDB: GC action result: {#TYPE}, rate'
              type: DEPENDENT
              key: 'tidb.tikvclient_gc_action.rate[{#TYPE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Ops
              description: 'The number of results of GC-related operations per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$[?(@.name=="tidb_tikvclient_gc_action_result" && @.labels.type == "{#TYPE}")].value.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: tidb.get_metrics
              tags:
                - tag: component
                  value: gc
                - tag: type
                  value: '{#TYPE}'
              trigger_prototypes:
                # Not discovered by default; the override below enables it only
                # for the row whose {#TYPE} matches "failed".
                - uuid: 2d24956d0a4f459fad155590376696c1
                  expression: 'min(/TiDB by HTTP/tidb.tikvclient_gc_action.rate[{#TYPE}],5m)>{$TIDB.GC_ACTIONS.ERRORS.MAX.WARN}'
                  name: 'TiDB: Too many failed GC-related operations'
                  event_name: 'TiDB: Too many failed GC-related operations (over {$TIDB.GC_ACTIONS.ERRORS.MAX.WARN} in 5m)'
                  discover: NO_DISCOVER
                  priority: WARNING
                  tags:
                    - tag: scope
                      value: performance
          master_item:
            key: tidb.get_metrics
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_gc_action_result")]'
              error_handler: DISCARD_VALUE
            - type: JAVASCRIPT
              parameters:
                - |
                  output = JSON.parse(value).map(function(item){
                      return {
                          "{#TYPE}": item.labels.type,
                      }})
                  return JSON.stringify({"data": output})
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          overrides:
            - name: 'Failed GC-related operations trigger'
              step: '1'
              filter:
                conditions:
                  - macro: '{#TYPE}'
                    value: failed
                    formulaid: A
              operations:
                - operationobject: TRIGGER_PROTOTYPE
                  operator: LIKE
                  value: 'Too many failed GC-related operations'
                  status: ENABLED
                  discover: DISCOVER
        - uuid: a4f2e1902cc54f0db9e9c345c63285e2
          name: 'Lock resolves discovery'
          type: DEPENDENT
          key: tidb.tikvclient_lock_resolver_action.discovery
          delay: '0'
          description: 'Discovery lock resolves specific metrics.'
          item_prototypes:
            - uuid: 2eebf2ccc44a4cadb0b70e153b70b3a2
              name: 'TiDB: Lock resolves: {#TYPE}, rate'
              type: DEPENDENT
              key: 'tidb.tikvclient_lock_resolver_action.rate[{#TYPE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Ops
              description: 'The number of TiDB operations that resolve locks per second. When TiDB''s read or write request encounters a lock, it tries to resolve the lock.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$[?(@.name=="tidb_tikvclient_lock_resolver_actions_total" && @.labels.type == "{#TYPE}")].value.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: tidb.get_metrics
              tags:
                - tag: component
                  value: locks
                - tag: type
                  value: '{#TYPE}'
          master_item:
            key: tidb.get_metrics
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.name=="tidb_tikvclient_lock_resolver_actions_total")]'
            - type: JAVASCRIPT
              parameters:
                - |
                  output = JSON.parse(value).map(function(item){
                      return {
                          "{#TYPE}": item.labels.type,
                      }})
                  return JSON.stringify({"data": output})
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
      tags:
        - tag: class
          value: database
        - tag: target
          value: tidb
      # User macros: connection settings and trigger thresholds.
      macros:
        - macro: '{$TIDB.DDL.WAITING.MAX.WARN}'
          value: '5'
          description: 'Maximum number of DDL tasks that are waiting'
        - macro: '{$TIDB.GC_ACTIONS.ERRORS.MAX.WARN}'
          value: '1'
          description: 'Maximum number of GC-related operations failures'
        - macro: '{$TIDB.HEAP.USAGE.MAX.WARN}'
          value: 10G
          description: 'Maximum heap memory used'
        # Despite the ".MAX." in its name this is a lower bound: the trigger
        # fires when the keep-alive rate stays below it.
        - macro: '{$TIDB.MONITOR_KEEP_ALIVE.MAX.WARN}'
          value: '10'
          description: 'Minimum number of keep alive operations'
        - macro: '{$TIDB.OPEN.FDS.MAX.WARN}'
          value: '90'
          description: 'Maximum percentage of used file descriptors'
        - macro: '{$TIDB.PORT}'
          value: '10080'
          description: 'The port of TiDB server metrics web endpoint'
        - macro: '{$TIDB.REGION_ERROR.MAX.WARN}'
          value: '50'
          description: 'Maximum number of region related errors'
        - macro: '{$TIDB.SCHEMA_LEASE_ERRORS.MAX.WARN}'
          value: '0'
          description: 'Maximum number of schema lease errors'
        - macro: '{$TIDB.SCHEMA_LOAD_ERRORS.MAX.WARN}'
          value: '1'
          description: 'Maximum number of load schema errors'
        - macro: '{$TIDB.TIME_JUMP_BACK.MAX.WARN}'
          value: '1'
          description: 'Maximum number of times that the operating system rewinds every second'
        - macro: '{$TIDB.URL}'
          value: localhost
          description: 'TiDB server URL'
      valuemaps:
        - uuid: 047f0303f1bc424a959f5d0ceaab77c7
          name: 'Service state'
          mappings:
            - value: '0'
              newvalue: Down
            - value: '1'
              newvalue: Up
  # Template-level trigger: it references two items, so it is not nested under
  # either of them.
  triggers:
    - uuid: cfd6ba0dbf294b9e9ab6afc7d44be7b3
      expression: 'min(/TiDB by HTTP/tidb.process_open_fds,5m)/last(/TiDB by HTTP/tidb.process_max_fds)*100>{$TIDB.OPEN.FDS.MAX.WARN}'
      name: 'TiDB: Current number of open files is too high'
      event_name: 'TiDB: Current number of open files is too high (over {$TIDB.OPEN.FDS.MAX.WARN}% for 5m)'
      priority: WARNING
      description: 'Heavy file descriptor usage (i.e., near the process''s file descriptor limit) indicates a potential file descriptor exhaustion issue.'
      tags:
        - tag: scope
          value: capacity
  graphs:
    - uuid: ee25671d0b5446348341be56967a74b2
      name: 'TiDB: File descriptors'
      graph_items:
        - drawtype: GRADIENT_LINE
          color: 1A7C11
          item:
            host: 'TiDB by HTTP'
            key: tidb.process_open_fds
        - sortorder: '1'
          drawtype: BOLD_LINE
          color: 2774A4
          item:
            host: 'TiDB by HTTP'
            key: tidb.process_max_fds
    - uuid: fb29a5ea3a62416f8eeee804a6f83c46
      name: 'TiDB: Memory usage'
      graph_items:
        - color: 1A7C11
          item:
            host: 'TiDB by HTTP'
            key: tidb.heap_bytes
        - sortorder: '1'
          color: 2774A4
          item:
            host: 'TiDB by HTTP'
            key: tidb.rss_bytes
    - uuid: 5ab746ad86ef4710948263d4d6157742
      name: 'TiDB: Server query rate'
      graph_items:
        - color: 1A7C11
          item:
            host: 'TiDB by HTTP'
            key: tidb.server_query.rate
        - sortorder: '1'
          color: 2774A4
          item:
            host: 'TiDB by HTTP'
            key: tidb.server_query.ok.rate
        - sortorder: '2'
          color: F63100
          item:
            host: 'TiDB by HTTP'
            key: tidb.server_query.error.rate