# Zabbix template export: "Etcd by HTTP".
#
# Two HTTP agent master items scrape the etcd endpoints:
#   etcd.get_metrics -> /metrics  (Prometheus text format)
#   etcd.get_version -> /version  (JSON)
# A third HTTP agent item (etcd.health) polls /health directly, and a simple
# check (net.tcp.service[...]) probes the TCP port. Every other item is a
# DEPENDENT item that extracts one value from a master item via preprocessing.
#
# Values are deliberately kept as quoted strings ('0', '1', 'YES', ...) exactly
# as exported; do not convert them to native YAML numbers/booleans.
zabbix_export:
  version: '6.2'
  date: '2022-04-13T21:49:37Z'
  groups:
    - uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
      name: Templates/Applications
  templates:
    - uuid: b25b8b517a4743c48037bfa10af3dc3c
      template: 'Etcd by HTTP'
      name: 'Etcd by HTTP'
      description: |
        Get Etcd metrics by HTTP agent.
        Template tooling version used: 0.41
      groups:
        - name: Templates/Applications
      items:
        - uuid: 32a59c8e93e141d6a471266df6dbfbd3
          name: 'Etcd: Cluster version'
          type: DEPENDENT
          key: etcd.cluster.version
          delay: '0'
          history: 7d
          trends: '0'
          value_type: CHAR
          description: 'Version of the Etcd cluster.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.etcdcluster
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1d
          master_item:
            key: etcd.get_version
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: 7c87424c4fb34c56bc9b976755b4ec02
              expression: 'last(/Etcd by HTTP/etcd.cluster.version,#1)<>last(/Etcd by HTTP/etcd.cluster.version,#2) and length(last(/Etcd by HTTP/etcd.cluster.version))>0'
              name: 'Etcd: Cluster version has changed'
              event_name: 'Etcd: Cluster version has changed (new version: {ITEM.VALUE})'
              priority: INFO
              description: 'Etcd version has changed. Ack to close.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: 04b0fa552b7d4267b4c5b67ee82ef5f1
          name: 'Etcd: CPU'
          type: DEPENDENT
          key: etcd.cpu.util
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'Total user and system CPU time spent in seconds.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_cpu_seconds_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: cpu
        - uuid: bfa5bd42637642808802f7b2485a0c4d
          name: 'Etcd: DB size'
          type: DEPENDENT
          key: etcd.db.size
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'Total size of the underlying database.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_db_total_size_in_bytes
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: database
        - uuid: a5bc7ffb090641ab92f537b38b6055e5
          name: 'Etcd: Deletes per second'
          type: DEPENDENT
          key: etcd.delete.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of deletes seen by this member per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_delete_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: operations
        - uuid: 183843bd93f84dc887a03fb638b2d323
          name: 'Etcd: Pending events'
          type: DEPENDENT
          # NOTE(review): the key says "sent.rate" but no CHANGE_PER_SECOND step
          # is applied, so the raw metric value is stored. The key is left
          # untouched because triggers, graphs and history reference items by key.
          key: etcd.events.sent.rate
          delay: '0'
          history: 7d
          description: 'Total number of pending events to be sent.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_pending_events_total
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: events
        # Master item: raw Prometheus payload. history/trends '0' means the
        # payload itself is not stored; only dependent items keep data.
        - uuid: 34ffab33275a400ab88e5217dee5ef96
          name: 'Etcd: Get node metrics'
          type: HTTP_AGENT
          key: etcd.get_metrics
          history: '0'
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ETCD.USER}'
          password: '{$ETCD.PASSWORD}'
          url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/metrics'
          tags:
            - tag: component
              value: raw
        # Master item: raw /version JSON, also not stored.
        - uuid: fdf7593420ab42b2a5af8f8b8030b517
          name: 'Etcd: Get version'
          type: HTTP_AGENT
          key: etcd.get_version
          history: '0'
          trends: '0'
          value_type: TEXT
          url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/version'
          tags:
            - tag: component
              value: application
        - uuid: d23baf75628043e193ba0a607e1b4215
          name: 'Etcd: RPCs received per second'
          type: DEPENDENT
          key: etcd.grpc.received.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of RPC stream messages received on the server.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_msg_received_total
            # Sum the "value" of every matching series into one counter, then
            # convert that counter to a per-second rate.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: eda81182710e47e1b5f2f21bb05b4775
          name: 'Etcd: RPCs sent per second'
          type: DEPENDENT
          key: etcd.grpc.sent.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of gRPC stream messages sent by the server.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_msg_sent_total
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: bf59a130b20d480d93eb9330750e8e28
          name: 'Etcd: RPCs started per second'
          type: DEPENDENT
          key: etcd.grpc.started.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of RPCs started on the server.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_started_total
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: e03575f4c472410eb6fbcf731ac6aab2
          name: 'Etcd: Server has a leader'
          type: DEPENDENT
          key: etcd.has.leader
          delay: '0'
          history: 7d
          description: 'Whether or not a leader exists. 1 is existence, 0 is not.'
          valuemap:
            name: 'Etcd leader'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_has_leader
                - value
                - ''
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: leader
          triggers:
            - uuid: 20165719d521453bb239d818ac57805c
              expression: 'last(/Etcd by HTTP/etcd.has.leader)=0'
              name: 'Etcd: Member has no leader'
              priority: AVERAGE
              description: 'If a member does not have a leader, it is totally unavailable.'
              tags:
                - tag: scope
                  value: availability
        - uuid: 3fde4db8b9684ba4b56ba915e48957b5
          name: 'Etcd: Node health'
          type: HTTP_AGENT
          key: etcd.health
          history: 7d
          authtype: BASIC
          username: '{$ETCD.USER}'
          password: '{$ETCD.PASSWORD}'
          valuemap:
            name: 'Etcd healthcheck'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.health
            # If the extracted value cannot be converted to a boolean, store 0
            # (mapped to "Failed") instead of making the item unsupported.
            - type: BOOL_TO_DECIMAL
              parameters:
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '0'
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/health'
          tags:
            - tag: component
              value: health
          triggers:
            - uuid: 6acda7bdc9df4a4ab5b7cca76c6369f0
              expression: 'last(/Etcd by HTTP/etcd.health)=0'
              name: 'Etcd: Node healthcheck failed'
              opdata: 'Current state: {ITEM.LASTVALUE1}'
              priority: AVERAGE
              description: 'https://etcd.io/docs/v3.4.0/op-guide/monitoring/#health-check'
              dependencies:
                - name: 'Etcd: Service is unavailable'
                  expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
              tags:
                - tag: scope
                  value: availability
        - uuid: 923a408dd4514e808b6e2137a94f8140
          name: 'Etcd: HTTP 4XX'
          type: DEPENDENT
          key: etcd.http.requests.4xx.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'Number of handle failures of requests (non-watches), by method (GET/PUT etc.), and code 4XX.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - 'etcd_http_failed_total{code=~"4.+"}'
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: http
            - tag: http-code
              value: 4xx
        - uuid: c0f27d4bfba344079a31ce8c10b22683
          name: 'Etcd: HTTP 5XX'
          type: DEPENDENT
          key: etcd.http.requests.5xx.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'Number of handle failures of requests (non-watches), by method (GET/PUT etc.), and code 5XX.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - 'etcd_http_failed_total{code=~"5.+"}'
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: http
            - tag: http-code
              value: 5xx
          triggers:
            - uuid: 0302957e0f6b43389546e1cfb958ed9c
              expression: 'min(/Etcd by HTTP/etcd.http.requests.5xx.rate,5m)>{$ETCD.HTTP.FAIL.MAX.WARN}'
              name: 'Etcd: Too many HTTP requests failures'
              event_name: 'Etcd: Too many HTTP requests failures (over {$ETCD.HTTP.FAIL.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'Too many requests failed on etcd instance with 5xx HTTP code.'
              tags:
                - tag: scope
                  value: availability
        - uuid: 2a19db1c58ee4a509061fcb1b557c1a3
          name: 'Etcd: HTTP requests received'
          type: DEPENDENT
          key: etcd.http.requests.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'Number of requests received into the system (successfully parsed and authd).'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_http_received_total
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: http
        - uuid: b3760811472440baad6a338f481ba13a
          name: 'Etcd: Server is a leader'
          type: DEPENDENT
          key: etcd.is.leader
          delay: '0'
          history: 7d
          description: 'Whether or not this member is a leader. 1 if is, 0 otherwise.'
          valuemap:
            name: 'Etcd leader'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_is_leader
                - value
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '0'
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: leader
          triggers:
            - uuid: 44d66eb1a332418daf4c3a1110db5458
              expression: 'nodata(/Etcd by HTTP/etcd.is.leader,30m)=1'
              name: 'Etcd: Failed to fetch info data'
              event_name: 'Etcd: Failed to fetch info data (or no data for 30m)'
              priority: WARNING
              description: 'Zabbix has not received data for items for the last 30 minutes.'
              manual_close: 'YES'
              dependencies:
                - name: 'Etcd: Service is unavailable'
                  expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
              tags:
                - tag: scope
                  value: notice
        - uuid: ecd1ae9c038f4fc2b720ad562ced0191
          name: 'Etcd: Keys compacted per second'
          type: DEPENDENT
          key: etcd.keys.compacted.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of DB keys compacted per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_db_compaction_keys_total
                - value
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '0'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: keys
        - uuid: a3f910efb0a04cc494c07b8703f9d2ec
          name: 'Etcd: Keys expired per second'
          type: DEPENDENT
          key: etcd.keys.expired.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of expired keys per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_store_expires_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: keys
        - uuid: fbda737014544cf1bcf544a48aa6e48b
          name: 'Etcd: Keys total'
          type: DEPENDENT
          key: etcd.keys.total
          delay: '0'
          history: 7d
          description: 'Total number of keys.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_keys_total
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: keys
        - uuid: e45ba61d99b8432b86f5797a2cfdb416
          name: 'Etcd: Leader changes'
          type: DEPENDENT
          key: etcd.leader.changes
          delay: '0'
          history: 7d
          description: 'The number of leader changes the member has seen since its start.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_leader_changes_seen_total
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: leader
          triggers:
            - uuid: 1ea623927179489890a5a73eeb8177f7
              # max-min over the window = number of leader changes seen in 15m.
              expression: '(max(/Etcd by HTTP/etcd.leader.changes,15m)-min(/Etcd by HTTP/etcd.leader.changes,15m))>{$ETCD.LEADER.CHANGES.MAX.WARN}'
              name: 'Etcd: Instance has seen too many leader changes'
              event_name: 'Etcd: Instance has seen too many leader changes (over {$ETCD.LEADER.CHANGES.MAX.WARN} for 15m)'
              priority: WARNING
              description: 'Rapid leadership changes impact the performance of etcd significantly. It also signals that the leader is unstable, perhaps due to network connectivity issues or excessive load hitting the etcd cluster.'
              tags:
                - tag: scope
                  value: availability
        - uuid: 348e15d2ec3a4bb88e2ca371f96c2f00
          name: 'Etcd: Maximum open file descriptors'
          type: DEPENDENT
          key: etcd.max.fds
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The Maximum number of open file descriptors.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_max_fds
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: fds
        - uuid: d016b8674ebd4251943f2e94b22f5ff2
          name: 'Etcd: Client gRPC received bytes per second'
          type: DEPENDENT
          key: etcd.network.grpc.received.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Bps
          description: 'The number of bytes received from grpc clients per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_network_client_grpc_received_bytes_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: e50d2d088c6448dbb3ecaeebc3b2b8f1
          name: 'Etcd: Client gRPC sent bytes per second'
          type: DEPENDENT
          key: etcd.network.grpc.sent.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Bps
          description: 'The number of bytes sent to grpc clients per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_network_client_grpc_sent_bytes_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: d5099d4cdb3044ba95935c2aea2b6352
          name: 'Etcd: Open file descriptors'
          type: DEPENDENT
          key: etcd.open.fds
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'Number of open file descriptors.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_open_fds
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: fds
        - uuid: a2927b1e85af41cab9c28b1b79c229ea
          name: 'Etcd: Proposals applied per second'
          type: DEPENDENT
          key: etcd.proposals.applied.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of consensus proposals applied.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_applied_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
        - uuid: e829f3df055e42dfbce5f27eb7ca487c
          name: 'Etcd: Proposals committed per second'
          type: DEPENDENT
          key: etcd.proposals.committed.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of consensus proposals committed.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_committed_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
        - uuid: 6b45b99526394a219d31b5c22cb98c85
          name: 'Etcd: Proposals failed per second'
          type: DEPENDENT
          key: etcd.proposals.failed.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of failed proposals seen.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_failed_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
          triggers:
            - uuid: 432cea8bddd742ca98528be2fbc7e35e
              expression: 'min(/Etcd by HTTP/etcd.proposals.failed.rate,5m)>{$ETCD.PROPOSAL.FAIL.MAX.WARN}'
              name: 'Etcd: Too many proposal failures'
              event_name: 'Etcd: Too many proposal failures (over {$ETCD.PROPOSAL.FAIL.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'Normally related to two issues: temporary failures related to a leader election or longer downtime caused by a loss of quorum in the cluster.'
              tags:
                - tag: scope
                  value: performance
        - uuid: 1c506ff69e7b4564a6d95fd35b1a11fd
          name: 'Etcd: Proposals pending'
          type: DEPENDENT
          key: etcd.proposals.pending
          delay: '0'
          history: 7d
          description: 'The current number of pending proposals to commit.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_pending
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
          triggers:
            - uuid: 5feefc4dd5d14fe2b56dd63029b57026
              expression: 'min(/Etcd by HTTP/etcd.proposals.pending,5m)>{$ETCD.PROPOSAL.PENDING.MAX.WARN}'
              name: 'Etcd: Too many proposals are queued to commit'
              event_name: 'Etcd: Too many proposals are queued to commit (over {$ETCD.PROPOSAL.PENDING.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'Rising pending proposals suggests there is a high client load or the member cannot commit proposals.'
              tags:
                - tag: scope
                  value: performance
        - uuid: bd7398507c274bfab53339380df16761
          name: 'Etcd: PUT per second'
          type: DEPENDENT
          key: etcd.put.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of puts seen by this member per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_put_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: operations
        - uuid: b744c07f3290467b96b21ea38ad5d497
          name: 'Etcd: Range per second'
          type: DEPENDENT
          key: etcd.range.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of ranges seen by this member per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_range_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: operations
        - uuid: 88c91b36eca94fd2b357a67d171dc621
          name: 'Etcd: Reads per second'
          type: DEPENDENT
          key: etcd.reads.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'Number of reads action by (get/getRecursive), local to this member.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_debugging_store_reads_total
            # The "//" comment must stay on its own line inside the script,
            # otherwise it comments out the code that follows it.
            - type: JAVASCRIPT
              parameters:
                - |
                  //calculates total reads
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: store
        - uuid: 4b881e32094e4f478c5d0849cb5d07a7
          name: 'Etcd: Resident memory'
          type: DEPENDENT
          key: etcd.res.bytes
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'Resident memory size in bytes.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_resident_memory_bytes
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: memory
        - uuid: dee9ed8897cf4d3582957707ea09cdf8
          name: 'Etcd: Server version'
          type: DEPENDENT
          key: etcd.server.version
          delay: '0'
          history: 7d
          trends: '0'
          value_type: CHAR
          description: 'Version of the Etcd server.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.etcdserver
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1d
          master_item:
            key: etcd.get_version
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: cfb2fc467b224ef694d59b5c081ed965
              expression: 'last(/Etcd by HTTP/etcd.server.version,#1)<>last(/Etcd by HTTP/etcd.server.version,#2) and length(last(/Etcd by HTTP/etcd.server.version))>0'
              name: 'Etcd: Server version has changed'
              event_name: 'Etcd: Server version has changed (new version: {ITEM.VALUE})'
              priority: INFO
              description: 'Etcd version has changed. Ack to close.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: b14c787c716146e990bc388d277a2803
          name: 'Etcd: Transaction per second'
          type: DEPENDENT
          key: etcd.txn.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of transactions seen by this member per second.'
          preprocessing:
            # Transactions have their own counter; the range counter is already
            # collected by etcd.range.rate.
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_txn_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: transactions
        - uuid: 98ec9085d621446aa462efc86cf93905
          name: 'Etcd: Uptime'
          type: DEPENDENT
          key: etcd.uptime
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'Etcd server uptime.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_start_time_seconds
                - value
                - ''
            # uptime = current epoch seconds - process start time.
            - type: JAVASCRIPT
              parameters:
                - |
                  //use boottime to calculate uptime
                  return (Math.floor(Date.now()/1000)-Number(value));
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: 6843369969f5410a840494104d71fe1f
              expression: 'last(/Etcd by HTTP/etcd.uptime)<10m'
              name: 'Etcd: has been restarted'
              event_name: 'Etcd: has been restarted (uptime < 10m)'
              priority: INFO
              description: 'Uptime is less than 10 minutes.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: c35810b8b7bc4a62970b5293fb2d8fb2
          name: 'Etcd: Virtual memory'
          type: DEPENDENT
          key: etcd.virtual.bytes
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'Virtual memory size in bytes.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_virtual_memory_bytes
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: memory
        - uuid: 16c041fc189248bfaaa5826ffaf38459
          name: 'Etcd: Writes per second'
          type: DEPENDENT
          key: etcd.writes.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'Number of writes (e.g. set/compareAndDelete) seen by this member.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_debugging_store_writes_total
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                      return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: store
        - uuid: a0f94f429b99432e86d15ffa74d6eada
          name: 'Etcd: Service''s TCP port state'
          type: SIMPLE
          key: 'net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"]'
          history: 7d
          valuemap:
            name: 'Service state'
          preprocessing:
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          tags:
            - tag: component
              value: health
            - tag: component
              value: network
          triggers:
            # Other triggers in this template list this one as a dependency.
            - uuid: 74164f0783ae4227ba44f3e865fee3bd
              expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
              name: 'Etcd: Service is unavailable'
              priority: AVERAGE
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: availability
      discovery_rules:
        - uuid: 5e6121383e5d4f3eb1150a2068a4633b
          name: 'gRPC codes discovery'
          type: DEPENDENT
          key: etcd.grpc_code.discovery
          delay: '0'
          filter:
            evaltype: AND
            conditions:
              - macro: '{#GRPC.CODE}'
                value: '{$ETCD.GRPC_CODE.NOT_MATCHES}'
                operator: NOT_MATCHES_REGEX
                formulaid: A
              - macro: '{#GRPC.CODE}'
                value: '{$ETCD.GRPC_CODE.MATCHES}'
                formulaid: B
          item_prototypes:
            - uuid: 7d316cbec2ce4718ac133d90b7a89585
              name: 'Etcd: RPCs completed with code {#GRPC.CODE}'
              type: DEPENDENT
              key: 'etcd.grpc.handled.rate[{#GRPC.CODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of RPCs completed on the server with grpc_code {#GRPC.CODE}.'
              preprocessing:
                # {#GRPC.CODE} is built from labels.grpc_code by this rule's
                # discovery script, so the selector filters on that same label.
                - type: PROMETHEUS_TO_JSON
                  parameters:
                    - 'grpc_server_handled_total{grpc_code="{#GRPC.CODE}"}'
                - type: JAVASCRIPT
                  parameters:
                    - |
                      var valueArr = JSON.parse(value);
                      return valueArr.reduce(function(acc,obj){
                          return acc + parseFloat(obj['value'])
                      },0);
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: grpc
                - tag: grpc-code
                  value: '{#GRPC.CODE}'
              trigger_prototypes:
                - uuid: 459b6ee5735047d597a6a4ab41b76e21
                  expression: 'min(/Etcd by HTTP/etcd.grpc.handled.rate[{#GRPC.CODE}],5m)>{$ETCD.GRPC.ERRORS.MAX.WARN}'
                  name: 'Etcd: Too many failed gRPC requests with code: {#GRPC.CODE}'
                  event_name: 'Etcd: Too many failed gRPC requests with code: {#GRPC.CODE} (over {$ETCD.GRPC.ERRORS.MAX.WARN} in 5m)'
                  # Not discovered by default; the "trigger" override of this
                  # rule switches discovery on only for codes matching
                  # {$ETCD.GRPC_CODE.TRIGGER.MATCHES}.
                  discover: NO_DISCOVER
                  priority: WARNING
                  tags:
                    - tag: scope
                      value: availability
          master_item:
            key: etcd.get_metrics
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_handled_total
            # Emit one LLD row per distinct grpc_code label value.
            - type: JAVASCRIPT
              parameters:
                - |
                  var data = JSON.parse(value),
                      lookup = {},
                      result =[];

                  for (var item, i = 0; item = data[i++];) {
                      var code = item.labels.grpc_code;
                      if (!(code in lookup)) {
                          lookup[code] = 1;
                          result.push({ "{#GRPC.CODE}": code});
                      }
                  }
                  return JSON.stringify(result);
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          overrides:
            - name: trigger
              step: '1'
              filter:
                conditions:
                  - macro: '{#GRPC.CODE}'
                    value: '{$ETCD.GRPC_CODE.TRIGGER.MATCHES}'
                    formulaid: A
              operations:
                - operationobject: TRIGGER_PROTOTYPE
                  operator: LIKE
                  value: 'Too many failed gRPC requests'
                  status: ENABLED
                  discover: DISCOVER
        - uuid: b7b527ee30b84a569afcd1f85b705810
          name: 'Peers discovery'
          type: DEPENDENT
          key: etcd.peer.discovery
          delay: '0'
          item_prototypes:
            - uuid: 4129aa7b8acf4ca3b5476461fe5275c9
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Bytes received'
              type: DEPENDENT
              key: 'etcd.bytes.received.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Bps
              description: 'The number of bytes received from peer with ID {#ETCD.PEER}.'
              preprocessing:
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_received_bytes_total{From="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
            - uuid: 8f5fecbabe474baaab40df46879401af
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Bytes sent'
              type: DEPENDENT
              key: 'etcd.bytes.sent.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Bps
              description: 'The number of bytes sent to peer with ID {#ETCD.PEER}.'
              preprocessing:
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_sent_bytes_total{To="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
            - uuid: 2521ccfc16fc43069001883b85aa0243
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Receive failures'
              type: DEPENDENT
              key: 'etcd.received.fail.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of receive failures from the peer with ID {#ETCD.PEER}.'
              preprocessing:
                # Receive-side peer metrics carry the peer ID in the "From"
                # label (as in etcd_network_peer_received_bytes_total above).
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_received_failures_total{From="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
            - uuid: 5756f1a16e5c42b79f6d6225c5382599
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Send failures'
              type: DEPENDENT
              key: 'etcd.sent.fail.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of send failures from peer with ID {#ETCD.PEER}.'
              preprocessing:
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_sent_failures_total{To="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
          master_item:
            key: etcd.get_metrics
          # Peers are enumerated from the "To" label of the sent-bytes metric.
          lld_macro_paths:
            - lld_macro: '{#ETCD.PEER}'
              path: $.labels.To
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_network_peer_sent_bytes_total
      tags:
        - tag: class
          value: application
        - tag: target
          value: etcd
      macros:
        - macro: '{$ETCD.GRPC.ERRORS.MAX.WARN}'
          value: '1'
          description: 'Maximum number of gRPC requests failures.'
        - macro: '{$ETCD.GRPC_CODE.MATCHES}'
          value: '.*'
          description: 'Filter of discoverable gRPC codes https://github.com/grpc/grpc/blob/master/doc/statuscodes.md.'
        - macro: '{$ETCD.GRPC_CODE.NOT_MATCHES}'
          value: CHANGE_IF_NEEDED
          description: 'Filter to exclude discovered gRPC codes https://github.com/grpc/grpc/blob/master/doc/statuscodes.md.'
        - macro: '{$ETCD.GRPC_CODE.TRIGGER.MATCHES}'
          value: Aborted|Unavailable
          description: 'Filter of discoverable gRPC codes which will create triggers.'
        - macro: '{$ETCD.HTTP.FAIL.MAX.WARN}'
          value: '2'
          description: 'Maximum number of HTTP requests failures.'
        - macro: '{$ETCD.LEADER.CHANGES.MAX.WARN}'
          value: '5'
          description: 'Maximum number of leader changes.'
        - macro: '{$ETCD.OPEN.FDS.MAX.WARN}'
          value: '90'
          description: 'Maximum percentage of used file descriptors.'
        - macro: '{$ETCD.PASSWORD}'
        - macro: '{$ETCD.PORT}'
          value: '2379'
          description: 'The port of Etcd API endpoint.'
        - macro: '{$ETCD.PROPOSAL.FAIL.MAX.WARN}'
          value: '2'
          description: 'Maximum number of proposal failures.'
        - macro: '{$ETCD.PROPOSAL.PENDING.MAX.WARN}'
          value: '5'
          description: 'Maximum number of proposals in queue.'
        - macro: '{$ETCD.SCHEME}'
          value: http
          description: 'Request scheme which may be http or https.'
        - macro: '{$ETCD.USER}'
      valuemaps:
        - uuid: f25e21a70baa4e009bdbcb44acb1a22e
          name: 'Etcd healthcheck'
          mappings:
            - value: '0'
              newvalue: Failed
            - value: '1'
              newvalue: Ok
        - uuid: 7bcaf8a520e24613a96d49e63a91a55b
          name: 'Etcd leader'
          mappings:
            - value: '0'
              newvalue: 'No'
            - value: '1'
              newvalue: 'Yes'
        - uuid: 1735a8d251b24c3fbab32e766064536b
          name: 'Service state'
          mappings:
            - value: '0'
              newvalue: Down
            - value: '1'
              newvalue: Up
  # This trigger references two items (etcd.open.fds and etcd.max.fds), so it
  # is exported here rather than nested under a single item.
  triggers:
    - uuid: c45583928d204c04ad8884115e1e35c5
      expression: 'min(/Etcd by HTTP/etcd.open.fds,5m)/last(/Etcd by HTTP/etcd.max.fds)*100>{$ETCD.OPEN.FDS.MAX.WARN}'
      name: 'Etcd: Current number of open files is too high'
      event_name: 'Etcd: Current number of open files is too high (over {$ETCD.OPEN.FDS.MAX.WARN}% for 5m)'
      priority: WARNING
      description: |
        Heavy file descriptor usage (i.e., near the process's file descriptor limit) indicates a potential file descriptor exhaustion issue.
        If the file descriptors are exhausted, etcd may panic because it cannot create new WAL files.
      tags:
        - tag: scope
          value: capacity
  graphs:
    - uuid: 18baccd03c0f4814a42d32b51334787d
      name: 'Etcd: File descriptors'
      graph_items:
        - drawtype: GRADIENT_LINE
          color: 1A7C11
          item:
            host: 'Etcd by HTTP'
            key: etcd.open.fds
        - sortorder: '1'
          drawtype: BOLD_LINE
          color: 2774A4
          item:
            host: 'Etcd by HTTP'
            key: etcd.max.fds
    - uuid: eefd07cf30d84cc4b84f802468363200
      name: 'Etcd: gRPC client traffic'
      graph_items:
        - color: 1A7C11
          item:
            host: 'Etcd by HTTP'
            key: etcd.network.grpc.received.rate
        - sortorder: '1'
          color: 2774A4
          item:
            host: 'Etcd by HTTP'
            key: etcd.network.grpc.sent.rate
    - uuid: c53ee0dba42d4a1f8afedbe0f6e42785
      name: 'Etcd: gRPC requests rate'
      graph_items:
        - color: 1A7C11
          item:
            host: 'Etcd by HTTP'
            key: etcd.grpc.received.rate
        - sortorder: '1'
          color: 2774A4
          item:
            host: 'Etcd by HTTP'
            key: etcd.grpc.sent.rate
        - sortorder: '2'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.grpc.started.rate
    - uuid: 520ff92815d84e0f84e9296d249c04ae
      name: 'Etcd: HTTP requests rate'
      graph_items:
        - color: 1A7C11
          item:
            host: 'Etcd by HTTP'
            key: etcd.http.requests.4xx.rate
        - sortorder: '1'
          color: 2774A4
          item:
            host: 'Etcd by HTTP'
            key: etcd.http.requests.5xx.rate
        - sortorder: '2'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.http.requests.rate
    - uuid: 90af5b2f75b7402693bad7a8f371ab8e
      name: 'Etcd: Memory usage'
      graph_items:
        - drawtype: GRADIENT_LINE
          color: 1A7C11
          item:
            host: 'Etcd by HTTP'
            key: etcd.res.bytes
        - sortorder: '1'
          drawtype: GRADIENT_LINE
          color: 2774A4
          item:
            host: 'Etcd by HTTP'
            key: etcd.virtual.bytes
    - uuid: 59cd15292ad04ebd902a7d3080b53838
      name: 'Etcd: Proposals rate'
      graph_items:
        - color: 1A7C11
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.failed.rate
        - sortorder: '1'
          color: 2774A4
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.committed.rate
        - sortorder: '2'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.applied.rate
        - sortorder: '3'
          drawtype: BOLD_LINE
          color: A54F10
          yaxisside: RIGHT
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.pending
    - uuid: b374fab55bcc452e9279214ddb2c8024
      name: 'Etcd: Read/Write rate'
      graph_items:
        - color: 1A7C11
          item:
            host: 'Etcd by HTTP'
            key: etcd.reads.rate
        - sortorder: '1'
          color: 2774A4
          item:
            host: 'Etcd by HTTP'
            key: etcd.writes.rate