# Zabbix template export: "Elasticsearch Cluster by HTTP" (export format 6.2).
# Reconstructed into valid block-style YAML from a whitespace-collapsed export;
# all keys, values, UUIDs, trigger expressions, and macros are preserved verbatim.
# NOTE(review): line breaks inside multi-line descriptions were lost in the
# mangled source and have been restored at sentence boundaries — confirm against
# the upstream template if exact description formatting matters.
zabbix_export:
  version: '6.2'
  date: '2022-06-07T19:33:18Z'
  template_groups:
    - uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
      name: Templates/Applications
  templates:
    - uuid: 52b2664578884d9eba62e47375c99f8e
      template: 'Elasticsearch Cluster by HTTP'
      name: 'Elasticsearch Cluster by HTTP'
      description: |
        The template to monitor Elasticsearch by Zabbix that work without any external scripts.
        It works with both standalone and cluster instances.
        The metrics are collected in one pass remotely using an HTTP agent.
        They are getting values from REST API _cluster/health, _cluster/stats, _nodes/stats requests.
        You can set {$ELASTICSEARCH.USERNAME} and {$ELASTICSEARCH.PASSWORD} macros in the template for using on the host level.
        If you use an atypical location ES API, don't forget to change the macros {$ELASTICSEARCH.SCHEME},{$ELASTICSEARCH.PORT}.

        You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback/399473-discussion-thread-for-official-zabbix-template-for-elasticsearch

        Template tooling version used: 0.41
      groups:
        - name: Templates/Applications
      items:
        # --- Cluster-wide items fed by the _cluster/health and _cluster/stats raw items ---
        - uuid: f3531c005c7f477b9916b4bf1ad273c2
          name: 'ES: Delayed unassigned shards'
          type: DEPENDENT
          key: es.cluster.delayed_unassigned_shards
          delay: '0'
          history: 7d
          description: 'The number of shards whose allocation has been delayed by the timeout settings.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.delayed_unassigned_shards
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
        - uuid: 7f587fd270be4eb68d81ae1de2a3ca1f
          name: 'ES: Get cluster health'
          type: HTTP_AGENT
          key: es.cluster.get_health
          history: 0h
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Returns the health status of a cluster.'
          timeout: 15s
          url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_cluster/health?timeout=5s'
          tags:
            - tag: component
              value: raw
        - uuid: 7066a66f352e4d79ba4aec11c0c5c611
          name: 'ES: Get cluster stats'
          type: HTTP_AGENT
          key: es.cluster.get_stats
          history: 0h
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Returns cluster statistics.'
          timeout: 15s
          url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_cluster/stats'
          tags:
            - tag: component
              value: raw
        - uuid: 500a763b9bfd4044b2d3bc95d3a0586c
          name: 'ES: Inactive shards percentage'
          type: DEPENDENT
          key: es.cluster.inactive_shards_percent_as_number
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: '%'
          description: 'The ratio of inactive shards in the cluster expressed as a percentage.'
          preprocessing:
            # API reports the ACTIVE percentage; invert it to get the inactive share.
            - type: JSONPATH
              parameters:
                - $.active_shards_percent_as_number
            - type: JAVASCRIPT
              parameters:
                - 'return (100 - value)'
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
        - uuid: e0b684d5992a496981cf6fb9bb85be62
          name: 'ES: Number of initializing shards'
          type: DEPENDENT
          key: es.cluster.initializing_shards
          delay: '0'
          history: 7d
          description: 'The number of shards that are under initialization.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.initializing_shards
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
          triggers:
            - uuid: ee59684bb2044540ad0306a5b6deb408
              expression: 'min(/Elasticsearch Cluster by HTTP/es.cluster.initializing_shards,10m)>0'
              name: 'ES: Cluster has the initializing shards'
              priority: AVERAGE
              description: 'The cluster has the initializing shards longer than 10 minutes.'
              tags:
                - tag: scope
                  value: notice
        - uuid: 7d3c87e2fcae49438a14380f7d5faa81
          name: 'ES: Number of data nodes'
          type: DEPENDENT
          key: es.cluster.number_of_data_nodes
          delay: '0'
          history: 7d
          description: 'The number of nodes that are dedicated to data nodes.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.number_of_data_nodes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: nodes
        - uuid: 4ec7496b441643f39df3e25c0225e6ec
          name: 'ES: Number of nodes'
          type: DEPENDENT
          key: es.cluster.number_of_nodes
          delay: '0'
          history: 7d
          description: 'The number of nodes within the cluster.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.number_of_nodes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: nodes
          triggers:
            - uuid: 5be4cb40e72442aca18ea014adbead23
              expression: 'change(/Elasticsearch Cluster by HTTP/es.cluster.number_of_nodes)<0'
              name: 'ES: The number of nodes within the cluster has decreased'
              priority: INFO
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
            - uuid: 2fba2b29425b4ff9a4008db9bde49d7f
              expression: 'change(/Elasticsearch Cluster by HTTP/es.cluster.number_of_nodes)>0'
              name: 'ES: The number of nodes within the cluster has increased'
              priority: INFO
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: 2c84eea7f4a642f8892cb7f50febb562
          name: 'ES: Number of pending tasks'
          type: DEPENDENT
          key: es.cluster.number_of_pending_tasks
          delay: '0'
          history: 7d
          description: 'The number of cluster-level changes that have not yet been executed.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.number_of_pending_tasks
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: tasks
        - uuid: 981818f43a3c4b36b36e4b3c4e3468e9
          name: 'ES: Number of relocating shards'
          type: DEPENDENT
          key: es.cluster.relocating_shards
          delay: '0'
          history: 7d
          description: 'The number of shards that are under relocation.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.relocating_shards
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
        - uuid: b9568a6370dc40efae45ac1e0b719dd7
          name: 'ES: Cluster health status'
          type: DEPENDENT
          key: es.cluster.status
          delay: '0'
          history: 7d
          description: |
            Health status of the cluster, based on the state of its primary and replica shards. Statuses are:
            green
            All shards are assigned.
            yellow
            All primary shards are assigned, but one or more replica shards are unassigned. If a node in the cluster fails, some data could be unavailable until that node is repaired.
            red
            One or more primary shards are unassigned, so some data is unavailable. This can occur briefly during cluster startup as primary shards are assigned.
          valuemap:
            name: 'ES cluster state'
          preprocessing:
            # Map textual status to an integer: green=0, yellow=1, red=2, anything else=255.
            - type: JSONPATH
              parameters:
                - $.status
            - type: JAVASCRIPT
              parameters:
                - |
                  var state = ['green', 'yellow', 'red'];
                  return state.indexOf(value.trim()) === -1 ? 255 : state.indexOf(value.trim());
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: health
          triggers:
            - uuid: f3bb7e96f6074063bad76521e1dce24f
              expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=2'
              name: 'ES: Health is RED'
              priority: HIGH
              description: |
                One or more primary shards are unassigned, so some data is unavailable.
                This can occur briefly during cluster startup as primary shards are assigned.
              tags:
                - tag: scope
                  value: availability
            - uuid: 33a6ef13f7b240768841919482709411
              expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=255'
              name: 'ES: Health is UNKNOWN'
              priority: HIGH
              description: 'The health status of the cluster is unknown or cannot be obtained.'
              tags:
                - tag: scope
                  value: availability
            - uuid: 99f36c2aa5d64248b96d1fc97c3b3065
              expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=1'
              name: 'ES: Health is YELLOW'
              priority: AVERAGE
              description: |
                All primary shards are assigned, but one or more replica shards are unassigned.
                If a node in the cluster fails, some data could be unavailable until that node is repaired.
              tags:
                - tag: scope
                  value: availability
        - uuid: 2295e2ba3d4949feb3519ac85ba6ff86
          name: 'ES: Task max waiting in queue'
          type: DEPENDENT
          key: es.cluster.task_max_waiting_in_queue
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'The time expressed in seconds since the earliest initiated task is waiting for being performed.'
          preprocessing:
            # API reports milliseconds; convert to seconds.
            - type: JSONPATH
              parameters:
                - $.task_max_waiting_in_queue_millis
            - type: MULTIPLIER
              parameters:
                - '0.001'
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: tasks
        - uuid: 85e82cba1e5c479caede2a94c58239be
          name: 'ES: Number of unassigned shards'
          type: DEPENDENT
          key: es.cluster.unassigned_shards
          delay: '0'
          history: 7d
          description: 'The number of shards that are not allocated.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.unassigned_shards
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
          triggers:
            - uuid: 35bdd54f27a64009b96a06a12508f99c
              expression: 'min(/Elasticsearch Cluster by HTTP/es.cluster.unassigned_shards,10m)>0'
              name: 'ES: Cluster has the unassigned shards'
              priority: AVERAGE
              description: 'The cluster has the unassigned shards longer than 10 minutes.'
              tags:
                - tag: scope
                  value: notice
        - uuid: db2f3161eaff4eb0b25d4191b423c733
          name: 'ES: Indices with shards assigned to nodes'
          type: DEPENDENT
          key: es.indices.count
          delay: '0'
          history: 7d
          description: 'The total number of indices with shards assigned to the selected nodes.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.indices.count
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: indices
        - uuid: 25cb1e0203334efd96d3e626f81b4670
          name: 'ES: Number of non-deleted documents'
          type: DEPENDENT
          key: es.indices.docs.count
          delay: '0'
          history: 7d
          description: |
            The total number of non-deleted documents across all primary shards assigned to the selected nodes.
            This number is based on the documents in Lucene segments and may include the documents from nested fields.
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.indices.docs.count
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: documents
        - uuid: 54f51a653e014290aa3f91deaca44e47
          name: 'ES: Nodes with the data role'
          type: DEPENDENT
          key: es.nodes.count.data
          delay: '0'
          history: 7d
          description: 'The number of selected nodes with the data role.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.count.data
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: nodes
        - uuid: 613ab3469f234e278af99d61e57b46bd
          name: 'ES: Nodes with the ingest role'
          type: DEPENDENT
          key: es.nodes.count.ingest
          delay: '0'
          history: 7d
          description: 'The number of selected nodes with the ingest role.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.count.ingest
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: nodes
        - uuid: e7b36f37b86845339a306dacf874164a
          name: 'ES: Nodes with the master role'
          type: DEPENDENT
          key: es.nodes.count.master
          delay: '0'
          history: 7d
          description: 'The number of selected nodes with the master role.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.count.master
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: nodes
          triggers:
            - uuid: 8011883baef04aa0bed343d1bb0df288
              expression: 'last(/Elasticsearch Cluster by HTTP/es.nodes.count.master)=2'
              name: 'ES: Cluster has only two master nodes'
              priority: DISASTER
              description: 'The cluster has only two nodes with a master role and will be unavailable if one of them breaks.'
              tags:
                - tag: scope
                  value: notice
        - uuid: c3ea99e5897b4663a3239d0edd66f1f0
          name: 'ES: Total available size to JVM in all file stores'
          type: DEPENDENT
          key: es.nodes.fs.available_in_bytes
          delay: '0'
          history: 7d
          units: B
          description: |
            The total number of bytes available to JVM in the file stores across all selected nodes.
            Depending on OS or process-level restrictions, this number may be less than nodes.fs.free_in_byes.
            This is the actual amount of free disk space the selected Elasticsearch nodes can use.
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.fs.available_in_bytes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: storage
        - uuid: b8c3c5d8866d4a6b9c6847b5c8da0631
          name: 'ES: Total size of all file stores'
          type: DEPENDENT
          key: es.nodes.fs.total_in_bytes
          delay: '0'
          history: 7d
          units: B
          description: 'The total size in bytes of all file stores across all selected nodes.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.fs.total_in_bytes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: storage
        - uuid: 66c22b8b2b8b40fda7ac6f0ae472befd
          name: 'ES: Get nodes stats'
          type: HTTP_AGENT
          key: es.nodes.get_stats
          history: 0h
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Returns cluster nodes statistics.'
          timeout: 30s
          url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_nodes/stats'
          tags:
            - tag: component
              value: raw
        - uuid: 2dcf54f21cbd4af9a7931e3a2522685c
          name: 'ES: Cluster uptime'
          type: DEPENDENT
          key: es.nodes.jvm.max_uptime
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'Uptime duration in seconds since JVM has last started.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.jvm.max_uptime_in_millis
            - type: MULTIPLIER
              parameters:
                - '0.001'
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: system
          triggers:
            - uuid: d28aa18c85cb4f48a9e7b8ba55d65400
              expression: 'last(/Elasticsearch Cluster by HTTP/es.nodes.jvm.max_uptime)<10m'
              name: 'ES: Cluster has been restarted'
              event_name: 'ES: Cluster has been restarted (uptime < 10m)'
              priority: INFO
              description: 'Uptime is less than 10 minutes.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: 671888d47c724e54aca78fbe1b3ecaed
          name: 'ES: Service response time'
          type: SIMPLE
          key: 'net.tcp.service.perf["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"]'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'Checks performance of the TCP service.'
          tags:
            - tag: component
              value: network
          triggers:
            - uuid: 2a3f3b96e5dd47de998ccc17f109e149
              expression: 'min(/Elasticsearch Cluster by HTTP/net.tcp.service.perf["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"],5m)>{$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN}'
              name: 'ES: Service response time is too high'
              event_name: 'ES: Service response time is too high (over {$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'The performance of the TCP service is very low.'
              manual_close: 'YES'
              dependencies:
                - name: 'ES: Service is down'
                  expression: 'last(/Elasticsearch Cluster by HTTP/net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"])=0'
              tags:
                - tag: scope
                  value: performance
        - uuid: d0d38ce55f844a51a0f2131c86bec1ae
          name: 'ES: Service status'
          type: SIMPLE
          key: 'net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"]'
          history: 7d
          description: 'Checks if the service is running and accepting TCP connections.'
          valuemap:
            name: 'Service state'
          preprocessing:
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          tags:
            - tag: component
              value: health
            - tag: component
              value: network
          triggers:
            - uuid: b4d76f68ce94492f96bbfbb778d1f144
              expression: 'last(/Elasticsearch Cluster by HTTP/net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"])=0'
              name: 'ES: Service is down'
              priority: AVERAGE
              description: 'The service is unavailable or does not accept TCP connections.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: availability
      discovery_rules:
        # Per-node item/trigger prototypes fed by the _nodes/stats raw item.
        - uuid: 5105173f24d941b2969fe3d190d22e82
          name: 'Cluster nodes discovery'
          type: HTTP_AGENT
          key: es.nodes.discovery
          delay: 1h
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Discovery ES cluster nodes.'
          item_prototypes:
            - uuid: b4e87d039e9d4feeb03e0e33f14b2c82
              name: 'ES {#ES.NODE}: Total available size'
              type: DEPENDENT
              key: 'es.node.fs.total.available_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: |
                The total number of bytes available to this Java virtual machine on all file stores.
                Depending on OS or process level restrictions, this might appear less than fs.total.free_in_bytes.
                This is the actual amount of free disk space the Elasticsearch node can utilize.
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].fs.total.available_in_bytes.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: storage
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 4f315fdf62884b0284bf04f1a85aeb98
              name: 'ES {#ES.NODE}: Total size'
              type: DEPENDENT
              key: 'es.node.fs.total.total_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'Total size (in bytes) of all file stores.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].fs.total.total_in_bytes.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1d
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: storage
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 2ba1fc7e2dad4d0ab1807221fb1e4fca
              name: 'ES {#ES.NODE}: Number of open HTTP connections'
              type: DEPENDENT
              key: 'es.node.http.current_open[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of currently open HTTP connections for the node.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].http.current_open.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: connections
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: fcf163d5db9b455fa38823e8ad16e578
              name: 'ES {#ES.NODE}: Rate of HTTP connections opened'
              type: DEPENDENT
              key: 'es.node.http.opened.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of HTTP connections opened for the node per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].http.total_opened.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: connections
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: c530311329e348cca74c38fa2260236b
              name: 'ES {#ES.NODE}: Flush latency'
              type: CALCULATED
              key: 'es.node.indices.flush.latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              # The "+ (change(...) = 0)" term guards against division by zero when no flushes occurred.
              params: 'change(//es.node.indices.flush.total_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.flush.total[{#ES.NODE}]) + (change(//es.node.indices.flush.total[{#ES.NODE}]) = 0) )'
              description: 'The average flush latency calculated from the available flush.total and flush.total_time_in_millis metrics.'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 6c2ea291374b4de1ab06ddeccba635b1
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.flush.latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Flush latency is too high'
                  event_name: 'ES {#ES.NODE}: Flush latency is too high (over {$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: |
                    If you see this metric increasing steadily, it may indicate a problem with slow disks; this problem may escalate and eventually prevent you from being able to add new information to your index.
                  tags:
                    - tag: scope
                      value: notice
            - uuid: 8e270dfff9c84d2a96a134dd6d86533b
              name: 'ES {#ES.NODE}: Total number of index flushes to disk'
              type: DEPENDENT
              key: 'es.node.indices.flush.total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of flush operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.flush.total.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7e18149dcaee47748e4073f4ce814c03
              name: 'ES {#ES.NODE}: Total time spent on flushing indices to disk'
              type: DEPENDENT
              key: 'es.node.indices.flush.total_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Total time in milliseconds spent performing flush operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.flush.total_time_in_millis.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: e91cc33c088a4f56a9176fd6a09f2411
              name: 'ES {#ES.NODE}: Current indexing operations'
              type: DEPENDENT
              key: 'es.node.indices.indexing.index_current[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of indexing operations currently running.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.indexing.index_current.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 23f54c51c30a4dbdbef48611a7907db6
              name: 'ES {#ES.NODE}: Indexing latency'
              type: CALCULATED
              key: 'es.node.indices.indexing.index_latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              params: 'change(//es.node.indices.indexing.index_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.indexing.index_total[{#ES.NODE}]) + (change(//es.node.indices.indexing.index_total[{#ES.NODE}]) = 0) )'
              description: 'The average indexing latency calculated from the available index_total and index_time_in_millis metrics.'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 2755b3f8811a45fba7c48347707ceaf8
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.indexing.index_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Indexing latency is too high'
                  event_name: 'ES {#ES.NODE}: Indexing latency is too high (over {$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: |
                    If the latency is increasing, it may indicate that you are indexing too many documents at the same time (Elasticsearch's documentation recommends starting with a bulk indexing size of 5 to 15 megabytes and increasing slowly from there).
                  tags:
                    - tag: scope
                      value: notice
            - uuid: f471dad45ff149b09a479963cb616fc2
              name: 'ES {#ES.NODE}: Total time spent performing indexing'
              type: DEPENDENT
              key: 'es.node.indices.indexing.index_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Total time in milliseconds spent performing indexing operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.indexing.index_time_in_millis.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: ad36b8495eca49c48e7d8a7877a325c2
              name: 'ES {#ES.NODE}: Total number of indexing'
              type: DEPENDENT
              key: 'es.node.indices.indexing.index_total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of indexing operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.indexing.index_total.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: bb0cb2196b14483f8807a855f2f531a7
              name: 'ES {#ES.NODE}: Time spent throttling operations'
              type: DEPENDENT
              key: 'es.node.indices.indexing.throttle_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent throttling operations for the last measuring span.'
              preprocessing:
                # Milliseconds -> seconds, then delta since the previous poll.
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.indexing.throttle_time_in_millis.first()'
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 5f3b7dca802343cd905d54e66ac0e113
              name: 'ES {#ES.NODE}: Time spent throttling merge operations'
              type: DEPENDENT
              key: 'es.node.indices.merges.total_throttled_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent throttling merge operations for the last measuring span.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.merges.total_throttled_time_in_millis.first()'
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 9c82da45a63947dd91a4e19e6f2d121d
              name: 'ES {#ES.NODE}: Time spent throttling recovery operations'
              type: DEPENDENT
              key: 'es.node.indices.recovery.throttle_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent throttling recovery operations for the last measuring span.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.recovery.throttle_time_in_millis.first()'
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: e27361fdce5a4635854960066ac050ca
              name: 'ES {#ES.NODE}: Rate of index refreshes'
              type: DEPENDENT
              key: 'es.node.indices.refresh.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of refresh operations per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.refresh.total.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: fd20bbc5012d4c5693710b321e252193
              name: 'ES {#ES.NODE}: Time spent performing refresh'
              type: DEPENDENT
              key: 'es.node.indices.refresh.time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent performing refresh operations for the last measuring span.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.refresh.total_time_in_millis.first()'
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 6d3b074aecb44a08a3573aba0ff006f9
              name: 'ES {#ES.NODE}: Rate of fetch'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of fetch operations per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.fetch_total.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 36b2ffa3ed9f4c9781ccded273c395d7
              name: 'ES {#ES.NODE}: Current fetch operations'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_current[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of fetch operations currently running.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.fetch_current.first()'
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 79b5fd5e04e249da9f73fbe68915fdb1
              name: 'ES {#ES.NODE}: Fetch latency'
              type: CALCULATED
              key: 'es.node.indices.search.fetch_latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              params: 'change(//es.node.indices.search.fetch_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.search.fetch_total[{#ES.NODE}]) + (change(//es.node.indices.search.fetch_total[{#ES.NODE}]) = 0) )'
              description: 'The average fetch latency calculated by sampling the total number of fetches and the total elapsed time at regular intervals.'
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 6edd6851f31842659a57e29c444a9b32
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.search.fetch_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Fetch latency is too high'
                  event_name: 'ES {#ES.NODE}: Fetch latency is too high (over {$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: |
                    The fetch phase should typically take much less time than the query phase.

                    If you notice this metric consistently increasing, this could indicate a problem with slow disks, enriching of documents (highlighting the relevant text in search results, etc.), or requesting too many results.
                  tags:
                    - tag: scope
                      value: notice
            - uuid: a10e7dca72c8411a9b7fdcbeb676017e
              name: 'ES {#ES.NODE}: Time spent performing fetch'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent performing fetch operations for the last measuring span.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.fetch_time_in_millis.first()'
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: f35e3e9773394632b422dbc4e4442171
              name: 'ES {#ES.NODE}: Total time spent performing fetch'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Time in milliseconds spent performing fetch operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.fetch_time_in_millis.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: b3ac68f3531f478b9ad28fb1988df257
              name: 'ES {#ES.NODE}: Total number of fetch'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of fetch operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.fetch_total.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: d347df9c9eee4aa89ccfb9147143b5d5
              name: 'ES {#ES.NODE}: Rate of queries'
              type: DEPENDENT
              key: 'es.node.indices.search.query.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of query operations per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.query_total.first()'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 20fb738abf7a4aa1bf3ccb84790a26c9
              name: 'ES {#ES.NODE}: Current query operations'
              type: DEPENDENT
              key: 'es.node.indices.search.query_current[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of query operations currently running.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.query_current.first()'
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: fadb6fe530ab4b1296d4c17728667b12
              name: 'ES {#ES.NODE}: Query latency'
              type: CALCULATED
              key: 'es.node.indices.search.query_latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              params: |
                change(//es.node.indices.search.query_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.search.query_total[{#ES.NODE}]) + (change(//es.node.indices.search.query_total[{#ES.NODE}]) = 0) )
              description: 'The average query latency calculated by sampling the total number of queries and the total elapsed time at regular intervals.'
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 1892000290c843e887d128bf9f97c869
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.search.query_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Query latency is too high'
                  event_name: 'ES {#ES.NODE}: Query latency is too high (over {$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: 'If latency exceeds a threshold, look for potential resource bottlenecks, or investigate whether you need to optimize your queries.'
                  tags:
                    - tag: scope
                      value: notice
            - uuid: ce807b641b0b4501b6a8e253d8403ce4
              name: 'ES {#ES.NODE}: Time spent performing query'
              type: DEPENDENT
              key: 'es.node.indices.search.query_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent performing query operations for the last measuring span.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.query_time_in_millis.first()'
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7afc767463c64bbb9290975a8cef3cec
              name: 'ES {#ES.NODE}: Total time spent performing query'
              type: DEPENDENT
              key: 'es.node.indices.search.query_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Time in milliseconds spent performing query operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.query_time_in_millis.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7279b682fa3e4661a1600d6da25e0fc7
              name: 'ES {#ES.NODE}: Total number of query'
              type: DEPENDENT
              key: 'es.node.indices.search.query_total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of query operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].indices.search.query_total.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 269cc21be4c94ba58a52b9fca9590632
              name: 'ES {#ES.NODE}: Amount of JVM heap committed'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_committed_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'The amount of memory, in bytes, available for use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].jvm.mem.heap_committed_in_bytes.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7f205e21644a4f629e4a419c42670158
              name: 'ES {#ES.NODE}: Maximum JVM memory available for use'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_max_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'The maximum amount of memory, in bytes, available for use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].jvm.mem.heap_max_in_bytes.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1d
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: bb55c78c2aca49e49f6200e14c25dee8
              name: 'ES {#ES.NODE}: Amount of JVM heap currently in use'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_used_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'The memory, in bytes, currently in use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].jvm.mem.heap_used_in_bytes.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 71fca039cad847da9623aaeb722168f1
              name: 'ES {#ES.NODE}: Percent of JVM heap currently in use'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_used_percent[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: '%'
              description: 'The percentage of memory currently in use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].jvm.mem.heap_used_percent.first()'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 9d199607ddda48eebe9f93cae6d6575d
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}'
                  name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical'
                  event_name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical (over {$ELASTICSEARCH.HEAP_USED.MAX.CRIT}% for 1h)'
                  priority: HIGH
                  description: |
                    This indicates that the rate of garbage collection isn't keeping up with the rate of garbage creation.
To address this problem, you can either increase your heap size (as long as it remains below the recommended guidelines stated above), or scale out the cluster by adding more nodes. tags: - tag: scope value: performance - uuid: bbba4a577a2c4328b2392fdeb1ff9bb4 expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.WARN}' name: 'ES {#ES.NODE}: Percent of JVM heap in use is high' event_name: 'ES {#ES.NODE}: Percent of JVM heap in use is high (over {$ELASTICSEARCH.HEAP_USED.MAX.WARN}% for 1h)' priority: WARNING description: | This indicates that the rate of garbage collection isn't keeping up with the rate of garbage creation. To address this problem, you can either increase your heap size (as long as it remains below the recommended guidelines stated above), or scale out the cluster by adding more nodes. dependencies: - name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical' expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}' tags: - tag: scope value: performance - uuid: 3c2f512ae6ff4221a7de4e5dbff2ed48 name: 'ES {#ES.NODE}: Node uptime' type: DEPENDENT key: 'es.node.jvm.uptime[{#ES.NODE}]' delay: '0' history: 7d value_type: FLOAT units: s description: 'JVM uptime in seconds.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].jvm.uptime_in_millis.first()' - type: MULTIPLIER parameters: - '0.001' master_item: key: es.nodes.get_stats tags: - tag: component value: system - tag: node value: '{#ES.NODE}' trigger_prototypes: - uuid: 580ae8aadf994fbcb34c9c8ad7cd5fad expression: 'last(/Elasticsearch Cluster by HTTP/es.node.jvm.uptime[{#ES.NODE}])<10m' name: 'ES {#ES.NODE}: has been restarted' event_name: 'ES {#ES.NODE}: has been restarted (uptime < 10m)' priority: INFO description: 'Uptime is less than 10 minutes.' 
manual_close: 'YES' tags: - tag: scope value: notice - uuid: 14ea2732fbfc40ceaafadfff1830ac4d name: 'ES {#ES.NODE}: Refresh thread pool active threads' type: DEPENDENT key: 'es.node.thread_pool.refresh.active[{#ES.NODE}]' delay: '0' history: 7d description: 'The number of active threads in the refresh thread pool.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.refresh.active.first()' master_item: key: es.nodes.get_stats tags: - tag: component value: refresh-thread-pool - tag: node value: '{#ES.NODE}' - uuid: b6496d2878914b06bf6301630b4609a8 name: 'ES {#ES.NODE}: Refresh thread pool executor tasks completed' type: DEPENDENT key: 'es.node.thread_pool.refresh.completed.rate[{#ES.NODE}]' delay: '0' history: 7d value_type: FLOAT units: rps description: 'The number of tasks completed by the refresh thread pool executor.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.refresh.completed.first()' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: es.nodes.get_stats tags: - tag: component value: refresh-thread-pool - tag: node value: '{#ES.NODE}' - uuid: 4b748eeb937e45308e58d699b713cf01 name: 'ES {#ES.NODE}: Refresh thread pool tasks in queue' type: DEPENDENT key: 'es.node.thread_pool.refresh.queue[{#ES.NODE}]' delay: '0' history: 7d description: 'The number of tasks in queue for the refresh thread pool.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.refresh.queue.first()' master_item: key: es.nodes.get_stats tags: - tag: component value: refresh-thread-pool - tag: node value: '{#ES.NODE}' - uuid: 593514af005044ce8e6207a5616270f2 name: 'ES {#ES.NODE}: Refresh thread pool executor tasks rejected' type: DEPENDENT key: 'es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}]' delay: '0' history: 7d value_type: FLOAT units: rps description: 'The number of tasks rejected by the refresh thread pool executor.' 
preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.refresh.rejected.first()' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: es.nodes.get_stats tags: - tag: component value: refresh-thread-pool - tag: node value: '{#ES.NODE}' trigger_prototypes: - uuid: b416941e8e0141c6af27c62f052ac860 expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}],5m)>0' name: 'ES {#ES.NODE}: Refresh thread pool executor has the rejected tasks' event_name: 'ES {#ES.NODE}: Refresh thread pool executor has the rejected tasks (for 5m)' priority: WARNING description: 'The number of tasks rejected by the refresh thread pool executor is over 0 for 5m.' tags: - tag: scope value: notice - uuid: 63875f40cf7c4f8f842562be2adfb7c7 name: 'ES {#ES.NODE}: Search thread pool active threads' type: DEPENDENT key: 'es.node.thread_pool.search.active[{#ES.NODE}]' delay: '0' history: 7d description: 'The number of active threads in the search thread pool.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.search.active.first()' master_item: key: es.nodes.get_stats tags: - tag: component value: search-thread-pool - tag: node value: '{#ES.NODE}' - uuid: dc67ab311d4945aaae3347464785abb1 name: 'ES {#ES.NODE}: Search thread pool executor tasks completed' type: DEPENDENT key: 'es.node.thread_pool.search.completed.rate[{#ES.NODE}]' delay: '0' history: 7d value_type: FLOAT units: rps description: 'The number of tasks completed by the search thread pool executor.' 
preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.search.completed.first()' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: es.nodes.get_stats tags: - tag: component value: search-thread-pool - tag: node value: '{#ES.NODE}' - uuid: d11933b62131425d83ab09c6d5fd5e85 name: 'ES {#ES.NODE}: Search thread pool tasks in queue' type: DEPENDENT key: 'es.node.thread_pool.search.queue[{#ES.NODE}]' delay: '0' history: 7d description: 'The number of tasks in queue for the search thread pool.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.search.queue.first()' master_item: key: es.nodes.get_stats tags: - tag: component value: search-thread-pool - tag: node value: '{#ES.NODE}' - uuid: 820ed330abc845919b1dada3cfa81387 name: 'ES {#ES.NODE}: Search thread pool executor tasks rejected' type: DEPENDENT key: 'es.node.thread_pool.search.rejected.rate[{#ES.NODE}]' delay: '0' history: 7d value_type: FLOAT units: rps description: 'The number of tasks rejected by the search thread pool executor.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.search.rejected.first()' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: es.nodes.get_stats tags: - tag: component value: search-thread-pool - tag: node value: '{#ES.NODE}' trigger_prototypes: - uuid: 5cfd3f9924614a4283f1255752b672f0 expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.search.rejected.rate[{#ES.NODE}],5m)>0' name: 'ES {#ES.NODE}: Search thread pool executor has the rejected tasks' event_name: 'ES {#ES.NODE}: Search thread pool executor has the rejected tasks (for 5m)' priority: WARNING description: 'The number of tasks rejected by the search thread pool executor is over 0 for 5m.' 
tags: - tag: scope value: notice - uuid: 4b7dc34d78a64b24a8fd19af95e0f0bd name: 'ES {#ES.NODE}: Write thread pool active threads' type: DEPENDENT key: 'es.node.thread_pool.write.active[{#ES.NODE}]' delay: '0' history: 7d description: 'The number of active threads in the write thread pool.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.write.active.first()' master_item: key: es.nodes.get_stats tags: - tag: component value: write-thread-pool - tag: node value: '{#ES.NODE}' - uuid: 5b5ac74702564bd490c7378adcf75c28 name: 'ES {#ES.NODE}: Write thread pool executor tasks completed' type: DEPENDENT key: 'es.node.thread_pool.write.completed.rate[{#ES.NODE}]' delay: '0' history: 7d value_type: FLOAT units: rps description: 'The number of tasks completed by the write thread pool executor.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.write.completed.first()' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: es.nodes.get_stats tags: - tag: component value: write-thread-pool - tag: node value: '{#ES.NODE}' - uuid: a0d8f8d896a546d1ade07c355992308d name: 'ES {#ES.NODE}: Write thread pool tasks in queue' type: DEPENDENT key: 'es.node.thread_pool.write.queue[{#ES.NODE}]' delay: '0' history: 7d description: 'The number of tasks in queue for the write thread pool.' preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.write.queue.first()' master_item: key: es.nodes.get_stats tags: - tag: component value: write-thread-pool - tag: node value: '{#ES.NODE}' - uuid: a21213815a30485a88b183e7b40a4e7e name: 'ES {#ES.NODE}: Write thread pool executor tasks rejected' type: DEPENDENT key: 'es.node.thread_pool.write.rejected.rate[{#ES.NODE}]' delay: '0' history: 7d value_type: FLOAT units: rps description: 'The number of tasks rejected by the write thread pool executor.' 
preprocessing: - type: JSONPATH parameters: - '$..[?(@.name==''{#ES.NODE}'')].thread_pool.write.rejected.first()' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: es.nodes.get_stats tags: - tag: component value: write-thread-pool - tag: node value: '{#ES.NODE}' trigger_prototypes: - uuid: 1f5e30a34bb84cef831670974ec7c3e8 expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.write.rejected.rate[{#ES.NODE}],5m)>0' name: 'ES {#ES.NODE}: Write thread pool executor has the rejected tasks' event_name: 'ES {#ES.NODE}: Write thread pool executor has the rejected tasks (for 5m)' priority: WARNING description: 'The number of tasks rejected by the write thread pool executor is over 0 for 5m.' tags: - tag: scope value: notice graph_prototypes: - uuid: 0725d91f63b64346bbef2c20d2901e9b name: 'ES {#ES.NODE}: Latency' graph_items: - color: 1A7C11 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.indices.search.query_latency[{#ES.NODE}]' - sortorder: '1' color: 2774A4 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.indices.indexing.index_latency[{#ES.NODE}]' - sortorder: '2' color: F63100 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.indices.search.fetch_latency[{#ES.NODE}]' - sortorder: '3' color: A54F10 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.indices.flush.latency[{#ES.NODE}]' - uuid: 5196ed75f24d48f7ae55b564b8d925e5 name: 'ES {#ES.NODE}: Query load' graph_items: - color: 1A7C11 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.indices.search.fetch_current[{#ES.NODE}]' - sortorder: '1' color: 2774A4 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.indices.search.query_current[{#ES.NODE}]' - uuid: 6d0e4fde0eeb47b6af4c9f40311b384d name: 'ES {#ES.NODE}: Refresh thread pool' graph_items: - color: 1A7C11 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.refresh.active[{#ES.NODE}]' - sortorder: '1' color: 2774A4 item: host: 'Elasticsearch Cluster by HTTP' key: 
'es.node.thread_pool.refresh.queue[{#ES.NODE}]' - sortorder: '2' color: F63100 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.refresh.completed.rate[{#ES.NODE}]' - sortorder: '3' color: A54F10 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}]' - uuid: 9abe085b960d40bf8a99c88ec3564a8d name: 'ES {#ES.NODE}: Search thread pool' graph_items: - color: 1A7C11 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.search.active[{#ES.NODE}]' - sortorder: '1' color: 2774A4 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.search.queue[{#ES.NODE}]' - sortorder: '2' color: F63100 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.search.completed.rate[{#ES.NODE}]' - sortorder: '3' color: A54F10 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.search.rejected.rate[{#ES.NODE}]' - uuid: 689492a2614642a1a5d3279c42b29e85 name: 'ES {#ES.NODE}: Write thread pool' graph_items: - color: 1A7C11 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.write.active[{#ES.NODE}]' - sortorder: '1' color: 2774A4 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.write.queue[{#ES.NODE}]' - sortorder: '2' color: F63100 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.write.completed.rate[{#ES.NODE}]' - sortorder: '3' color: A54F10 item: host: 'Elasticsearch Cluster by HTTP' key: 'es.node.thread_pool.write.rejected.rate[{#ES.NODE}]' timeout: 15s url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_nodes/_all/nodes' lld_macro_paths: - lld_macro: '{#ES.NODE}' path: $..name.first() preprocessing: - type: JSONPATH parameters: - '$.nodes.[*]' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1d tags: - tag: class value: software - tag: target value: elasticsearch macros: - macro: '{$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}' value: '100' description: 'Maximum of fetch latency in 
milliseconds for trigger expression.' - macro: '{$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}' value: '100' description: 'Maximum of flush latency in milliseconds for trigger expression.' - macro: '{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}' value: '95' description: 'The maximum percent in the use of JVM heap for critical trigger expression.' - macro: '{$ELASTICSEARCH.HEAP_USED.MAX.WARN}' value: '85' description: 'The maximum percent in the use of JVM heap for warning trigger expression.' - macro: '{$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}' value: '100' description: 'Maximum of indexing latency in milliseconds for trigger expression.' - macro: '{$ELASTICSEARCH.PASSWORD}' description: 'The password of the Elasticsearch.' - macro: '{$ELASTICSEARCH.PORT}' value: '9200' description: 'The port of the Elasticsearch host.' - macro: '{$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}' value: '100' description: 'Maximum of query latency in milliseconds for trigger expression.' - macro: '{$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN}' value: 10s description: 'The ES cluster maximum response time in seconds for trigger expression.' - macro: '{$ELASTICSEARCH.SCHEME}' value: http description: 'The scheme of the Elasticsearch (http/https).' - macro: '{$ELASTICSEARCH.USERNAME}' description: 'The username of the Elasticsearch.' 
# Value maps used by the template's status items.
valuemaps:
  - uuid: d651bdf75d0849d5ab2b0802fab76e22
    name: 'ES cluster state'
    mappings:
      - value: '0'
        newvalue: green
      - value: '1'
        newvalue: yellow
      - value: '2'
        newvalue: red
      - value: '255'
        newvalue: unknown
  - uuid: 15d416d869894fdb959ca2cda2c5e37c
    name: 'Service state'
    mappings:
      - value: '0'
        newvalue: Down
      - value: '1'
        newvalue: Up
# Export-level triggers (expressions reference items across the template).
triggers:
  - uuid: a2f33888d2774325b7955b633a7aae81
    # NOTE(review): the expression divides by (number_of_data_nodes - 1); on a
    # cluster with exactly one data node this is a division by zero and the
    # trigger becomes unsupported — confirm whether that is intended.
    expression: '(last(/Elasticsearch Cluster by HTTP/es.nodes.fs.total_in_bytes)-last(/Elasticsearch Cluster by HTTP/es.nodes.fs.available_in_bytes))/(last(/Elasticsearch Cluster by HTTP/es.cluster.number_of_data_nodes)-1)>last(/Elasticsearch Cluster by HTTP/es.nodes.fs.available_in_bytes)'
    name: 'ES: Cluster does not have enough space for resharding'
    priority: HIGH
    description: 'There is not enough disk space for index resharding.'
    tags:
      - tag: scope
        value: capacity
# Export-level graphs: cluster-wide shard/task health overview.
graphs:
  - uuid: 50f90b092fa24658b83b131fa7a3f2ce
    name: 'ES: Cluster health'
    graph_items:
      - color: 1A7C11
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.inactive_shards_percent_as_number
      - sortorder: '1'
        color: 2774A4
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.relocating_shards
      - sortorder: '2'
        color: F63100
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.initializing_shards
      - sortorder: '3'
        color: A54F10
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.unassigned_shards
      - sortorder: '4'
        color: FC6EA3
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.delayed_unassigned_shards
      - sortorder: '5'
        color: 6C59DC
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.number_of_pending_tasks
      - sortorder: '6'
        color: AC8C14
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.task_max_waiting_in_queue