diff options
author | Andrejs Sitals <andrejs.sitals@zabbix.com> | 2018-12-20 14:43:25 +0300 |
---|---|---|
committer | Andrejs Sitals <andrejs.sitals@zabbix.com> | 2018-12-20 14:43:25 +0300 |
commit | 6317107547879aef0edfee6e847fc6ad7835ec32 (patch) | |
tree | 64d3aa48b01566014b65b44cdd1588c61ba80336 | |
parent | 1ca000c88ea3b9f5e2a92f2e26e55f06e60e9e88 (diff) |
...G...... [ZBX-15225] improved handling of timeouts in "vfs.dir.size" and "vfs.dir.count" items on Windows agent
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | include/sysinfo.h | 4 | ||||
-rw-r--r-- | src/libs/zbxsysinfo/common/dir.c | 59 | ||||
-rw-r--r-- | src/libs/zbxsysinfo/sysinfo.c | 73 | ||||
-rw-r--r-- | src/libs/zbxsysinfo/win32/diskspace.c | 6 |
5 files changed, 126 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog index fe7e42a607c..ba735cdf219 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ New features: ..F....PS. [ZBXNEXT-2315] implemented content matching in web monitoring headers (talbergs, vso) Bug fixes: +...G...... [ZBX-15225] improved handling of timeouts in "vfs.dir.size" and "vfs.dir.count" items on Windows agent (asitals) .......PS. [ZBX-15206] fixed deprecated net-snmp attribute (MVekslers) .......PS. [ZBX-15067] fixed configuration sync of interfaces without hosts (kalimulin) .......PS. [ZBX-15238] fixed updating nextcheck time in discovery rules to avoid overlaps between discovery executions (asitals) @@ -45,6 +46,7 @@ Changes for 4.0.4rc1 New features: Bug fixes: +...G...... [ZBX-15225] improved handling of timeouts in "vfs.dir.size" and "vfs.dir.count" items on Windows agent (asitals) -------------------------------------------------------------------------------- Changes for 4.0.3 diff --git a/include/sysinfo.h b/include/sysinfo.h index b1592168c14..60d704ce08e 100644 --- a/include/sysinfo.h +++ b/include/sysinfo.h @@ -285,7 +285,11 @@ int VM_VMEMORY_SIZE(AGENT_REQUEST *request, AGENT_RESULT *result); int SYSTEM_STAT(AGENT_REQUEST *request, AGENT_RESULT *result); #endif +#ifdef _WINDOWS +typedef int (*zbx_metric_func_t)(AGENT_REQUEST *request, AGENT_RESULT *result, HANDLE timeout_event); +#else typedef int (*zbx_metric_func_t)(AGENT_REQUEST *request, AGENT_RESULT *result); +#endif typedef struct { diff --git a/src/libs/zbxsysinfo/common/dir.c b/src/libs/zbxsysinfo/common/dir.c index 6b0112c0cab..c4476e20175 100644 --- a/src/libs/zbxsysinfo/common/dir.c +++ b/src/libs/zbxsysinfo/common/dir.c @@ -412,6 +412,43 @@ static void descriptors_vector_destroy(zbx_vector_ptr_t *descriptors) #define DW2UI64(h,l) ((zbx_uint64_t)h << 32 | l) #define FT2UT(ft) (time_t)(DW2UI64(ft.dwHighDateTime,ft.dwLowDateTime) / 10000000ULL - 11644473600ULL) +/****************************************************************************** + * * + * Function: has_timed_out * + * * + * Purpose: Checks if timeout has occurred. If it is, thread should * + * immediately stop whatever it is doing, clean up everything and * + * return SYSINFO_RET_FAIL. * + * * + * Parameters: timeout_event - [IN] handle of a timeout event that was passed * + * to the metric function * + * * + * Return value: TRUE, if timeout or error was detected, FALSE otherwise. * + * * + ******************************************************************************/ +static BOOL has_timed_out(HANDLE timeout_event) +{ + DWORD rc; + + rc = WaitForSingleObject(timeout_event, 0); + + switch (rc) + { + case WAIT_OBJECT_0: + return TRUE; + case WAIT_TIMEOUT: + return FALSE; + case WAIT_FAILED: + zabbix_log(LOG_LEVEL_CRIT, "WaitForSingleObject() returned WAIT_FAILED: %s", + strerror_from_system(GetLastError())); + return TRUE; + default: + zabbix_log(LOG_LEVEL_CRIT, "WaitForSingleObject() returned 0x%x", (unsigned int)rc); + THIS_SHOULD_NEVER_HAPPEN; + return TRUE; + } +} + static int get_file_info_by_handle(wchar_t *wpath, BY_HANDLE_FILE_INFORMATION *link_info, char **error) { HANDLE file_handle; @@ -480,7 +517,7 @@ static int link_processed(DWORD attrib, wchar_t *wpath, zbx_vector_ptr_t *descri return FAIL; } -static int vfs_dir_size(AGENT_REQUEST *request, AGENT_RESULT *result) +static int vfs_dir_size(AGENT_REQUEST *request, AGENT_RESULT *result, HANDLE timeout_event) { const char *__function_name = "vfs_dir_size"; char *dir = NULL; @@ -507,7 +544,7 @@ static int vfs_dir_size(AGENT_REQUEST *request, AGENT_RESULT *result) else goto err2; - while (0 < list.values_num) + while (0 < list.values_num && FALSE == has_timed_out(timeout_event)) { char *name, *error = NULL; wchar_t *wpath; @@ -613,7 +650,7 @@ static int vfs_dir_size(AGENT_REQUEST *request, AGENT_RESULT *result) zbx_free(name); } - while (0 != FindNextFile(handle, &data)); + while (0 != FindNextFile(handle, &data) && FALSE == has_timed_out(timeout_event)); if (0 == FindClose(handle)) { @@ -625,6 +662,11 @@ skip: zbx_free(item); } + if (TRUE == has_timed_out(timeout_event)) + { + goto err2; + } + SET_UI64_RESULT(result, size); ret = SYSINFO_RET_OK; err2: @@ -788,7 +830,7 @@ int VFS_DIR_SIZE(AGENT_REQUEST *request, AGENT_RESULT *result) * * *****************************************************************************/ #ifdef _WINDOWS -static int vfs_dir_count(const AGENT_REQUEST *request, AGENT_RESULT *result) +static int vfs_dir_count(const AGENT_REQUEST *request, AGENT_RESULT *result, HANDLE timeout_event) { const char *__function_name = "vfs_dir_count"; char *dir = NULL; @@ -817,7 +859,7 @@ static int vfs_dir_count(const AGENT_REQUEST *request, AGENT_RESULT *result) else goto err2; - while (0 < list.values_num) + while (0 < list.values_num && FALSE == has_timed_out(timeout_event)) { char *name; wchar_t *wpath; @@ -922,7 +964,7 @@ free_path: zbx_free(name); - } while (0 != FindNextFile(handle, &data)); + } while (0 != FindNextFile(handle, &data) && FALSE == has_timed_out(timeout_event)); if (0 == FindClose(handle)) { @@ -934,6 +976,11 @@ skip: zbx_free(item); } + if (TRUE == has_timed_out(timeout_event)) + { + goto err2; + } + SET_UI64_RESULT(result, count); ret = SYSINFO_RET_OK; err2: diff --git a/src/libs/zbxsysinfo/sysinfo.c b/src/libs/zbxsysinfo/sysinfo.c index 957d9ef661c..33ad59ce79d 100644 --- a/src/libs/zbxsysinfo/sysinfo.c +++ b/src/libs/zbxsysinfo/sysinfo.c @@ -1361,6 +1361,7 @@ typedef struct AGENT_REQUEST *request; AGENT_RESULT *result; zbx_uint32_t mutex_flag; /* in regular case should always be = ZBX_MUTEX_ALL_ALLOW */ + HANDLE timeout_event; int agent_ret; } zbx_metric_thread_args_t; @@ -1372,7 +1373,7 @@ ZBX_THREAD_ENTRY(agent_metric_thread, data) zabbix_log(LOG_LEVEL_DEBUG, "executing in data thread for key:'%s'", args->request->key); - if (SYSINFO_RET_FAIL == (args->agent_ret = args->func(args->request, args->result))) + if (SYSINFO_RET_FAIL == (args->agent_ret = args->func(args->request, args->result, args->timeout_event))) { if (NULL == GET_MSG_RESULT(args->result)) SET_MSG_RESULT(args->result, zbx_strdup(NULL, ZBX_NOTSUPPORTED)); @@ -1401,45 +1402,95 @@ int zbx_execute_threaded_metric(zbx_metric_func_t metric_func, AGENT_REQUEST *re const char *__function_name = "zbx_execute_threaded_metric"; ZBX_THREAD_HANDLE thread; - zbx_thread_args_t args; + zbx_thread_args_t thread_args; zbx_metric_thread_args_t metric_args = {metric_func, request, result, ZBX_MUTEX_THREAD_DENIED | ZBX_MUTEX_LOGGING_DENIED}; DWORD rc; + BOOL terminate_thread = FALSE; zabbix_log(LOG_LEVEL_DEBUG, "In %s() key:'%s'", __function_name, request->key); - args.args = (void *)&metric_args; + if (NULL == (metric_args.timeout_event = CreateEvent(NULL, TRUE, FALSE, NULL))) + { + SET_MSG_RESULT(result, zbx_dsprintf(NULL, "Cannot create timeout event for data thread: %s", + strerror_from_system(GetLastError()))); + return SYSINFO_RET_FAIL; + } - zbx_thread_start(agent_metric_thread, &args, &thread); + thread_args.args = (void *)&metric_args; + + zbx_thread_start(agent_metric_thread, &thread_args, &thread); if (ZBX_THREAD_ERROR == thread) { SET_MSG_RESULT(result, zbx_dsprintf(NULL, "Cannot start data thread: %s", strerror_from_system(GetLastError()))); + CloseHandle(metric_args.timeout_event); return SYSINFO_RET_FAIL; } + /* 1000 is multiplier for converting seconds into milliseconds */ if (WAIT_FAILED == (rc = WaitForSingleObject(thread, CONFIG_TIMEOUT * 1000))) { + /* unexpected error */ + SET_MSG_RESULT(result, zbx_dsprintf(NULL, "Cannot wait for data: %s", strerror_from_system(GetLastError()))); - TerminateThread(thread, 0); - CloseHandle(thread); - return SYSINFO_RET_FAIL; + terminate_thread = TRUE; } else if (WAIT_TIMEOUT == rc) { SET_MSG_RESULT(result, zbx_strdup(NULL, "Timeout while waiting for data.")); - TerminateThread(thread, 0); - CloseHandle(thread); - return SYSINFO_RET_FAIL; + + /* timeout; notify thread to clean up and exit, if stuck then terminate it */ + + if (FALSE == SetEvent(metric_args.timeout_event)) + { + zabbix_log(LOG_LEVEL_ERR, "SetEvent() failed: %s", strerror_from_system(GetLastError())); + terminate_thread = TRUE; + } + else + { + DWORD timeout_rc = WaitForSingleObject(thread, 3000); /* wait up to 3 seconds */ + + if (WAIT_FAILED == timeout_rc) + { + zabbix_log(LOG_LEVEL_ERR, "Waiting for data failed: %s", + strerror_from_system(GetLastError())); + terminate_thread = TRUE; + } + else if (WAIT_TIMEOUT == timeout_rc) + { + zabbix_log(LOG_LEVEL_ERR, "Stuck data thread"); + terminate_thread = TRUE; + } + /* timeout_rc must be WAIT_OBJECT_0 (signaled) */ + } + } + + if (TRUE == terminate_thread) + { + if (FALSE != TerminateThread(thread, 0)) + { + zabbix_log(LOG_LEVEL_ERR, "%s(): TerminateThread() for %s[%s%s] succeeded", __function_name, + request->key, (0 < request->nparam) ? request->params[0] : "", + (1 < request->nparam) ? ",..." : ""); + } + else + { + zabbix_log(LOG_LEVEL_ERR, "%s(): TerminateThread() for %s[%s%s] failed: %s", __function_name, + request->key, (0 < request->nparam) ? request->params[0] : "", + (1 < request->nparam) ? ",..." : "", + strerror_from_system(GetLastError())); + } } CloseHandle(thread); + CloseHandle(metric_args.timeout_event); zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s '%s'", __function_name, zbx_sysinfo_ret_string(metric_args.agent_ret), ISSET_MSG(result) ? result->msg : ""); - return metric_args.agent_ret; + return WAIT_OBJECT_0 == rc ? metric_args.agent_ret : SYSINFO_RET_FAIL; } #endif diff --git a/src/libs/zbxsysinfo/win32/diskspace.c b/src/libs/zbxsysinfo/win32/diskspace.c index 16d38395809..98c78705322 100644 --- a/src/libs/zbxsysinfo/win32/diskspace.c +++ b/src/libs/zbxsysinfo/win32/diskspace.c @@ -21,12 +21,16 @@ #include "sysinfo.h" #include "zbxjson.h" -static int vfs_fs_size(AGENT_REQUEST *request, AGENT_RESULT *result) +static int vfs_fs_size(AGENT_REQUEST *request, AGENT_RESULT *result, HANDLE timeout_event) { char *path, *mode; wchar_t *wpath; ULARGE_INTEGER freeBytes, totalBytes; + /* 'timeout_event' argument is here to make the vfs_fs_size() prototype as required by */ + /* zbx_execute_threaded_metric() on MS Windows */ + ZBX_UNUSED(timeout_event); + if (2 < request->nparam) { SET_MSG_RESULT(result, zbx_strdup(NULL, "Too many parameters.")); |