From 2c4b60fe24bfbccb3fa1553aed1065ce072343e1 Mon Sep 17 00:00:00 2001 From: Alexis Christoforides Date: Fri, 9 Aug 2019 10:31:57 +0300 Subject: [merp] Use a separate program as the hang supervisor. (#15715) * [merp] Use a separate program as the hang supervisor. Fixes https://github.com/mono/mono/issues/15646 macOS does not like signals being sent from the forked supervisor process, possibly towards anywhere but definitely when sent to the parent process. The following message is currently spewed after the supervisor process attempting to send a SIGSEGV to a hanged Mono process: "The process has forked and you cannot use this CoreFoundation functionality safely. You MUST exec()." We follow that direction and introduce a new binary that, when available in the mono executable's binary directory, is used to abort the parent process for us. --- configure.ac | 2 ++ mono/metadata/icall.c | 2 +- mono/metadata/threads-types.h | 3 ++ mono/metadata/threads.c | 39 ++++++++++---------- mono/utils/mono-state.c | 1 + runtime/bin/mono-hang-watchdog.in | 3 ++ tools/Makefile.am | 2 +- tools/mono-hang-watchdog/Makefile.am | 12 +++++++ tools/mono-hang-watchdog/mono-hang-watchdog.c | 51 +++++++++++++++++++++++++++ 9 files changed, 92 insertions(+), 23 deletions(-) create mode 100644 runtime/bin/mono-hang-watchdog.in create mode 100644 tools/mono-hang-watchdog/Makefile.am create mode 100644 tools/mono-hang-watchdog/mono-hang-watchdog.c diff --git a/configure.ac b/configure.ac index b41dbb5f31d..99380f0ead1 100644 --- a/configure.ac +++ b/configure.ac @@ -6075,6 +6075,7 @@ AC_CONFIG_FILES([po/mcs/Makefile.in]) AC_CONFIG_FILES([acceptance-tests/microbench-perf.sh],[chmod +x acceptance-tests/microbench-perf.sh]) AC_CONFIG_FILES([runtime/mono-wrapper],[chmod +x runtime/mono-wrapper]) AC_CONFIG_FILES([runtime/monodis-wrapper],[chmod +x runtime/monodis-wrapper]) +AC_CONFIG_FILES([runtime/bin/mono-hang-watchdog],[chmod +x runtime/bin/mono-hang-watchdog]) AC_CONFIG_COMMANDS([runtime/etc/mono/1.0/machine.config], [ depth=../../../.. @@ -6746,6 +6747,7 @@ tools/Makefile tools/locale-builder/Makefile tools/sgen/Makefile tools/pedump/Makefile +tools/mono-hang-watchdog/Makefile runtime/Makefile msvc/Makefile po/Makefile diff --git a/mono/metadata/icall.c b/mono/metadata/icall.c index 67cb79d0a06..e4ad5a96451 100644 --- a/mono/metadata/icall.c +++ b/mono/metadata/icall.c @@ -6382,7 +6382,7 @@ void ves_icall_Mono_Runtime_EnableCrashReportingLog (const char *directory, MonoError *error) { #ifndef DISABLE_CRASH_REPORTING - mono_summarize_set_timeline_dir (directory); + mono_threads_summarize_init (directory); #endif } diff --git a/mono/metadata/threads-types.h b/mono/metadata/threads-types.h index 4e96f99c9d6..716b4c017e6 100644 --- a/mono/metadata/threads-types.h +++ b/mono/metadata/threads-types.h @@ -540,6 +540,9 @@ typedef struct { MonoContext ctx_mem; } MonoThreadSummary; +void +mono_threads_summarize_init (const char *timeline_dir); + gboolean mono_threads_summarize (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gboolean silent, gboolean signal_handler_controller, gchar *mem, size_t provided_size); diff --git a/mono/metadata/threads.c b/mono/metadata/threads.c index 1f11f490e72..aa616d7e7f8 100644 --- a/mono/metadata/threads.c +++ b/mono/metadata/threads.c @@ -58,6 +58,7 @@ #include #include #include +#include #ifdef HAVE_SYS_WAIT_H #include @@ -6133,7 +6134,6 @@ mono_threads_summarize_one (MonoThreadSummary *out, MonoContext *ctx) return success; } -#define TIMEOUT_CRASH_REPORTER_FATAL 30 #define MAX_NUM_THREADS 128 typedef struct { gint32 has_owner; // state of this memory @@ -6149,7 +6149,7 @@ typedef struct { gboolean silent; // print to stdout } SummarizerGlobalState; -#if defined(HAVE_KILL) && !defined(HOST_ANDROID) && defined(HAVE_WAITPID) && ((!defined(HOST_DARWIN) && defined(SYS_fork)) || HAVE_FORK) +#if defined(HAVE_KILL) && !defined(HOST_ANDROID) && defined(HAVE_WAITPID) && defined(HAVE_EXECVE) && ((!defined(HOST_DARWIN) && defined(SYS_fork)) || HAVE_FORK) #define HAVE_MONO_SUMMARIZER_SUPERVISOR 1 #endif @@ -6160,8 +6160,9 @@ typedef struct { } SummarizerSupervisorState; #ifndef HAVE_MONO_SUMMARIZER_SUPERVISOR -static void -summarizer_supervisor_wait (SummarizerSupervisorState *state) + +void +mono_threads_summarize_init (const char *timeline_dir) { return; } @@ -6180,23 +6181,14 @@ summarizer_supervisor_end (SummarizerSupervisorState *state) } #else -static void -summarizer_supervisor_wait (SummarizerSupervisorState *state) -{ - sleep (TIMEOUT_CRASH_REPORTER_FATAL); +static const char *hang_watchdog_path; - // If we haven't been SIGKILL'ed yet, we signal our parent - // and then exit -#ifdef HAVE_KILL - g_async_safe_printf("Crash Reporter has timed out, sending SIGSEGV\n"); - kill (state->pid, SIGSEGV); -#else - g_error ("kill () is not supported by this platform"); -#endif - - exit (1); +void +mono_threads_summarize_init (const char *timeline_dir) +{ + hang_watchdog_path = g_build_filename (mono_get_config_dir (), "..", "bin", "mono-hang-watchdog", NULL); + mono_summarize_set_timeline_dir (timeline_dir); } - static pid_t summarizer_supervisor_start (SummarizerSupervisorState *state) { @@ -6223,6 +6215,13 @@ summarizer_supervisor_start (SummarizerSupervisorState *state) if (pid != 0) state->supervisor_pid = pid; + else { + char pid_str[20]; // pid is a uint64_t, 20 digits max in decimal form + sprintf (pid_str, "%llu", (uint64_t)state->pid); + const char *const args[] = { hang_watchdog_path, pid_str, NULL }; + execve (args[0], (char * const*)args, NULL); // run 'mono-hang-watchdog [pid]' + g_assert_not_reached (); + } return pid; } @@ -6567,8 +6566,6 @@ mono_threads_summarize (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gb g_assert (mem); success = mono_threads_summarize_execute_internal (ctx, out, hashes, silent, mem, provided_size, TRUE); summarizer_supervisor_end (&synch); - } else { - summarizer_supervisor_wait (&synch); } if (!already_async) diff --git a/mono/utils/mono-state.c b/mono/utils/mono-state.c index 6c793b7a896..d635d9ade89 100644 --- a/mono/utils/mono-state.c +++ b/mono/utils/mono-state.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/runtime/bin/mono-hang-watchdog.in b/runtime/bin/mono-hang-watchdog.in new file mode 100644 index 00000000000..9862038fccb --- /dev/null +++ b/runtime/bin/mono-hang-watchdog.in @@ -0,0 +1,3 @@ +#! /bin/sh +r='@mono_build_root@' +exec "$r/tools/mono-hang-watchdog/mono-hang-watchdog" "$@" \ No newline at end of file diff --git a/tools/Makefile.am b/tools/Makefile.am index 7a810890200..831f7548310 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = locale-builder sgen pedump +SUBDIRS = locale-builder sgen pedump mono-hang-watchdog diff --git a/tools/mono-hang-watchdog/Makefile.am b/tools/mono-hang-watchdog/Makefile.am new file mode 100644 index 00000000000..596c7cd63e4 --- /dev/null +++ b/tools/mono-hang-watchdog/Makefile.am @@ -0,0 +1,12 @@ + +AM_CPPFLAGS = $(SHARED_CFLAGS) + +if DISABLE_EXECUTABLES +bin_PROGRAMS = +else +bin_PROGRAMS = mono-hang-watchdog +endif + +CFLAGS = $(filter-out @CXX_REMOVE_CFLAGS@, @CFLAGS@) + +mono_hang_watchdog_SOURCES = mono-hang-watchdog.c diff --git a/tools/mono-hang-watchdog/mono-hang-watchdog.c b/tools/mono-hang-watchdog/mono-hang-watchdog.c new file mode 100644 index 00000000000..d703d8c2f96 --- /dev/null +++ b/tools/mono-hang-watchdog/mono-hang-watchdog.c @@ -0,0 +1,51 @@ +/* Given a external process' id as argument, the program waits for a set timeout then attempts to abort that process */ +/* Used by the Mono runtime's crash reporting. */ + +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_SIGNAL_H +#include +#endif + +#define TIMEOUT 30 + +static char* program_name; +void program_exit (int exit_code, const char* message); + +int main (int argc, char* argv[]) +{ + program_name = argv [0]; + if (argc != 2) + program_exit (1, "Please provide one argument (pid)"); + errno = 0; + pid_t pid = (pid_t)strtoul (argv [1], NULL, 10); + if (errno) + program_exit (2, "Invalid pid"); + + sleep (TIMEOUT); + + /* if we survived the timeout, we consider the Mono process as hung */ + +#ifndef HAVE_KILL + /* just inform the user */ + printf ("Mono process with pid %llu appears to be hung", (uint64_t)pid); + return 0; +#else + printf ("Mono process hang detected, sending kill signal to pid %llu\n", (uint64_t)pid); + return kill (pid, SIGKILL); +#endif +} + +void program_exit (int exit_code, const char* message) +{ + if (message) + printf ("%s\n", message); + printf ("Usage: '%s [pid]'\t\t[pid]: The id for the Mono process\n", program_name); + exit (exit_code); +} -- cgit v1.2.3