diff options
-rw-r--r-- | mono/metadata/appdomain.c | 5 | ||||
-rw-r--r-- | mono/metadata/threads-types.h | 5 | ||||
-rw-r--r-- | mono/metadata/threads.c | 490 | ||||
-rw-r--r-- | mono/mini/debugger-agent.c | 4 | ||||
-rw-r--r-- | mono/mini/mini-exceptions.c | 13 | ||||
-rw-r--r-- | mono/mini/mini-runtime.c | 2 | ||||
-rw-r--r-- | mono/tests/libtest.c | 28 | ||||
-rw-r--r-- | mono/tests/merp-crash-test.cs | 12 | ||||
-rw-r--r-- | mono/utils/hazard-pointer.c | 2 |
9 files changed, 530 insertions, 31 deletions
diff --git a/mono/metadata/appdomain.c b/mono/metadata/appdomain.c index 724f4ea9a6b..95f3dcabc09 100644 --- a/mono/metadata/appdomain.c +++ b/mono/metadata/appdomain.c @@ -345,6 +345,11 @@ mono_runtime_init_checked (MonoDomain *domain, MonoThreadStartCB start_cb, MonoT mono_thread_attach (domain); +#ifndef DISABLE_CRASH_REPORTING + if (!mono_runtime_get_no_exec ()) + mono_summarizer_create_leader_thread (); +#endif + mono_type_initialization_init (); if (!mono_runtime_get_no_exec ()) diff --git a/mono/metadata/threads-types.h b/mono/metadata/threads-types.h index dc06fb56538..8a0fa9de244 100644 --- a/mono/metadata/threads-types.h +++ b/mono/metadata/threads-types.h @@ -569,4 +569,9 @@ mono_threads_summarize_execute (MonoContext *ctx, gchar **out, MonoStackHash *ha gboolean mono_threads_summarize_one (MonoThreadSummary *out, MonoContext *ctx); +#ifndef DISABLE_CRASH_REPORTING +void +mono_summarizer_create_leader_thread (void); +#endif + #endif /* _MONO_METADATA_THREADS_TYPES_H_ */ diff --git a/mono/metadata/threads.c b/mono/metadata/threads.c index 5354a51b9e7..2cafb71bba5 100644 --- a/mono/metadata/threads.c +++ b/mono/metadata/threads.c @@ -6351,6 +6351,302 @@ summarizer_supervisor_end (SummarizerSupervisorState *state) } #endif +/** + * + * Why a summarizer leader thread? + * + * The issue is that we set up signal handlers globally for all sorts of + * process-wide problems: sigsegv, sigterm, etc. Which means that if a thread + * is created outside of the control of Mono, our signal handlers may still run + * on those threads. But one of the things we need to do is interact with our + * thread state machinery, enumerate managed threads, etc - things that are + * generally not possible to do from an unattached thread. We have a choice: + * we can either attach in the signal handler or we can punt the crash data + * collection to a thread that we know is already running and is in a healthy + * state. Attaching in a signal handler is unlikely to work (attaching is not + * async signal safe). So instead we initialize a crash report leader thread + * at startup, and ask it to collect the crash report on our behalf. + * + * + * The order of operations is: + * - at startup: + * - main thread starts leader thread + * - leader thread starts, toggles leader_running and waits for begin_crash_report + * - to report a crash: + * - mono_threads_summarize gates the crashes so only one thread originates a crash report at a time + * - originating thread writes its info to the leader and posts to begin_crash_report. + * - leader wakes up, copies the originator data, starts collecting a crash report + * - leader and originator coordinate via leader_commanded and the response_fds to collect a crash report + * - orignator returns to mono_threads_summarize, unblocks the next crash originator, if any, and then returns (which sends off the crash report in some way). + */ + +typedef struct _MonoSummarizerOriginator { + /* in data */ + SummarizerGlobalState *state; + MonoNativeThreadId originator_tid; + MonoContext *originator_ctx; + gchar *working_mem; /* in-data: memory for the summary report and its size */ + size_t provided_size; + gchar **out; /* pointer into working_mem containing the output string */ + /* in data - set after the originator dumps itself, while leader is waiting for all threads */ + MonoThreadSummary *originator_summary; + /* out data */ + /* index of originator thread in list of threads collected by the leader */ + int originator_index; +} MonoSummarizerOriginator; + + +typedef struct _MonoSummarizerLeader { + MonoNativeThreadId leader_tid; + int32_t leader_running; /* only atomic reads */ + MonoSemType begin_crash_report; + /* originator cant post commands to the leader */ + MonoSemType leader_commanded; + int leader_command; + /* pipe to communicate back from the summarizer leader to the originator */ + int response_fds[2]; + /* Only one orignator at a time, gated by mono_threads_summarize tickets */ + MonoSummarizerOriginator originator; +} MonoSummarizerLeader; + +static MonoSummarizerLeader summarizer_leader_data; + +/* Commands from crash originator to the crash leader */ +enum LeaderCommand { + LEADER_COMMAND_ZERO = 0, /* not used */ + LEADER_COMMAND_CANCEL = -1, + LEADER_COMMAND_PROCEED_TO_SUSPEND = 1, + LEADER_COMMAND_PROCEED_TO_TERM = 2, +}; + +/* Responses from the crash leader to the crash originator */ +enum LeaderResponse { + LEADER_RESPONSE_IDS_COLLECTED = 1, + LEADER_RESPONSE_THREADS_SUSPENDED = 2, + LEADER_RESPONSE_STACKS_WALKED = 3, +}; + +/* Uncomment to get additional debugging code in the crash leader */ +/* #define LEADER_DEBUG */ + +#ifdef LEADER_DEBUG +#define LEADER_LOG(...) g_async_safe_printf (__VA_ARGS__) +#else +#define LEADER_LOG(...) /*empty*/ +#endif + +/* Called by the leader to send responses to the originator */ +static void +summarizer_leader_response_write (char b) +{ + int res; + LEADER_LOG ("leader --> originator: %d\n", (int)b); + while ((res = write (summarizer_leader_data.response_fds[1], &b, sizeof (b))) < 0 && errno == EINTR); +} + +/* Called by the originator to receive (blocking) responses from the leader */ +static int +summarizer_leader_response_read (void) +{ + char buf; + int nread = 0; + do { + int res = read(summarizer_leader_data.response_fds[0], &buf, sizeof (buf)); + if (res < 0) { + if (errno == EINTR) + continue; + else + return -1; + } + nread += res; + } while (nread < sizeof (buf)); + LEADER_LOG ("originator <---- leader : %d\n", (int)buf); + return (int)buf; +} + +/* Called by the leader to wait for a command from the crash originator */ +static gboolean +summarizer_leader_wait_for_command (int *leader_command) +{ + MONO_ENTER_GC_SAFE; + /* allow interruptions */ + while (mono_os_sem_wait (&summarizer_leader_data.leader_commanded, MONO_SEM_FLAGS_ALERTABLE) < 0); + MONO_EXIT_GC_SAFE; + *leader_command = summarizer_leader_data.leader_command; + if (*leader_command == LEADER_COMMAND_CANCEL) { + return FALSE; + } + return TRUE; +} + + +/* Called by the originator to post commands to the leader. Usually just to proceed to the next state */ +static void +summarizer_leader_post_command (int command) +{ + summarizer_leader_data.leader_command = command; + mono_os_sem_post (&summarizer_leader_data.leader_commanded); +} + +static void +summarizer_leader_collect_thread_ids (SummarizerGlobalState *state); +static void +summarizer_leader_suspend_others (SummarizerGlobalState *state, MonoNativeThreadId originator, int originator_idx); +static void +summarizer_leader_set_originator_summary (MonoThreadSummary *orignator_summary); +static void +summarizer_state_get_index_for_thread (SummarizerGlobalState *state, MonoNativeThreadId current, int *my_index); +static void +summarizer_state_wait_and_term (MonoNativeThreadId caller_tid, SummarizerGlobalState *state, gchar **out, gchar *working_mem, size_t provided_size, MonoThreadSummary *originator_summary); +static void +summarizer_leader_adjust_tids_for_foreign_originator (void); + +static void +summarizer_leader (void) +{ + MonoInternalThread *thread = mono_thread_internal_current (); + thread->flags |= MONO_THREAD_FLAG_DONT_MANAGE; + + /* + * This thread must not be stopped by the profiler or the STW + * machinery. While collecting crashes it also violates the coop GC + * rules by accessing managed memory to gather crash reports. But + * crash reporting has its own signal-based mechanism to interrupt the + * other threads, so this is okay. + */ + mono_thread_info_set_flags (MONO_THREAD_INFO_FLAGS_NO_GC | MONO_THREAD_INFO_FLAGS_NO_SAMPLE); + + mono_thread_set_name_constant_ignore_error (thread, "Crash Report Leader", MonoSetThreadNameFlag_None); + + mono_thread_set_state (mono_thread_internal_current (), ThreadState_Background); + + /* This thread is always in async context */ + mono_thread_info_set_is_async_context (TRUE); + + mono_atomic_store_i32 (&summarizer_leader_data.leader_running, 1); + while (TRUE) { + /* Leader ready to receive crashe report requests */ + MONO_ENTER_GC_SAFE; + /* allow interruptions */ + while (mono_os_sem_wait (&summarizer_leader_data.begin_crash_report, MONO_SEM_FLAGS_ALERTABLE) < 0); + MONO_EXIT_GC_SAFE; + LEADER_LOG ("Crash report leader %p beginning collection for originator %p\n", (gpointer)(intptr_t)summarizer_leader_data.leader_tid, (gpointer)(intptr_t)summarizer_leader_data.originator.originator_tid); + + /* Leader is collecting thread ids for a crash */ + + /* collect a crash report */ + summarizer_leader_collect_thread_ids (summarizer_leader_data.originator.state); + + summarizer_leader_data.originator.originator_index = -1; + summarizer_state_get_index_for_thread (summarizer_leader_data.originator.state, summarizer_leader_data.originator.originator_tid, &summarizer_leader_data.originator.originator_index); + if (summarizer_leader_data.originator.originator_index == -1) + summarizer_leader_adjust_tids_for_foreign_originator (); + + /* wake up originator */ + summarizer_leader_response_write (LEADER_RESPONSE_IDS_COLLECTED); + + /* Leader is waiting for report originator before suspending the other threads */ + int cmd; + if (!summarizer_leader_wait_for_command (&cmd)) + continue; /* restart */ + g_assert (cmd == LEADER_COMMAND_PROCEED_TO_SUSPEND); + + /* Leader is suspending other threads */ + summarizer_leader_suspend_others(summarizer_leader_data.originator.state, summarizer_leader_data.originator.originator_tid, summarizer_leader_data.originator.originator_index); + summarizer_leader_response_write (LEADER_RESPONSE_THREADS_SUSPENDED); + + /* Pause leader until originator populates its stack data */ + + if (!summarizer_leader_wait_for_command (&cmd)) + continue; + g_assert (cmd == LEADER_COMMAND_PROCEED_TO_TERM); + + /* Finish up the crash report */ + summarizer_state_wait_and_term (summarizer_leader_data.leader_tid, summarizer_leader_data.originator.state, summarizer_leader_data.originator.out, summarizer_leader_data.originator.working_mem, summarizer_leader_data.originator.provided_size, summarizer_leader_data.originator.originator_summary); + LEADER_LOG ("Crash report leader finished reporting. Ready for next crash\n"); + summarizer_leader_response_write (LEADER_RESPONSE_STACKS_WALKED); + } +} + +static gboolean +summarizer_leader_is_running (void) +{ + return mono_atomic_load_i32 (&summarizer_leader_data.leader_running); +} + + +static void +summarizer_leader_init (void) +{ + /* TODO: do we really need two semaphores? There's always one + * originator and one leader - we can signal the leader to begin by + * posting a command. */ + mono_os_sem_init (&summarizer_leader_data.begin_crash_report, 0); + mono_os_sem_init (&summarizer_leader_data.leader_commanded, 0); + /* Can't create the leader thread early on because MonoInternalThread needs the corlib InternalThread type */ + int res = pipe (summarizer_leader_data.response_fds); + g_assert (!res); +} + +void +mono_summarizer_create_leader_thread (void) +{ + ERROR_DECL (error); + + MonoInternalThread *leader = mono_thread_create_internal (mono_get_root_domain (), summarizer_leader, NULL, MONO_THREAD_CREATE_FLAGS_NONE, error); + mono_error_assert_ok (error); + + summarizer_leader_data.leader_tid = thread_get_tid (leader); +} +static void +summarizer_originator_prepare (MonoSummarizerOriginator *orig, SummarizerGlobalState *state, MonoNativeThreadId tid, MonoContext *ctx, gchar **out, gchar *working_mem, size_t provided_size) +{ + orig->state = state; + orig->originator_tid = tid; + orig->originator_ctx = ctx; + orig->out = out; + orig->working_mem = working_mem; + orig->provided_size = provided_size; +} + +static void +summarizer_leader_set_originator_summary (MonoThreadSummary *originator_summary) +{ + summarizer_leader_data.originator.originator_summary = originator_summary; +} + + +/* returns 0 if leader is not running and crash reporting should be done on the originator thread. + * returns <0 if leader could not collect a crash report. + * otherwise returns 1 + */ +static int +summarizer_originate_crash_report (SummarizerGlobalState *state, MonoNativeThreadId originator_tid, MonoContext *ctx, gchar **out, gchar *working_mem, size_t provided_size) +{ + /* FIXME: we already have a mechanism for gating requests in mono_threads_summarize, don't need another one here */ + if (!summarizer_leader_is_running ()) { + /* FIXME: in that case, just gather the crash report on the main thread - it's early during startup */ + g_async_safe_printf ("crash summarizer leader thread is not running. collecting crash report syncronously.\n"); + return 0; + } + + if (mono_native_thread_id_equals (originator_tid, summarizer_leader_data.leader_tid)) { + g_async_safe_printf ("crash summarizer leader thread crashed. collecting a crash report synchronously\n"); + /* Make it look like there's no summarizer leader */ + mono_atomic_store_i32 (&summarizer_leader_data.leader_running, 0); + memset (&summarizer_leader_data.leader_tid, 0, sizeof (summarizer_leader_data.leader_tid)); + /* Collect the crash report synchronously on this thread */ + return 0; + } + + summarizer_originator_prepare (&summarizer_leader_data.originator, state, originator_tid, ctx, out, working_mem, provided_size); + /* we're intentionally not switching to GC Safe mode in case we crashed in the coop state machine */ + mono_os_sem_post (&summarizer_leader_data.begin_crash_report); + + return summarizer_leader_response_read (); +} + + static void collect_thread_id (gpointer key, gpointer value, gpointer user) { @@ -6382,24 +6678,67 @@ collect_thread_ids (MonoNativeThreadId *thread_ids, int max_threads) return ud.nthreads; } + +/* + * Try to initialize the global summarizer thread. At this point the caller + * doesn't know if it's the crash originator, or if another crash is in + * progress and the current thread was asked to summarize its state. Returns + * TRUE if the current thread is to be the crash originator, returns FALSE if + * there's already a crash collection in progress. + */ static gboolean -summarizer_state_init (SummarizerGlobalState *state, MonoNativeThreadId current, int *my_index) +summarizer_state_init (SummarizerGlobalState *state) { gint32 started_state = mono_atomic_cas_i32 (&state->has_owner, 1 /* set */, 0 /* compare */); gboolean not_started = started_state == 0; - if (not_started) { - state->nthreads = collect_thread_ids (state->thread_array, MAX_NUM_THREADS); + if (not_started) mono_os_sem_init (&state->update, 0); + return not_started; +} + +static void +summarizer_state_collect_thread_ids (SummarizerGlobalState *state) +{ + state->nthreads = collect_thread_ids (state->thread_array, MAX_NUM_THREADS); +} + +static void +summarizer_leader_collect_thread_ids (SummarizerGlobalState *state) +{ + summarizer_state_collect_thread_ids (state); + +} + +static void +summarizer_leader_adjust_tids_for_foreign_originator (void) +{ + if (summarizer_leader_data.originator.originator_index == -1) { + /* The crash originator is not in Mono's thread list - it's some foreign thread that crashed in native code */ + /* Add a slot for it at the end of the summarizer global state */ + SummarizerGlobalState *state = summarizer_leader_data.originator.state; + if (state->nthreads < MAX_NUM_THREADS - 1) { + int originator_index = state->nthreads++; + state->thread_array [originator_index] = summarizer_leader_data.originator.originator_tid; + summarizer_leader_data.originator.originator_index = originator_index; + } } +} +static int +summarizer_leader_get_originator_index (void) +{ + return summarizer_leader_data.originator.originator_index; +} + +static void +summarizer_state_get_index_for_thread (SummarizerGlobalState *state, MonoNativeThreadId current, int *my_index) +{ for (int i = 0; i < state->nthreads; i++) { if (state->thread_array [i] == current) { *my_index = i; break; } } - - return not_started; } static void @@ -6414,23 +6753,38 @@ summarizer_signal_other_threads (SummarizerGlobalState *state, MonoNativeThreadI if (i == current_idx) continue; + MonoNativeThreadId tid = state->thread_array [i]; + + if (mono_native_thread_id_equals (tid, summarizer_leader_data.leader_tid)) + continue; + #ifdef HAVE_PTHREAD_KILL pthread_kill (state->thread_array [i], SIGTERM); if (!state->silent) - g_async_safe_printf("Pkilling 0x%" G_GSIZE_FORMAT "x from 0x%" G_GSIZE_FORMAT "x\n", (gsize)MONO_NATIVE_THREAD_ID_TO_UINT (state->thread_array [i]), (gsize)MONO_NATIVE_THREAD_ID_TO_UINT (current)); + g_async_safe_printf("Pkilling %p from %p\n", (gpointer)(intptr_t) state->thread_array [i], (gpointer)(intptr_t)current); #else g_error ("pthread_kill () is not supported by this platform"); #endif } } +static void +summarizer_leader_suspend_others (SummarizerGlobalState *state, MonoNativeThreadId originator, int originator_idx) +{ + summarizer_signal_other_threads (state, originator, originator_idx); +} + // Returns true when there are shared global references to "this_thread" static gboolean summarizer_post_dump (SummarizerGlobalState *state, MonoThreadSummary *this_thread, int current_idx) { mono_memory_barrier (); + /* If the thread wasn't assigned a slot, don't save its dump */ + if (current_idx < 0) + return FALSE; + gpointer old = mono_atomic_cas_ptr ((volatile gpointer *)&state->all_threads [current_idx], this_thread, NULL); if (old == GINT_TO_POINTER (-1)) { @@ -6504,20 +6858,24 @@ summarizer_state_term (SummarizerGlobalState *state, gchar **out, gchar *mem, si mono_summarize_timeline_phase_log (MonoSummaryManagedStacks); for (int i=0; i < state->nthreads; i++) { threads [i] = summarizer_try_read_thread (state, i); + LEADER_LOG("managed stack for thread %d (%p) \"%s\"\n", i, threads[i] ? (gpointer)threads[i]->native_thread_id : (gpointer)NULL, threads[i] && threads[i]->name[0] != '\0' ? threads[i]->name : ""); if (!threads [i]) continue; - // We are doing this dump on the controlling thread because this isn't + // We are doing this dump on the leader or controlling thread because this isn't // an async context sometimes. There's still some reliance on malloc here, but it's - // much more stable to do it all from the controlling thread. + // much more stable to do it all from the leader or controlling thread. // // This is non-null, checked in mono_threads_summarize // with early exit there mono_get_eh_callbacks ()->mono_summarize_managed_stack (threads [i]); + + LEADER_LOG("finished managed stack for thread %d (%p)\n", i, threads[i] ? (gpointer)threads[i]->native_thread_id : (gpointer)NULL); } /* The value of the breadcrumb should match the "StackHash" value written by `mono_merp_write_fingerprint_payload` */ mono_create_crash_hash_breadcrumb (controlling); + LEADER_LOG("wrote hash breadcrumb for controlling thread %p", controlling ? (gpointer)controlling->native_thread_id : (gpointer)NULL); MonoStateWriter writer; memset (&writer, 0, sizeof (writer)); @@ -6555,6 +6913,22 @@ summarizer_state_wait (MonoThreadSummary *thread) mono_os_sem_timedwait (&thread->done_wait, milliseconds_in_second, MONO_SEM_FLAGS_NONE); } +static void +summarizer_state_wait_and_term (MonoNativeThreadId caller_tid, SummarizerGlobalState *state, gchar **out, gchar *working_mem, size_t provided_size, MonoThreadSummary *originator_summary) +{ + if (!state->silent) + g_async_safe_printf("Entering thread summarizer pause from %p\n", (gpointer)(intptr_t)caller_tid); + + // Wait up to 2 seconds for all of the other threads to catch up + summary_timedwait (state, 2); + + if (!state->silent) + g_async_safe_printf("Finished thread summarizer pause from %p.\n", (gpointer)(intptr_t)caller_tid); + + // Dump and cleanup all the stack memory + summarizer_state_term (state, out, working_mem, provided_size, originator_summary); +} + static gboolean mono_threads_summarize_execute_internal (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gboolean silent, gchar *working_mem, size_t provided_size, gboolean this_thread_controls) { @@ -6562,10 +6936,56 @@ mono_threads_summarize_execute_internal (MonoContext *ctx, gchar **out, MonoStac int current_idx; MonoNativeThreadId current = mono_native_thread_id_get (); - gboolean thread_given_control = summarizer_state_init (&state, current, ¤t_idx); + gboolean thread_given_control = summarizer_state_init (&state); g_assert (this_thread_controls == thread_given_control); + /* if true, the crash leader is not running yet - collect the report on the current originating thread */ + gboolean collect_synchronously = FALSE; + if (this_thread_controls) { + int res = summarizer_originate_crash_report (&state, current, ctx, out, working_mem, provided_size); + + /* + * We need to coordinate the originator and the leader in a few + * places. + * + * The leader needs to pause to after collecting the thread ids + * before suspending the non-originator threads, and again + * while the originator is dumping its own stack. + * + * The originator needs to wait for the leader to collect the + * thread IDs and to tell it its assigned slot. Then it tells + * the leader to suspend the others, dumps its own memory, then + * tell the leader to dump the whole crash report and waits for + * it to reply when it's done. + */ + + if (res == 0) { + /* collect the crash report synchronously */ + collect_synchronously = TRUE; + } else if (res < 0) { + g_async_safe_printf ("Crash summary leader could not collect thread data. No crash report will be created.\n"); + /* something went wrong */ + return FALSE; + } else { + g_assert (res == LEADER_RESPONSE_IDS_COLLECTED); + /* get the thread index from the crash leader */ + current_idx = summarizer_leader_get_originator_index (); + if (current_idx < 0) { + g_async_safe_printf ("Summarizer originator not in the thread list\n"); + } else { + LEADER_LOG ("Summarizer originator has index %d\n", current_idx); + } + } + } + + if (this_thread_controls && collect_synchronously) { + summarizer_state_collect_thread_ids (&state); + } + + if (!this_thread_controls || collect_synchronously) + summarizer_state_get_index_for_thread (&state, current, ¤t_idx); + if (state.nthreads == 0) { if (!silent) g_async_safe_printf("No threads attached to runtime.\n"); @@ -6574,18 +6994,31 @@ mono_threads_summarize_execute_internal (MonoContext *ctx, gchar **out, MonoStac } if (this_thread_controls) { - g_assert (working_mem); - mono_summarize_timeline_phase_log (MonoSummarySuspendHandshake); state.silent = silent; - summarizer_signal_other_threads (&state, current, current_idx); + if (!collect_synchronously) { + /* + * crash leader signals the other threads, but not the + * originator thread - we're going to dump by + * ourselves, below. + */ + summarizer_leader_post_command (LEADER_COMMAND_PROCEED_TO_SUSPEND); + int res = summarizer_leader_response_read (); + g_assert (res == LEADER_RESPONSE_THREADS_SUSPENDED); + } else { + summarizer_signal_other_threads (&state, current, current_idx); + } mono_summarize_timeline_phase_log (MonoSummaryUnmanagedStacks); } MonoStateMem mem; gboolean success = mono_state_alloc_mem (&mem, (long) current, sizeof (MonoThreadSummary)); - if (!success) + if (!success) { + if (this_thread_controls && !collect_synchronously) { + summarizer_leader_post_command (LEADER_COMMAND_CANCEL); + } return FALSE; + } MonoThreadSummary *this_thread = (MonoThreadSummary *) mem.mem; @@ -6597,24 +7030,23 @@ mono_threads_summarize_execute_internal (MonoContext *ctx, gchar **out, MonoStac // Store a reference to our stack memory into global state gboolean success = summarizer_post_dump (&state, this_thread, current_idx); if (!success && !state.silent) - g_async_safe_printf("Thread 0x%" G_GSIZE_FORMAT "x reported itself.\n", (gsize)MONO_NATIVE_THREAD_ID_TO_UINT (current)); + g_async_safe_printf("Thread %p reported itself.\n", (gpointer)(intptr_t)current); } else if (!state.silent) { - g_async_safe_printf("Thread 0x%" G_GSIZE_FORMAT "x couldn't report itself.\n", (gsize)MONO_NATIVE_THREAD_ID_TO_UINT (current)); + g_async_safe_printf("Thread %p couldn't report itself.\n", (gpointer)(intptr_t)current); } // From summarizer, wait and dump. if (this_thread_controls) { - if (!state.silent) - g_async_safe_printf("Entering thread summarizer pause from 0x%" G_GSIZE_FORMAT "x\n", (gsize)MONO_NATIVE_THREAD_ID_TO_UINT (current)); - - // Wait up to 2 seconds for all of the other threads to catch up - summary_timedwait (&state, 2); - - if (!state.silent) - g_async_safe_printf("Finished thread summarizer pause from 0x%" G_GSIZE_FORMAT "x.\n", (gsize)MONO_NATIVE_THREAD_ID_TO_UINT (current)); - - // Dump and cleanup all the stack memory - summarizer_state_term (&state, out, working_mem, provided_size, this_thread); + if (collect_synchronously) { + summarizer_state_wait_and_term (current, &state, out, working_mem, provided_size, this_thread); + } else { + summarizer_leader_set_originator_summary (this_thread); + summarizer_leader_post_command (LEADER_COMMAND_PROCEED_TO_TERM); + /* blocks here until leader is done, keeping + * originator's stack memory alive for the dumper */ + int res = summarizer_leader_response_read (); + g_assert (res == LEADER_RESPONSE_STACKS_WALKED); + } } else { // Wait here, keeping our stack memory alive // for the dumper @@ -6634,6 +7066,7 @@ void mono_threads_summarize_init (void) { summarizer_supervisor_init (); + summarizer_leader_init (); } gboolean @@ -6660,6 +7093,7 @@ mono_threads_summarize (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gb static gint64 request_available_to_run = 1; gint64 this_request_id = mono_atomic_inc_i64 ((volatile gint64 *) &next_pending_request_id); + g_async_safe_printf ("Thread %p starting summarize_execute\n", (gpointer)(intptr_t)mono_native_thread_id_get ()); // This is a global queue of summary requests. // It's not safe to signal a thread while they're in the // middle of a dump. Dladdr is not reentrant. It's the one lock @@ -6682,13 +7116,15 @@ mono_threads_summarize (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gb gint64 next_request_id = mono_atomic_load_i64 ((volatile gint64 *) &request_available_to_run); if (next_request_id == this_request_id) { - gboolean already_async = mono_thread_info_is_async_context (); + gboolean foreign = mono_thread_info_current_unchecked () == NULL; + gboolean already_async = foreign || mono_thread_info_is_async_context (); if (!already_async) mono_thread_info_set_is_async_context (TRUE); SummarizerSupervisorState synch; if (summarizer_supervisor_start (&synch)) { g_assert (mem); + success = mono_threads_summarize_execute_internal (ctx, out, hashes, silent, mem, provided_size, TRUE); summarizer_supervisor_end (&synch); } diff --git a/mono/mini/debugger-agent.c b/mono/mini/debugger-agent.c index 54c0ca97801..b2787edce48 100644 --- a/mono/mini/debugger-agent.c +++ b/mono/mini/debugger-agent.c @@ -5161,6 +5161,10 @@ ss_clear_for_assembly (SingleStepReq *req, MonoAssembly *assembly) static void mono_debugger_agent_send_crash (char *json_dump, MonoStackHash *hashes, int pause) { + /* Did we crash on an unattached thread? Can't do runtime notifications from there */ + if (!mono_thread_info_current_unchecked ()) + return; + MONO_ENTER_GC_UNSAFE; #ifndef DISABLE_CRASH_REPORTING int suspend_policy; diff --git a/mono/mini/mini-exceptions.c b/mono/mini/mini-exceptions.c index e712a623bb4..643450f2fd1 100644 --- a/mono/mini/mini-exceptions.c +++ b/mono/mini/mini-exceptions.c @@ -1751,6 +1751,10 @@ mono_summarize_unmanaged_stack (MonoThreadSummary *out) // Summarize unmanaged stack // #ifdef HAVE_BACKTRACE_SYMBOLS + MonoDomain *domain = mono_domain_get (); + + gboolean has_jit_tls = mono_tls_get_jit_tls () != NULL; + intptr_t frame_ips [MONO_MAX_SUMMARY_FRAMES]; out->num_unmanaged_frames = backtrace ((void **)frame_ips, MONO_MAX_SUMMARY_FRAMES); @@ -1761,11 +1765,16 @@ mono_summarize_unmanaged_stack (MonoThreadSummary *out) const char* module_buf = frame->unmanaged_data.module; int success = mono_get_portable_ip (ip, &frame->unmanaged_data.ip, &frame->unmanaged_data.offset, &module_buf, (char *) frame->str_descr); + /* If the thread is not attached to the JIT, (ie crashed native + * thread), don't try to look for managed method info - it will + * assert in mono_jit_info_table_find_internal */ + if (!has_jit_tls) + continue; + /* attempt to look up any managed method at that ip */ /* TODO: Trampolines - follow examples from mono_print_method_from_ip() */ MonoJitInfo *ji; - MonoDomain *domain = mono_domain_get (); MonoDomain *target_domain; ji = mini_jit_info_table_find_ext (domain, (char *)ip, TRUE, &target_domain); if (ji) { @@ -1796,7 +1805,7 @@ mono_summarize_unmanaged_stack (MonoThreadSummary *out) MonoThreadInfo *thread = mono_thread_info_current_unchecked (); out->info_addr = (intptr_t) thread; - out->jit_tls = thread->jit_data; + out->jit_tls = thread ? thread->jit_data : NULL; out->domain = mono_domain_get (); if (!out->ctx) { diff --git a/mono/mini/mini-runtime.c b/mono/mini/mini-runtime.c index 4619638f31b..e04626d7ff2 100644 --- a/mono/mini/mini-runtime.c +++ b/mono/mini/mini-runtime.c @@ -3351,6 +3351,8 @@ MONO_SIG_HANDLER_FUNC (, mono_sigsegv_signal_handler) mono_chain_signal (MONO_SIG_HANDLER_PARAMS); return; } + /* thread not registered with the runtime, make sure we return now. */ + return; } #endif diff --git a/mono/tests/libtest.c b/mono/tests/libtest.c index e5e5f2f14d3..2af1ef5302b 100644 --- a/mono/tests/libtest.c +++ b/mono/tests/libtest.c @@ -8118,6 +8118,34 @@ mono_test_MerpCrashSignalIll (void) #endif } +#ifndef HOST_WIN32 +void* +foreign_thread_crash_body (void* ud) +{ + while (1) { + fprintf (stderr, "alive\n"); + sleep (2); + } + return NULL; +} +#endif + +LIBTEST_API void mono_test_MerpCrashOnForeignThread (void) +{ + +#ifndef HOST_WIN32 + pthread_t t; + int res; + + res = pthread_create (&t, NULL, foreign_thread_crash_body, NULL); + + sleep (1); + pthread_kill (t, SIGILL); + + pthread_join (t, NULL); +#endif +} + #ifdef __cplusplus } // extern C #endif diff --git a/mono/tests/merp-crash-test.cs b/mono/tests/merp-crash-test.cs index d534a64fae5..93a9426c45d 100644 --- a/mono/tests/merp-crash-test.cs +++ b/mono/tests/merp-crash-test.cs @@ -55,7 +55,8 @@ class C Crashers.Add(new Crasher ("MerpCrashExceptionHook", MerpCrashUnhandledExceptionHook)); // Specific Edge Cases - Crashers.Add(new Crasher ("MerpCrashDladdr", MerpCrashDladdr)); + //FIXME: crash in dlopen holds the global dyld lock, which we need for stack walks. + //Crashers.Add(new Crasher ("MerpCrashDladdr", MerpCrashDladdr)); Crashers.Add(new Crasher ("MerpCrashSnprintf", MerpCrashSnprintf)); Crashers.Add(new Crasher ("MerpCrashDomainUnload", MerpCrashDomainUnload)); Crashers.Add(new Crasher ("MerpCrashUnbalancedGCSafe", MerpCrashUnbalancedGCSafe)); @@ -66,6 +67,7 @@ class C Crashers.Add(new Crasher ("MerpCrashSignalSegv", MerpCrashSignalSegv)); Crashers.Add(new Crasher ("MerpCrashSignalIll", MerpCrashSignalIll)); Crashers.Add(new Crasher ("MerpCrashTestBreadcrumbs", MerpCrashTestBreadcrumbs, validator: ValidateBreadcrumbs)); + Crashers.Add(new Crasher ("MerpCrashOnForeignThread", MerpCrashOnForeignThread)); } public static void @@ -245,6 +247,14 @@ class C mono_test_MerpCrashSignalSegv (); } + [DllImport("libtest")] + public static extern void mono_test_MerpCrashOnForeignThread (); + + public static void + MerpCrashOnForeignThread () + { + mono_test_MerpCrashOnForeignThread (); + } private static object jsonGetKey (object o, string key) => (o as Dictionary<string,object>)[key]; private static object jsonGetKeys (object o, params string[] keys) { diff --git a/mono/utils/hazard-pointer.c b/mono/utils/hazard-pointer.c index 8fe373e64fb..e01912d8121 100644 --- a/mono/utils/hazard-pointer.c +++ b/mono/utils/hazard-pointer.c @@ -187,7 +187,7 @@ mono_hazard_pointer_get (void) if (small_id < 0) { static MonoThreadHazardPointers emerg_hazard_table; - g_warning ("Thread %p may have been prematurely finalized", (gpointer) (gsize) mono_native_thread_id_get ()); + g_warning ("Thread %p may have been prematurely finalized\n", (gpointer) (gsize) mono_native_thread_id_get ()); return &emerg_hazard_table; } |