coredump: Don't perform any cleanups before dumping core

Rename coredump_exit_mm to coredump_task_exit and call it from do_exit before PTRACE_EVENT_EXIT, and before any cleanup work for a task happens. This ensures that an accurate copy of the process can be captured in the coredump as no cleanup for the process happens before the coredump completes. This also ensures that PTRACE_EVENT_EXIT will not be visited by any thread until the coredump is complete. Add a new flag PF_POSTCOREDUMP so that tasks that have passed through coredump_task_exit can be recognized and ignored in zap_process. Now that all of the coredumping happens before exit_mm remove code to test for a coredump in progress from mm_release. Replace "may_ptrace_stop()" with a simple test of "current->ptrace". The other tests in may_ptrace_stop all concern avoiding stopping during a coredump. These tests are no longer necessary as it is now guaranteed that fatal_signal_pending will be set if the code enters ptrace_stop during a coredump. The code in ptrace_stop is guaranteed not to stop if fatal_signal_pending returns true. Until this change "ptrace_event(PTRACE_EVENT_EXIT)" could call ptrace_stop without fatal_signal_pending being true, as signals are dequeued in get_signal before calling do_exit. This is no longer an issue as "ptrace_event(PTRACE_EVENT_EXIT)" is no longer reached until after the coredump completes. Link: https://lkml.kernel.org/r/874kaax26c.fsf@disp2133 Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
author: Eric W. Biederman <ebiederm@xmission.com> 2021-09-01 19:33:50 +0300
committer: Eric W. Biederman <ebiederm@xmission.com> 2021-10-06 19:28:39 +0300
commit: 92307383082daff5df884a25df9e283efb7ef261 (patch)
tree: c73d7be83738e56a3ae161c640562563b867549e /kernel
parent: d67e03e361619b20c51aaef3b7dd1497617c371d (diff)
3 files changed, 14 insertions, 35 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index cb1619d8fd64..774e6b5061b8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -339,23 +339,29 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 	}
 }
 
-static void coredump_exit_mm(struct mm_struct *mm)
+static void coredump_task_exit(struct task_struct *tsk)
 {
 	struct core_state *core_state;
+	struct mm_struct *mm;
+
+	mm = tsk->mm;
+	if (!mm)
+		return;
 
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_lock around checking core_state
-	 * and clearing tsk->mm.  The core-inducing thread
+	 * and setting PF_POSTCOREDUMP.  The core-inducing thread
 	 * will increment ->nr_threads for each thread in the
-	 * group with ->mm != NULL.
+	 * group without PF_POSTCOREDUMP set.
 	 */
+	mmap_read_lock(mm);
+	tsk->flags |= PF_POSTCOREDUMP;
 	core_state = mm->core_state;
+	mmap_read_unlock(mm);
 	if (core_state) {
 		struct core_thread self;
 
-		mmap_read_unlock(mm);
-
 		self.task = current;
 		if (self.task->flags & PF_SIGNALED)
 			self.next = xchg(&core_state->dumper.next, &self);
@@ -375,7 +381,6 @@ static void coredump_exit_mm(struct mm_struct *mm)
 			freezable_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
-		mmap_read_lock(mm);
 	}
 }
 
@@ -480,7 +485,6 @@ static void exit_mm(void)
 		return;
 	sync_mm_rss(mm);
 	mmap_read_lock(mm);
-	coredump_exit_mm(mm);
 	mmgrab(mm);
 	BUG_ON(mm != current->active_mm);
 	/* more a memory barrier than a real lock */
@@ -768,6 +772,7 @@ void __noreturn do_exit(long code)
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
+	coredump_task_exit(tsk);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
 
 	validate_creds_for_do_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..9bd9f2da9e41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1392,8 +1392,7 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	 * purposes.
 	 */
 	if (tsk->clear_child_tid) {
-		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
-		    atomic_read(&mm->mm_users) > 1) {
+		if (atomic_read(&mm->mm_users) > 1) {
 			/*
 			 * We don't check the error code - if userspace has
 			 * not set up a proper pointer then tough luck.
diff --git a/kernel/signal.c b/kernel/signal.c
index c9759ff2cb43..b0db80acc6ef 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2158,31 +2158,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	spin_unlock_irqrestore(&sighand->siglock, flags);
 }
 
-static inline bool may_ptrace_stop(void)
-{
-	if (!likely(current->ptrace))
-		return false;
-	/*
-	 * Are we in the middle of do_coredump?
-	 * If so and our tracer is also part of the coredump stopping
-	 * is a deadlock situation, and pointless because our tracer
-	 * is dead so don't allow us to stop.
-	 * If SIGKILL was already sent before the caller unlocked
-	 * ->siglock we must see ->core_state != NULL. Otherwise it
-	 * is safe to enter schedule().
-	 *
-	 * This is almost outdated, a task with the pending SIGKILL can't
-	 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
-	 * after SIGKILL was already dequeued.
-	 */
-	if (unlikely(current->mm->core_state) &&
-	    unlikely(current->mm == current->parent->mm))
-		return false;
-
-	return true;
-}
-
-
 /*
  * This must be called with current->sighand->siglock held.
  *
@@ -2263,7 +2238,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
 
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
-	if (may_ptrace_stop()) {
+	if (likely(current->ptrace)) {
 		/*
 		 * Notify parents of the stop.
 		 *
author	Eric W. Biederman <ebiederm@xmission.com>	2021-09-01 19:33:50 +0300
committer	Eric W. Biederman <ebiederm@xmission.com>	2021-10-06 19:28:39 +0300
commit	92307383082daff5df884a25df9e283efb7ef261 (patch)
tree	c73d7be83738e56a3ae161c640562563b867549e /kernel
parent	d67e03e361619b20c51aaef3b7dd1497617c371d (diff)