wait_task_stopped: simplify and fix races with SIGCONT/SIGKILL/untrace

wait_task_stopped() has multiple races with SIGCONT/SIGKILL. tasklist_lock does not pin the child in TASK_TRACED/TASK_STOPPED stated, almost all info reported (including exit_code) may be wrong. In fact, the code under write_lock_irq(tasklist_lock) is not safe. The child may be PTRACE_DETACH'ed at this time by another subthread, in that case it is possible we are no longer its ->parent. Change wait_task_stopped() to take ->siglock before inspecting the task. This guarantees that the child can't resume and (for example) clear its ->exit_code, so we don't need to use xchg(&p->exit_code) and re-check. The only exception is ptrace_stop() which changes ->state and ->exit_code without ->siglock held during abort. But this can only happen if both the tracer and the tracee are dying (coredump is in progress), we don't care. With this patch wait_task_stopped() doesn't move the child to the end of the ->parent list on success. This optimization could be restored, but in that case we have to take write_lock(tasklist) and do some nasty checks. Also change the do_wait() since we don't return EAGAIN any longer. [akpm@linux-foundation.org: fix up after Willy renamed everything] Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Roland McGrath <roland@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Oleg Nesterov <oleg@tv-sign.ru> 2008-02-08 04:19:01 -0800
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-08 09:22:26 -0800
commit: ee7c82da830ea860b1f9274f1f0cdf99f206e7c2 (patch)
tree: 449477479b25ec73a1c2242723b3fb1c09cd936d
parent: 6405f7f4675884b671bee66678e1c2859bdb0e56 (diff)
download: op-kernel-dev-ee7c82da830ea860b1f9274f1f0cdf99f206e7c2.zip
op-kernel-dev-ee7c82da830ea860b1f9274f1f0cdf99f206e7c2.tar.gz
1 files changed, 26 insertions, 62 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index b5ff2b1..da293ac 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1355,17 +1355,35 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
 			     int noreap, struct siginfo __user *infop,
 			     int __user *stat_addr, struct rusage __user *ru)
 {
-	int retval, exit_code;
+	int retval, exit_code, why;
+	uid_t uid = 0; /* unneeded, required by compiler */
 	pid_t pid;
 
-	if (!p->exit_code)
-		return 0;
+	exit_code = 0;
+	spin_lock_irq(&p->sighand->siglock);
+
+	if (unlikely(!task_is_stopped_or_traced(p)))
+		goto unlock_sig;
+
 	if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
 	    p->signal->group_stop_count > 0)
 		/*
 		 * A group stop is in progress and this is the group leader.
 		 * We won't report until all threads have stopped.
 		 */
+		goto unlock_sig;
+
+	exit_code = p->exit_code;
+	if (!exit_code)
+		goto unlock_sig;
+
+	if (!noreap)
+		p->exit_code = 0;
+
+	uid = p->uid;
+unlock_sig:
+	spin_unlock_irq(&p->sighand->siglock);
+	if (!exit_code)
 		return 0;
 
 	/*
@@ -1375,65 +1393,15 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
 	 * keep holding onto the tasklist_lock while we call getrusage and
 	 * possibly take page faults for user memory.
 	 */
-	pid = task_pid_nr_ns(p, current->nsproxy->pid_ns);
 	get_task_struct(p);
+	pid = task_pid_nr_ns(p, current->nsproxy->pid_ns);
+	why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(&tasklist_lock);
 
-	if (unlikely(noreap)) {
-		uid_t uid = p->uid;
-		int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
-
-		exit_code = p->exit_code;
-		if (unlikely(!exit_code) || unlikely(p->exit_state))
-			goto bail_ref;
+	if (unlikely(noreap))
 		return wait_noreap_copyout(p, pid, uid,
 					   why, exit_code,
 					   infop, ru);
-	}
-
-	write_lock_irq(&tasklist_lock);
-
-	/*
-	 * This uses xchg to be atomic with the thread resuming and setting
-	 * it.  It must also be done with the write lock held to prevent a
-	 * race with the EXIT_ZOMBIE case.
-	 */
-	exit_code = xchg(&p->exit_code, 0);
-	if (unlikely(p->exit_state)) {
-		/*
-		 * The task resumed and then died.  Let the next iteration
-		 * catch it in EXIT_ZOMBIE.  Note that exit_code might
-		 * already be zero here if it resumed and did _exit(0).
-		 * The task itself is dead and won't touch exit_code again;
-		 * other processors in this function are locked out.
-		 */
-		p->exit_code = exit_code;
-		exit_code = 0;
-	}
-	if (unlikely(exit_code == 0)) {
-		/*
-		 * Another thread in this function got to it first, or it
-		 * resumed, or it resumed and then died.
-		 */
-		write_unlock_irq(&tasklist_lock);
-bail_ref:
-		put_task_struct(p);
-		/*
-		 * We are returning to the wait loop without having successfully
-		 * removed the process and having released the lock. We cannot
-		 * continue, since the "p" task pointer is potentially stale.
-		 *
-		 * Return -EAGAIN, and do_wait() will restart the loop from the
-		 * beginning. Do _not_ re-acquire the lock.
-		 */
-		return -EAGAIN;
-	}
-
-	/* move to end of parent's list to avoid starvation */
-	remove_parent(p);
-	add_parent(p);
-
-	write_unlock_irq(&tasklist_lock);
 
 	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 	if (!retval && stat_addr)
@@ -1443,15 +1411,13 @@ bail_ref:
 	if (!retval && infop)
 		retval = put_user(0, &infop->si_errno);
 	if (!retval && infop)
-		retval = put_user((short)((p->ptrace & PT_PTRACED)
-					  ? CLD_TRAPPED : CLD_STOPPED),
-				  &infop->si_code);
+		retval = put_user(why, &infop->si_code);
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
-		retval = put_user(p->uid, &infop->si_uid);
+		retval = put_user(uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 	put_task_struct(p);
@@ -1558,8 +1524,6 @@ repeat:
 				retval = wait_task_stopped(p, ret == 2,
 						(options & WNOWAIT), infop,
 						stat_addr, ru);
-				if (retval == -EAGAIN)
-					goto repeat;
 				if (retval != 0) /* He released the lock.  */
 					goto end;
 			} else if (p->exit_state == EXIT_ZOMBIE) {
author	Oleg Nesterov <oleg@tv-sign.ru>	2008-02-08 04:19:01 -0800
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-08 09:22:26 -0800
commit	ee7c82da830ea860b1f9274f1f0cdf99f206e7c2 (patch)
tree	449477479b25ec73a1c2242723b3fb1c09cd936d
parent	6405f7f4675884b671bee66678e1c2859bdb0e56 (diff)
download	op-kernel-dev-ee7c82da830ea860b1f9274f1f0cdf99f206e7c2.zip op-kernel-dev-ee7c82da830ea860b1f9274f1f0cdf99f206e7c2.tar.gz