exec: make de_thread() killable
author: Oleg Nesterov <oleg@redhat.com>
Mon, 8 Oct 2012 17:13:01 +0000 (19:13 +0200)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Oct 2012 21:53:20 +0000 (06:53 +0900)
Change de_thread() to use KILLABLE rather than UNINTERRUPTIBLE while
waiting for other threads.  The only complication is that we should
clear ->group_exit_task and ->notify_count before we return, and we
should do this under tasklist_lock.  -EAGAIN is used to match the
initial signal_group_exit() check/return, it doesn't really matter.

This fixes the (unlikely) race with coredump.  de_thread() checks
signal_group_exit() before it starts to kill the subthreads, but this
can't help if another CLONE_VM (but non CLONE_THREAD) task starts the
coredumping after de_thread() unlocks ->siglock.  In this case the
killed sub-thread can block in exit_mm() waiting for coredump_finish(),
the execing thread waits for that sub-thread, and the coredumping thread
waits for execing thread.  Deadlock.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/exec.c

index 9824473a7ec1688737c2150ca8bd246587e7250b..19f4fb80cd17bf0a8e889e39608a866f5591e829 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -878,9 +878,11 @@ static int de_thread(struct task_struct *tsk)
                sig->notify_count--;
 
        while (sig->notify_count) {
-               __set_current_state(TASK_UNINTERRUPTIBLE);
+               __set_current_state(TASK_KILLABLE);
                spin_unlock_irq(lock);
                schedule();
+               if (unlikely(__fatal_signal_pending(tsk)))
+                       goto killed;
                spin_lock_irq(lock);
        }
        spin_unlock_irq(lock);
@@ -898,9 +900,11 @@ static int de_thread(struct task_struct *tsk)
                        write_lock_irq(&tasklist_lock);
                        if (likely(leader->exit_state))
                                break;
-                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       __set_current_state(TASK_KILLABLE);
                        write_unlock_irq(&tasklist_lock);
                        schedule();
+                       if (unlikely(__fatal_signal_pending(tsk)))
+                               goto killed;
                }
 
                /*
@@ -994,6 +998,14 @@ no_thread_group:
 
        BUG_ON(!thread_group_leader(tsk));
        return 0;
+
+killed:
+       /* protects against exit_notify() and __exit_signal() */
+       read_lock(&tasklist_lock);
+       sig->group_exit_task = NULL;
+       sig->notify_count = 0;
+       read_unlock(&tasklist_lock);
+       return -EAGAIN;
 }
 
 char *get_task_comm(char *buf, struct task_struct *tsk)