diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-01 10:25:54 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-01 10:25:54 -0700 |
commit | 94095a1fff89dffe9451839deae4c6a40cf3ec21 (patch) | |
tree | c1beac5dc336d836c269253db3b00a302777598f | |
parent | 620e77533f29796df7aff861e79bd72e08554ebb (diff) | |
parent | f784e8a7989c0da3062d04bfea3db90f41e8f738 (diff) | |
download | lwn-94095a1fff89dffe9451839deae4c6a40cf3ec21.tar.gz lwn-94095a1fff89dffe9451839deae4c6a40cf3ec21.zip |
Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core kernel fixes from Ingo Molnar:
"This is a complex task_work series from Oleg that fixes the bug that
this VFS commit tried to fix:
d35abdb28824 hold task_lock around checks in keyctl
but solves the problem without the lockup regression that d35abdb28824
introduced in v3.6.
This series came late in v3.6 and I did not feel confident about it so
late in the cycle. Might be worth backporting to -stable if it proves
itself upstream."
* 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
task_work: Simplify the usage in ptrace_notify() and get_signal_to_deliver()
task_work: Revert "hold task_lock around checks in keyctl"
task_work: task_work_add() should not succeed after exit_task_work()
task_work: Make task_work_add() lockless
-rw-r--r-- | include/linux/task_work.h | 3 | ||||
-rw-r--r-- | kernel/signal.c | 18 | ||||
-rw-r--r-- | kernel/task_work.c | 111 | ||||
-rw-r--r-- | security/keys/keyctl.c | 2 |
4 files changed, 66 insertions, 68 deletions
diff --git a/include/linux/task_work.h b/include/linux/task_work.h index fb46b03b1852..ca5a1cf27dae 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -18,8 +18,7 @@ void task_work_run(void); static inline void exit_task_work(struct task_struct *task) { - if (unlikely(task->task_works)) - task_work_run(); + task_work_run(); } #endif /* _LINUX_TASK_WORK_H */ diff --git a/kernel/signal.c b/kernel/signal.c index be4f856d52f8..2c681f11b7d2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1971,13 +1971,8 @@ static void ptrace_do_notify(int signr, int exit_code, int why) void ptrace_notify(int exit_code) { BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); - if (unlikely(current->task_works)) { - if (test_and_clear_ti_thread_flag(current_thread_info(), - TIF_NOTIFY_RESUME)) { - smp_mb__after_clear_bit(); - task_work_run(); - } - } + if (unlikely(current->task_works)) + task_work_run(); spin_lock_irq(¤t->sighand->siglock); ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); @@ -2198,13 +2193,8 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct signal_struct *signal = current->signal; int signr; - if (unlikely(current->task_works)) { - if (test_and_clear_ti_thread_flag(current_thread_info(), - TIF_NOTIFY_RESUME)) { - smp_mb__after_clear_bit(); - task_work_run(); - } - } + if (unlikely(current->task_works)) + task_work_run(); if (unlikely(uprobe_deny_signal())) return 0; diff --git a/kernel/task_work.c b/kernel/task_work.c index d320d44903bd..65bd3c92d6f3 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -2,26 +2,20 @@ #include <linux/task_work.h> #include <linux/tracehook.h> +static struct callback_head work_exited; /* all we need is ->next == NULL */ + int -task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) +task_work_add(struct task_struct *task, struct callback_head *work, bool notify) { - struct callback_head *last, *first; - unsigned long flags; + struct callback_head *head; - /* - * Not inserting the new work if the task has already passed - * exit_task_work() is the responisbility of callers. - */ - raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - first = last ? last->next : twork; - twork->next = first; - if (last) - last->next = twork; - task->task_works = twork; - raw_spin_unlock_irqrestore(&task->pi_lock, flags); + do { + head = ACCESS_ONCE(task->task_works); + if (unlikely(head == &work_exited)) + return -ESRCH; + work->next = head; + } while (cmpxchg(&task->task_works, head, work) != head); - /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ if (notify) set_notify_resume(task); return 0; @@ -30,52 +24,69 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify struct callback_head * task_work_cancel(struct task_struct *task, task_work_func_t func) { + struct callback_head **pprev = &task->task_works; + struct callback_head *work = NULL; unsigned long flags; - struct callback_head *last, *res = NULL; - + /* + * If cmpxchg() fails we continue without updating pprev. + * Either we raced with task_work_add() which added the + * new entry before this work, we will find it again. Or + * we raced with task_work_run(), *pprev == NULL/exited. + */ raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - if (last) { - struct callback_head *q = last, *p = q->next; - while (1) { - if (p->func == func) { - q->next = p->next; - if (p == last) - task->task_works = q == p ? NULL : q; - res = p; - break; - } - if (p == last) - break; - q = p; - p = q->next; - } + while ((work = ACCESS_ONCE(*pprev))) { + read_barrier_depends(); + if (work->func != func) + pprev = &work->next; + else if (cmpxchg(pprev, work, work->next) == work) + break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return res; + + return work; } void task_work_run(void) { struct task_struct *task = current; - struct callback_head *p, *q; + struct callback_head *work, *head, *next; + + for (;;) { + /* + * work->func() can do task_work_add(), do not set + * work_exited unless the list is empty. + */ + do { + work = ACCESS_ONCE(task->task_works); + head = !work && (task->flags & PF_EXITING) ? + &work_exited : NULL; + } while (cmpxchg(&task->task_works, work, head) != work); - while (1) { - raw_spin_lock_irq(&task->pi_lock); - p = task->task_works; - task->task_works = NULL; - raw_spin_unlock_irq(&task->pi_lock); + if (!work) + break; + /* + * Synchronize with task_work_cancel(). It can't remove + * the first entry == work, cmpxchg(task_works) should + * fail, but it can play with *work and other entries. + */ + raw_spin_unlock_wait(&task->pi_lock); + smp_mb(); - if (unlikely(!p)) - return; + /* Reverse the list to run the works in fifo order */ + head = NULL; + do { + next = work->next; + work->next = head; + head = work; + work = next; + } while (work); - q = p->next; /* head */ - p->next = NULL; /* cut it */ - while (q) { - p = q->next; - q->func(q); - q = p; + work = head; + do { + next = work->next; + work->func(work); + work = next; cond_resched(); - } + } while (work); } } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3364fbf46807..6cfc6478863e 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1486,7 +1486,6 @@ long keyctl_session_to_parent(void) oldwork = NULL; parent = me->real_parent; - task_lock(parent); /* the parent mustn't be init and mustn't be a kernel thread */ if (parent->pid <= 1 || !parent->mm) goto unlock; @@ -1530,7 +1529,6 @@ long keyctl_session_to_parent(void) if (!ret) newwork = NULL; unlock: - task_unlock(parent); write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) |