diff options
author | Oleg Nesterov <oleg@redhat.com> | 2012-05-11 10:59:07 +1000 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2012-05-23 22:09:21 -0400 |
commit | e73f8959af0439d114847eab5a8a5ce48f1217c4 (patch) | |
tree | 47f056093590a5e5552e3a75f163e1f798063bda /kernel | |
parent | 62366c88b29c5a32e1531142092f98eaf49b1103 (diff) | |
download | op-kernel-dev-e73f8959af0439d114847eab5a8a5ce48f1217c4.zip op-kernel-dev-e73f8959af0439d114847eab5a8a5ce48f1217c4.tar.gz |
task_work_add: generic process-context callbacks
Provide a simple mechanism that allows running code in the (nonatomic)
context of the arbitrary task.
The caller does task_work_add(task, task_work) and this task executes
task_work->func() either from do_notify_resume() or from do_exit(). The
callback can rely on PF_EXITING to detect the latter case.
"struct task_work" can be embedded in another struct, still it has "void
*data" to handle the most common/simple case.
This allows us to kill the ->replacement_session_keyring hack, and
potentially this can have more users.
Performance-wise, this adds 2 "unlikely(!hlist_empty())" checks into
tracehook_notify_resume() and do_exit(). But at the same time we can
remove the "replacement_session_keyring != NULL" checks from
arch/*/signal.c and exit_creds().
Note: task_work_add/task_work_run abuses ->pi_lock. This is only because
this lock is already used by lookup_pi_state() to synchronize with
do_exit() setting PF_EXITING. Fortunately the scope of this lock in
task_work.c is really tiny, and the code is unlikely anyway.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/exit.c | 5 | ||||
-rw-r--r-- | kernel/fork.c | 1 | ||||
-rw-r--r-- | kernel/task_work.c | 84 |
4 files changed, 90 insertions, 2 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30..bf10340 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,7 +5,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ - signal.o sys.o kmod.o workqueue.o pid.o \ + signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ diff --git a/kernel/exit.c b/kernel/exit.c index 910a071..3d93325 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -946,11 +946,14 @@ void do_exit(long code) exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against - * an exiting task cleaning up the robust pi futexes. + * an exiting task cleaning up the robust pi futexes, and in + * task_work_add() to avoid the race with exit_task_work(). */ smp_mb(); raw_spin_unlock_wait(&tsk->pi_lock); + exit_task_work(tsk); + exit_irq_thread(); if (unlikely(in_atomic())) diff --git a/kernel/fork.c b/kernel/fork.c index 05c813d..a46db21 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1411,6 +1411,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); + INIT_HLIST_HEAD(&p->task_works); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible diff --git a/kernel/task_work.c b/kernel/task_work.c new file mode 100644 index 0000000..82d1c79 --- /dev/null +++ b/kernel/task_work.c @@ -0,0 +1,84 @@ +#include <linux/spinlock.h> +#include <linux/task_work.h> +#include <linux/tracehook.h> + +int +task_work_add(struct task_struct *task, struct task_work *twork, bool notify) +{ + unsigned long flags; + int err = -ESRCH; + +#ifndef TIF_NOTIFY_RESUME + if (notify) + return -ENOTSUPP; +#endif + /* + * We must not insert the new work if the task has already passed + * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait() + * and check PF_EXITING under pi_lock. + */ + raw_spin_lock_irqsave(&task->pi_lock, flags); + if (likely(!(task->flags & PF_EXITING))) { + hlist_add_head(&twork->hlist, &task->task_works); + err = 0; + } + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ + if (likely(!err) && notify) + set_notify_resume(task); + return err; +} + +struct task_work * +task_work_cancel(struct task_struct *task, task_work_func_t func) +{ + unsigned long flags; + struct task_work *twork; + struct hlist_node *pos; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + hlist_for_each_entry(twork, pos, &task->task_works, hlist) { + if (twork->func == func) { + hlist_del(&twork->hlist); + goto found; + } + } + twork = NULL; + found: + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + return twork; +} + +void task_work_run(void) +{ + struct task_struct *task = current; + struct hlist_head task_works; + struct hlist_node *pos; + + raw_spin_lock_irq(&task->pi_lock); + hlist_move_list(&task->task_works, &task_works); + raw_spin_unlock_irq(&task->pi_lock); + + if (unlikely(hlist_empty(&task_works))) + return; + /* + * We use hlist to save the space in task_struct, but we want fifo. + * Find the last entry, the list should be short, then process them + * in reverse order. + */ + for (pos = task_works.first; pos->next; pos = pos->next) + ; + + for (;;) { + struct hlist_node **pprev = pos->pprev; + struct task_work *twork = container_of(pos, struct task_work, + hlist); + twork->func(twork); + + if (pprev == &task_works.first) + break; + pos = container_of(pprev, struct hlist_node, next); + } +} |