From 99f895518368252ba862cc15ce4eb98ebbe1bec6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 26 Jun 2006 00:25:55 -0700 Subject: [PATCH] proc: don't lock task_structs indefinitely Every inode in /proc holds a reference to a struct task_struct. If a directory or file is opened and remains open after the the task exits this pinning continues. With 8K stacks on a 32bit machine the amount pinned per file descriptor is about 10K. Normally I would figure a reasonable per user process limit is about 100 processes. With 80 processes, with a 1000 file descriptors each I can trigger the 00M killer on a 32bit kernel, because I have pinned about 800MB of useless data. This patch replaces the struct task_struct pointer with a pointer to a struct task_ref which has a struct task_struct pointer. The so the pinning of dead tasks does not happen. The code now has to contend with the fact that the task may now exit at any time. Which is a little but not muh more complicated. With this change it takes about 1000 processes each opening up 1000 file descriptors before I can trigger the OOM killer. Much better. [mlp@google.com: task_mmu small fixes] Signed-off-by: Eric W. Biederman Cc: Trond Myklebust Cc: Paul Jackson Cc: Oleg Nesterov Cc: Albert Cahalan Signed-off-by: Prasanna Meda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b602f73..3e991c0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -2442,31 +2443,43 @@ void __cpuset_memory_pressure_bump(void) */ static int proc_cpuset_show(struct seq_file *m, void *v) { + struct task_ref *tref; struct task_struct *tsk; char *buf; - int retval = 0; + int retval; + retval = -ENOMEM; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) - return -ENOMEM; + goto out; + + retval = -ESRCH; + tref = m->private; + tsk = get_tref_task(tref); + if (!tsk) + goto out_free; - tsk = m->private; + retval = -EINVAL; mutex_lock(&manage_mutex); + retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); if (retval < 0) - goto out; + goto out_unlock; seq_puts(m, buf); seq_putc(m, '\n'); -out: +out_unlock: mutex_unlock(&manage_mutex); + put_task_struct(tsk); +out_free: kfree(buf); +out: return retval; } static int cpuset_open(struct inode *inode, struct file *file) { - struct task_struct *tsk = PROC_I(inode)->task; - return single_open(file, proc_cpuset_show, tsk); + struct task_ref *tref = PROC_I(inode)->tref; + return single_open(file, proc_cpuset_show, tref); } struct file_operations proc_cpuset_operations = { -- cgit v1.1