diff options
author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2009-09-21 17:03:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-22 07:17:39 -0700 |
commit | 495789a51a91cb8c015d8d77fecbac1caf20b186 (patch) | |
tree | ac2a71ed40ed84f5673326aa6bf7f278b54d989a | |
parent | 28b83c5193e7ab951e402252278f2cc79dc4d298 (diff) | |
download | op-kernel-dev-495789a51a91cb8c015d8d77fecbac1caf20b186.zip op-kernel-dev-495789a51a91cb8c015d8d77fecbac1caf20b186.tar.gz |
oom: make oom_score to per-process value
oom-killer kills a process, not task. Then oom_score should be calculated
as per-process too. it makes consistency more and makes speed up
select_bad_process().
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/filesystems/proc.txt | 2 | ||||
-rw-r--r-- | fs/proc/base.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 35 |
3 files changed, 31 insertions, 8 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ae7f8bb..75988ba 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1205,7 +1205,7 @@ The following heuristics are then applied: * if the task was reniced, its score doubles * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE or CAP_SYS_RAWIO) have their score divided by 4 - * if oom condition happened in one cpuset and checked task does not belong + * if oom condition happened in one cpuset and checked process does not belong to it, its score is divided by 8 * the resulting score is multiplied by two to the power of oom_adj, i.e. points <<= oom_adj when it is positive and diff --git a/fs/proc/base.c b/fs/proc/base.c index 81cfff8..71a3425 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer) do_posix_clock_monotonic_gettime(&uptime); read_lock(&tasklist_lock); - points = badness(task, uptime.tv_sec); + points = badness(task->group_leader, uptime.tv_sec); read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 630b77f..3726922 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -34,6 +34,23 @@ int sysctl_oom_dump_tasks; static DEFINE_SPINLOCK(zone_scan_lock); /* #define DEBUG */ +/* + * Is all threads of the target process nodes overlap ours? + */ +static int has_intersects_mems_allowed(struct task_struct *tsk) +{ + struct task_struct *t; + + t = tsk; + do { + if (cpuset_mems_allowed_intersects(current, t)) + return 1; + t = next_thread(t); + } while (t != tsk); + + return 0; +} + /** * badness - calculate a numeric value for how bad this task has been * @p: task struct of which task we should calculate @@ -59,6 +76,9 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) struct mm_struct *mm; struct task_struct *child; int oom_adj = p->signal->oom_adj; + struct task_cputime task_time; + unsigned long utime; + unsigned long stime; if (oom_adj == OOM_DISABLE) return 0; @@ -106,8 +126,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * of seconds. There is no particular reason for this other than * that it turned out to work very well in practice. */ - cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime)) - >> (SHIFT_HZ + 3); + thread_group_cputime(p, &task_time); + utime = cputime_to_jiffies(task_time.utime); + stime = cputime_to_jiffies(task_time.stime); + cpu_time = (utime + stime) >> (SHIFT_HZ + 3); + if (uptime >= p->start_time.tv_sec) run_time = (uptime - p->start_time.tv_sec) >> 10; @@ -148,7 +171,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * because p may have allocated or otherwise mapped memory on * this node before. However it will be less likely. */ - if (!cpuset_mems_allowed_intersects(current, p)) + if (!has_intersects_mems_allowed(p)) points /= 8; /* @@ -204,13 +227,13 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, static struct task_struct *select_bad_process(unsigned long *ppoints, struct mem_cgroup *mem) { - struct task_struct *g, *p; + struct task_struct *p; struct task_struct *chosen = NULL; struct timespec uptime; *ppoints = 0; do_posix_clock_monotonic_gettime(&uptime); - do_each_thread(g, p) { + for_each_process(p) { unsigned long points; /* @@ -263,7 +286,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, chosen = p; *ppoints = points; } - } while_each_thread(g, p); + } return chosen; } |