diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2008-04-29 01:00:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-29 08:06:10 -0700 |
commit | cf475ad28ac35cc9ba612d67158f29b73b38b05d (patch) | |
tree | 2c7cd568d00357bd42643ea602884e731cc24f26 /include | |
parent | 29486df325e1fe6e1764afcb19e3370804c2b002 (diff) | |
download | op-kernel-dev-cf475ad28ac35cc9ba612d67158f29b73b38b05d.zip op-kernel-dev-cf475ad28ac35cc9ba612d67158f29b73b38b05d.tar.gz |
cgroups: add an owner to the mm_struct
Remove the mem_cgroup member from mm_struct and instead add an owner.
This approach was suggested by Paul Menage. The advantage of this approach
is that, once the mm->owner is known, using the subsystem id, the cgroup
can be determined. It also allows several control groups that are
virtually grouped by mm_struct, to exist independent of the memory
controller i.e., without adding mem_cgroup's for each controller, to
mm_struct.
A new config option CONFIG_MM_OWNER is added and the memory resource
controller selects this config option.
This patch also adds cgroup callbacks to notify subsystems when mm->owner
changes. The mm_owner_changed callback is called with the task_lock() of
the new task held and is called just prior to changing the mm->owner.
I am indebted to Paul Menage for the several reviews of this patchset and
helping me make it lighter and simpler.
This patch was tested on a powerpc box; it was compiled with both the
MM_OWNER config turned on and off.
After the thread group leader exits, it's moved to init_css_set by
cgroup_exit(), thus all future charges from running threads would be
redirected to the init_css_set's subsystem.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/cgroup.h | 15 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 16 | ||||
-rw-r--r-- | include/linux/mm_types.h | 5 | ||||
-rw-r--r-- | include/linux/sched.h | 13 |
4 files changed, 34 insertions, 15 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0952480..e155aa7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -305,6 +305,12 @@ struct cgroup_subsys { struct cgroup *cgrp); void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); + /* + * This routine is called with the task_lock of mm->owner held + */ + void (*mm_owner_changed)(struct cgroup_subsys *ss, + struct cgroup *old, + struct cgroup *new); int subsys_id; int active; int disabled; @@ -390,4 +396,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, #endif /* !CONFIG_CGROUPS */ +#ifdef CONFIG_MM_OWNER +extern void +cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new); +#else /* !CONFIG_MM_OWNER */ +static inline void +cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) +{ +} +#endif /* CONFIG_MM_OWNER */ #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8b1c429..e660877 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,9 +27,6 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); -extern void mm_free_cgroup(struct mm_struct *mm); - #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) extern struct page_cgroup *page_get_page_cgroup(struct page *page); @@ -48,8 +45,10 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); +extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); + #define mm_match_cgroup(mm, cgroup) \ - ((cgroup) == rcu_dereference((mm)->mem_cgroup)) + ((cgroup) == mem_cgroup_from_task((mm)->owner)) extern int mem_cgroup_prepare_migration(struct page *page); extern 
void mem_cgroup_end_migration(struct page *page); @@ -73,15 +72,6 @@ extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct zone *zone, int priority); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline void mm_init_cgroup(struct mm_struct *mm, - struct task_struct *p) -{ -} - -static inline void mm_free_cgroup(struct mm_struct *mm) -{ -} - static inline void page_reset_bad_cgroup(struct page *page) { } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e2bae8d..bc97bd5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -225,8 +225,9 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; /* aio lock */ struct kioctx *ioctx_list; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR - struct mem_cgroup *mem_cgroup; +#ifdef CONFIG_MM_OWNER + struct task_struct *owner; /* The thread group leader that */ + /* owns the mm_struct. */ #endif }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 024d72b..1d02bab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2148,6 +2148,19 @@ static inline void migration_init(void) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +#ifdef CONFIG_MM_OWNER +extern void mm_update_next_owner(struct mm_struct *mm); +extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} + +static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) +{ +} +#endif /* CONFIG_MM_OWNER */ + #endif /* __KERNEL__ */ #endif |