From 073219e995b4a3f8cf1ce8228b7ef440b6994ac0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 8 Feb 2014 10:36:58 -0500 Subject: cgroup: clean up cgroup_subsys names and initialization cgroup_subsys is a bit messier than it needs to be. * The name of a subsys can be different from its internal identifier defined in cgroup_subsys.h. Most subsystems use the matching name but three - cpu, memory and perf_event - use different ones. * cgroup_subsys_id enums are postfixed with _subsys_id and each cgroup_subsys is postfixed with _subsys. cgroup.h is widely included throughout various subsystems, and it doesn't and shouldn't have a claim on such generic names which don't have any qualifier indicating that they belong to cgroup. * cgroup_subsys->subsys_id should always equal the matching cgroup_subsys_id enum; however, we require each controller to initialize it and then BUG if they don't match, which is a bit silly. This patch cleans up cgroup_subsys names and initialization by doing the following. * cgroup_subsys_id enums are now postfixed with _cgrp_id, and each cgroup_subsys with _cgrp_subsys. * With the above, renaming subsys identifiers to match the userland-visible names doesn't cause any naming conflicts. All non-matching identifiers are renamed to match the official names. cpu_cgroup -> cpu mem_cgroup -> memory perf -> perf_event * controllers no longer need to initialize ->subsys_id and ->name. They're generated in cgroup core and set automatically during boot. * Redundant cgroup_subsys declarations removed. * While updating BUG_ON()s in cgroup_init_early(), convert them to WARN()s. BUGging that early during boot is stupid - the kernel can't print anything, even through serial console and the trap handler doesn't even link the stack frame properly for back-tracing. This patch doesn't introduce any behavior changes. v2: Rebased on top of fe1217c4f3f7 ("net: net_cls: move cgroupfs classid handling into core"). 
Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: "David S. Miller" Acked-by: "Rafael J. Wysocki" Acked-by: Michal Hocko Acked-by: Peter Zijlstra Acked-by: Aristeu Rozanski Acked-by: Ingo Molnar Acked-by: Li Zefan Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Serge E. Hallyn Cc: Vivek Goyal Cc: Thomas Graf --- mm/memcontrol.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 53385cd..04a97bc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -66,8 +66,8 @@ #include -struct cgroup_subsys mem_cgroup_subsys __read_mostly; -EXPORT_SYMBOL(mem_cgroup_subsys); +struct cgroup_subsys memory_cgrp_subsys __read_mostly; +EXPORT_SYMBOL(memory_cgrp_subsys); #define MEM_CGROUP_RECLAIM_RETRIES 5 static struct mem_cgroup *root_mem_cgroup __read_mostly; @@ -538,7 +538,7 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { struct cgroup_subsys_state *css; - css = css_from_id(id - 1, &mem_cgroup_subsys); + css = css_from_id(id - 1, &memory_cgrp_subsys); return mem_cgroup_from_css(css); } @@ -1072,7 +1072,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) if (unlikely(!p)) return NULL; - return mem_cgroup_from_css(task_css(p, mem_cgroup_subsys_id)); + return mem_cgroup_from_css(task_css(p, memory_cgrp_id)); } struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) @@ -1702,7 +1702,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) rcu_read_lock(); mem_cgrp = memcg->css.cgroup; - task_cgrp = task_cgroup(p, mem_cgroup_subsys_id); + task_cgrp = task_cgroup(p, memory_cgrp_id); ret = cgroup_path(task_cgrp, memcg_name, PATH_MAX); if (ret < 0) { @@ -6187,7 +6187,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, ret = -EINVAL; cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, - &mem_cgroup_subsys); + &memory_cgrp_subsys); if (cfile_css == 
css && css_tryget(css)) ret = 0; @@ -6566,11 +6566,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) * unfortunate state in our controller. */ if (parent != root_mem_cgroup) - mem_cgroup_subsys.broken_hierarchy = true; + memory_cgrp_subsys.broken_hierarchy = true; } mutex_unlock(&memcg_create_mutex); - return memcg_init_kmem(memcg, &mem_cgroup_subsys); + return memcg_init_kmem(memcg, &memory_cgrp_subsys); } /* @@ -7264,9 +7264,7 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) mem_cgroup_from_css(root_css)->use_hierarchy = true; } -struct cgroup_subsys mem_cgroup_subsys = { - .name = "memory", - .subsys_id = mem_cgroup_subsys_id, +struct cgroup_subsys memory_cgrp_subsys = { .css_alloc = mem_cgroup_css_alloc, .css_online = mem_cgroup_css_online, .css_offline = mem_cgroup_css_offline, @@ -7292,7 +7290,7 @@ __setup("swapaccount=", enable_swap_account); static void __init memsw_file_init(void) { - WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, memsw_cgroup_files)); + WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, memsw_cgroup_files)); } static void __init enable_swap_cgroup(void) -- cgit v1.1 From 5a17f543ed6808e9085063277fe46795dea484bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 11 Feb 2014 11:52:47 -0500 Subject: cgroup: improve css_from_dir() into css_tryget_from_dir() css_from_dir() returns the matching css (cgroup_subsys_state) given a dentry and subsystem. The function doesn't pin the css before returning and requires the caller to be holding RCU read lock or cgroup_mutex and handling pinning on the caller side. Given that users of the function are likely to want to pin the returned css (both existing users do) and that getting and putting css's are very cheap, there's no reason for the interface to be tricky like this. Rename css_from_dir() to css_tryget_from_dir() and make it try to pin the found css and return it only if pinning succeeded. 
The callers are updated so that they no longer do RCU locking and pinning around the function and just use the returned css. This will also ease converting cgroup to kernfs. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- mm/memcontrol.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 04a97bc..102ab48 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6183,17 +6183,15 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - rcu_read_lock(); - + cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent, + &memory_cgrp_subsys); ret = -EINVAL; - cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, - &memory_cgrp_subsys); - if (cfile_css == css && css_tryget(css)) - ret = 0; - - rcu_read_unlock(); - if (ret) + if (IS_ERR(cfile_css)) goto out_put_cfile; + if (cfile_css != css) { + css_put(cfile_css); + goto out_put_cfile; + } ret = event->register_event(memcg, event->eventfd, buffer); if (ret) -- cgit v1.1 From e61734c55c24cdf11b07e52a74aec4dc4a7f4bd0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Feb 2014 09:29:50 -0500 Subject: cgroup: remove cgroup->name cgroup->name handling became quite complicated over time involving dedicated struct cgroup_name for RCU protection. Now that cgroup is on kernfs, we can drop all of it and simply use kernfs_name/path() and friends. Replace cgroup->name and all related code with kernfs name/path constructs. * Reimplement cgroup_name() and cgroup_path() as thin wrappers on top of kernfs counterparts, which involves semantic changes. pr_cont_cgroup_name() and pr_cont_cgroup_path() added. * cgroup->name handling dropped from cgroup_rename(). 
* All users of cgroup_name/path() updated to the new semantics. Users which were formatting the string just to printk them are converted to use pr_cont_cgroup_name/path() instead, which simplifies things quite a bit. As cgroup_name() no longer requires RCU read lock around it, RCU lockings which were protecting only cgroup_name() are removed. v2: Comment above oom_info_lock updated as suggested by Michal. v3: dummy_top doesn't have a kn associated and pr_cont_cgroup_name/path() ended up calling the matching kernfs functions with NULL kn leading to oops. Test for NULL kn and print "/" if so. This issue was reported by Fengguang Wu. v4: Rebased on top of 0ab02ca8f887 ("cgroup: protect modifications to cgroup_idr with cgroup_mutex"). Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Acked-by: Michal Hocko Acked-by: Li Zefan Cc: Fengguang Wu Cc: Ingo Molnar Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- mm/memcontrol.c | 68 +++++++++++++++------------------------------------------ 1 file changed, 18 insertions(+), 50 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 102ab48..c1c2549 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1683,15 +1683,8 @@ static void move_unlock_mem_cgroup(struct mem_cgroup *memcg, */ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { - /* - * protects memcg_name and makes sure that parallel ooms do not - * interleave - */ + /* oom_info_lock ensures that parallel ooms do not interleave */ static DEFINE_SPINLOCK(oom_info_lock); - struct cgroup *task_cgrp; - struct cgroup *mem_cgrp; - static char memcg_name[PATH_MAX]; - int ret; struct mem_cgroup *iter; unsigned int i; @@ -1701,36 +1694,14 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) spin_lock(&oom_info_lock); rcu_read_lock(); - mem_cgrp = memcg->css.cgroup; - task_cgrp = task_cgroup(p, memory_cgrp_id); + pr_info("Task in "); + 
pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); + pr_info(" killed as a result of limit of "); + pr_cont_cgroup_path(memcg->css.cgroup); + pr_info("\n"); - ret = cgroup_path(task_cgrp, memcg_name, PATH_MAX); - if (ret < 0) { - /* - * Unfortunately, we are unable to convert to a useful name - * But we'll still print out the usage information - */ - rcu_read_unlock(); - goto done; - } rcu_read_unlock(); - pr_info("Task in %s killed", memcg_name); - - rcu_read_lock(); - ret = cgroup_path(mem_cgrp, memcg_name, PATH_MAX); - if (ret < 0) { - rcu_read_unlock(); - goto done; - } - rcu_read_unlock(); - - /* - * Continues from above, so we don't need an KERN_ level - */ - pr_cont(" as a result of limit of %s\n", memcg_name); -done: - pr_info("memory: usage %llukB, limit %llukB, failcnt %llu\n", res_counter_read_u64(&memcg->res, RES_USAGE) >> 10, res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10, @@ -1745,13 +1716,8 @@ done: res_counter_read_u64(&memcg->kmem, RES_FAILCNT)); for_each_mem_cgroup_tree(iter, memcg) { - pr_info("Memory cgroup stats"); - - rcu_read_lock(); - ret = cgroup_path(iter->css.cgroup, memcg_name, PATH_MAX); - if (!ret) - pr_cont(" for %s", memcg_name); - rcu_read_unlock(); + pr_info("Memory cgroup stats for "); + pr_cont_cgroup_path(iter->css.cgroup); pr_cont(":"); for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { @@ -3401,7 +3367,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, struct kmem_cache *s) { struct kmem_cache *new = NULL; - static char *tmp_name = NULL; + static char *tmp_path = NULL, *tmp_name = NULL; static DEFINE_MUTEX(mutex); /* protects tmp_name */ BUG_ON(!memcg_can_account_kmem(memcg)); @@ -3413,18 +3379,20 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, * This static temporary buffer is used to prevent from * pointless shortliving allocation. 
*/ - if (!tmp_name) { - tmp_name = kmalloc(PATH_MAX, GFP_KERNEL); + if (!tmp_path || !tmp_name) { + if (!tmp_path) + tmp_path = kmalloc(PATH_MAX, GFP_KERNEL); if (!tmp_name) + tmp_name = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmp_path || !tmp_name) goto out; } - rcu_read_lock(); - snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name, - memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup)); - rcu_read_unlock(); + cgroup_name(memcg->css.cgroup, tmp_name, NAME_MAX + 1); + snprintf(tmp_path, PATH_MAX, "%s(%d:%s)", s->name, + memcg_cache_id(memcg), tmp_name); - new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align, + new = kmem_cache_create_memcg(memcg, tmp_path, s->object_size, s->align, (s->flags & ~SLAB_PANIC), s->ctor, s); if (new) new->allocflags |= __GFP_KMEMCG; -- cgit v1.1 From 07bc356ed2950048d33d667e933e1b913c6e6b6d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Feb 2014 06:58:39 -0500 Subject: cgroup: implement cgroup_has_tasks() and unexport cgroup_task_count() cgroup_task_count() read-locks css_set_lock and walks all tasks to count them and then returns the result. The only thing all the users want is determining whether the cgroup is empty or not. This patch implements cgroup_has_tasks() which tests whether cgroup->cset_links is empty, replaces all cgroup_task_count() usages and unexports it. Note that the test isn't synchronized. This is the same as before. The test has always been racy. This will help planned css_set locking update. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c1c2549..d9c6ac1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4958,7 +4958,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) struct cgroup *cgrp = memcg->css.cgroup; /* returns EBUSY if there is a task or if we come here twice. */ - if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) + if (cgroup_has_tasks(cgrp) || !list_empty(&cgrp->children)) return -EBUSY; /* we call try-to-free pages for make this cgroup empty */ @@ -5140,7 +5140,7 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg, * of course permitted. */ mutex_lock(&memcg_create_mutex); - if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg)) + if (cgroup_has_tasks(memcg->css.cgroup) || memcg_has_children(memcg)) err = -EBUSY; mutex_unlock(&memcg_create_mutex); if (err) -- cgit v1.1 From 4d3bb511b5f9980fc3e9ae5939ebc475b231d3fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Mar 2014 10:23:54 -0400 Subject: cgroup: drop const from @buffer of cftype->write_string() cftype->write_string() just passes on the writeable buffer from kernfs and there's no reason to add const restriction on the buffer. The only thing const achieves is unnecessarily complicating parsing of the buffer. Drop const from @buffer. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d9c6ac1..96f94a9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5242,7 +5242,7 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg, * RES_LIMIT. */ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer) + char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); enum res_type type; @@ -6063,7 +6063,7 @@ static void memcg_event_ptable_queue_proc(struct file *file, * Interpretation of args is defined by control file implementation. */ static int memcg_write_event_control(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buffer) + struct cftype *cft, char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event; -- cgit v1.1