From 6f4b7e632d78c2d91502211c430722cc66428492 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 31 Jul 2013 16:18:36 +0800 Subject: cgroup: more naming cleanups Constantly use @cset for css_set variables and use @cgrp as cgroup variables. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 297462b..00a7e07 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -394,8 +394,8 @@ struct cgroup_map_cb { /* cftype->flags */ enum { - CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cg */ - CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cg */ + CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ + CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ }; @@ -513,7 +513,7 @@ struct cftype_set { }; struct cgroup_scanner { - struct cgroup *cg; + struct cgroup *cgrp; int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan); void (*process_task)(struct task_struct *p, struct cgroup_scanner *scan); -- cgit v1.1 From 4e96ee8e981b5140a2bcc5fff0d5c0eef39a62ee Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 31 Jul 2013 09:50:50 +0800 Subject: cgroup: convert cgroup_ida to cgroup_idr This enables us to lookup a cgroup by its id. v4: - add a comment for idr_remove() in cgroup_offline_fn(). v3: - on success, idr_alloc() returns the id but not 0, so fix the BUG_ON() in cgroup_init(). - pass the right value to idr_alloc() so that the id for dummy cgroup is 0. Signed-off-by: Li Zefan Reviewed-by: Michal Hocko Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 00a7e07..cca570e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -161,7 +161,7 @@ struct cgroup_name { struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ - int id; /* ida allocated in-hierarchy ID */ + int id; /* idr allocated in-hierarchy ID */ /* * We link our 'sibling' struct into our parent's 'children'. @@ -322,7 +322,7 @@ struct cgroupfs_root { unsigned long flags; /* IDs for cgroups in this hierarchy */ - struct ida cgroup_ida; + struct idr cgroup_idr; /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; -- cgit v1.1 From b414dc09a31d41d696093a4cce9fb2853a5ecd4e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 31 Jul 2013 09:51:06 +0800 Subject: cgroup: document how cgroup IDs are assigned As cgroup id has been used in netprio cgroup and will be used in memcg, it's important to make it clear how a cgroup id is allocated. For example, in netprio cgroup, the id is used as index of anarray. Signed-off-by: Li Zefan Reviewed-by: Michal Hocko Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index cca570e..4dfcd0e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -161,7 +161,13 @@ struct cgroup_name { struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ - int id; /* idr allocated in-hierarchy ID */ + /* + * idr allocated in-hierarchy ID. + * + * The ID of the root cgroup is always 0, and a new cgroup + * will be assigned with a smallest available ID. + */ + int id; /* * We link our 'sibling' struct into our parent's 'children'. -- cgit v1.1 From e14880f7bb7e0dc0933af304998371dd543ceb40 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 31 Jul 2013 09:51:31 +0800 Subject: cgroup: implement cgroup_from_id() This will be used as a replacement for css_lookup(). There's a difference with cgroup id and css id. cgroup id starts with 0, while css id starts with 1. v4: - also check if cggroup_mutex is held. - make it an inline function. Signed-off-by: Li Zefan Reviewed-by: Michal Hocko Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4dfcd0e..bbf4d89 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -720,6 +720,24 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, return task_subsys_state(task, subsys_id)->cgroup; } +/** + * cgroup_from_id - lookup cgroup by id + * @ss: cgroup subsys to be looked into + * @id: the cgroup id + * + * Returns the cgroup if there's valid one with @id, otherwise returns NULL. + * Should be called under rcu_read_lock(). + */ +static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id) +{ +#ifdef CONFIG_PROVE_RCU + rcu_lockdep_assert(rcu_read_lock_held() || + lockdep_is_held(&cgroup_mutex), + "cgroup_from_id() needs proper protection"); +#endif + return idr_find(&ss->root->cgroup_idr, id); +} + struct cgroup *cgroup_next_sibling(struct cgroup *pos); /** -- cgit v1.1 From 8af01f56a03e9cbd91a55d688fce1315021efba8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:22 -0400 Subject: cgroup: s/cgroup_subsys_state/cgroup_css/ s/task_subsys_state/task_css/ The names of the two struct cgroup_subsys_state accessors - cgroup_subsys_state() and task_subsys_state() - are somewhat awkward. The former clashes with the type name and the latter doesn't even indicate it's somehow related to cgroup. We're about to revamp large portion of cgroup API, so, let's rename them so that they're less awkward. Most per-controller usages of the accessors are localized in accessor wrappers and given the amount of scheduled changes, this isn't gonna add any noticeable headache. Rename cgroup_subsys_state() to cgroup_css() and task_subsys_state() to task_css(). This patch is pure rename. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 31 +++++++++++++++++++------------ include/net/cls_cgroup.h | 4 ++-- include/net/netprio_cgroup.h | 4 ++-- 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 44dd422..552c5fe 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -647,8 +647,15 @@ struct cgroup_subsys { #undef IS_SUBSYS_ENABLED #undef SUBSYS -static inline struct cgroup_subsys_state *cgroup_subsys_state( - struct cgroup *cgrp, int subsys_id) +/** + * cgroup_css - obtain a cgroup's css for the specified subsystem + * @cgrp: the cgroup of interest + * @subsys_id: the subsystem of interest + * + * Return @cgrp's css (cgroup_subsys_state) associated with @subsys_id. + */ +static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, + int subsys_id) { return cgrp->subsys[subsys_id]; } @@ -678,7 +685,7 @@ extern struct mutex cgroup_mutex; #endif /** - * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds + * task_css_check - obtain css for (task, subsys) w/ extra access conds * @task: the target task * @subsys_id: the target subsystem ID * @__c: extra condition expression to be passed to rcu_dereference_check() @@ -686,7 +693,7 @@ extern struct mutex cgroup_mutex; * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The * synchronization rules are the same as task_css_set_check(). */ -#define task_subsys_state_check(task, subsys_id, __c) \ +#define task_css_check(task, subsys_id, __c) \ task_css_set_check((task), (__c))->subsys[(subsys_id)] /** @@ -701,22 +708,22 @@ static inline struct css_set *task_css_set(struct task_struct *task) } /** - * task_subsys_state - obtain css for (task, subsys) + * task_css - obtain css for (task, subsys) * @task: the target task * @subsys_id: the target subsystem ID * - * See task_subsys_state_check(). + * See task_css_check(). */ -static inline struct cgroup_subsys_state * -task_subsys_state(struct task_struct *task, int subsys_id) +static inline struct cgroup_subsys_state *task_css(struct task_struct *task, + int subsys_id) { - return task_subsys_state_check(task, subsys_id, false); + return task_css_check(task, subsys_id, false); } -static inline struct cgroup* task_cgroup(struct task_struct *task, - int subsys_id) +static inline struct cgroup *task_cgroup(struct task_struct *task, + int subsys_id) { - return task_subsys_state(task, subsys_id)->cgroup; + return task_css(task, subsys_id)->cgroup; } /** diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 0fee061..52adaa7 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -35,7 +35,7 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - classid = container_of(task_subsys_state(p, net_cls_subsys_id), + classid = container_of(task_css(p, net_cls_subsys_id), struct cgroup_cls_state, css)->classid; rcu_read_unlock(); @@ -51,7 +51,7 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - css = task_subsys_state(p, net_cls_subsys_id); + css = task_css(p, net_cls_subsys_id); if (css) classid = container_of(css, struct cgroup_cls_state, css)->classid; diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 50ab8c2..8110fa7 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -39,7 +39,7 @@ static inline u32 task_netprioidx(struct task_struct *p) u32 idx; rcu_read_lock(); - css = task_subsys_state(p, net_prio_subsys_id); + css = task_css(p, net_prio_subsys_id); idx = css->cgroup->id; rcu_read_unlock(); return idx; @@ -53,7 +53,7 @@ static inline u32 task_netprioidx(struct task_struct *p) u32 idx = 0; rcu_read_lock(); - css = task_subsys_state(p, net_prio_subsys_id); + css = task_css(p, net_prio_subsys_id); if (css) idx = css->cgroup->id; rcu_read_unlock(); -- cgit v1.1 From 6d37b97428d20a21ffc39ba90e97e91e2a79a986 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:22 -0400 Subject: netprio_cgroup: pass around @css instead of @cgroup and kill struct cgroup_netprio_state cgroup controller API will be converted to primarily use struct cgroup_subsys_state instead of struct cgroup. In preparation, make the internal functions of netprio_cgroup pass around @css instead of @cgrp. While at it, kill struct cgroup_netprio_state which only contained struct cgroup_subsys_state without serving any purpose. All functions are converted to deal with @css directly. This patch shouldn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Acked-by: David S. Miller --- include/net/netprio_cgroup.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 8110fa7..a24f8bb 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -25,10 +25,6 @@ struct netprio_map { u32 priomap[]; }; -struct cgroup_netprio_state { - struct cgroup_subsys_state css; -}; - extern void sock_update_netprioidx(struct sock *sk); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) -- cgit v1.1 From 72c97e54e0f043d33b246d7460ae0a36c4b8c643 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:22 -0400 Subject: cgroup: add subsystem pointer to cgroup_subsys_state Currently, given a cgroup_subsys_state, there's no way to find out which subsystem the css is for, which we'll need to convert the cgroup controller API to primarily use @css instead of @cgroup. This patch adds cgroup_subsys_state->ss which points to the subsystem the @css belongs to. While at it, remove the comment about accessing @css->cgroup to determine the hierarchy. cgroup core will provide API to traverse hierarchy of css'es and we don't want subsystems to directly walk cgroup hierarchies anymore. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 552c5fe..821678a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -66,13 +66,12 @@ enum cgroup_subsys_id { /* Per-subsystem/per-cgroup state maintained by the system. */ struct cgroup_subsys_state { - /* - * The cgroup that this subsystem is attached to. Useful - * for subsystems that want to know about the cgroup - * hierarchy structure - */ + /* the cgroup that this css is attached to */ struct cgroup *cgroup; + /* the cgroup subsystem that this css is attached to */ + struct cgroup_subsys *ss; + /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; -- cgit v1.1 From 6387698699afd72d6304566fb6ccf84bffe07c56 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:23 -0400 Subject: cgroup: add css_parent() Currently, controllers have to explicitly follow the cgroup hierarchy to find the parent of a given css. cgroup is moving towards using cgroup_subsys_state as the main controller interface construct, so let's provide a way to climb the hierarchy using just csses. This patch implements css_parent() which, given a css, returns its parent. The function is guarnateed to valid non-NULL parent css as long as the target css is not at the top of the hierarchy. freezer, cpuset, cpu, cpuacct, hugetlb, memory, net_cls and devices are converted to use css_parent() instead of accessing cgroup->parent directly. * __parent_ca() is dropped from cpuacct and its usage is replaced with parent_ca(). The only difference between the two was NULL test on cgroup->parent which is now embedded in css_parent() making the distinction moot. Note that eventually a css->parent field will be added to css and the NULL check in css_parent() will go away. This patch shouldn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 821678a..18112a3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -647,6 +647,21 @@ struct cgroup_subsys { #undef SUBSYS /** + * css_parent - find the parent css + * @css: the target cgroup_subsys_state + * + * Return the parent css of @css. This function is guaranteed to return + * non-NULL parent as long as @css isn't the root. + */ +static inline +struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) +{ + struct cgroup *parent_cgrp = css->cgroup->parent; + + return parent_cgrp ? parent_cgrp->subsys[css->ss->subsys_id] : NULL; +} + +/** * cgroup_css - obtain a cgroup's css for the specified subsystem * @cgrp: the cgroup of interest * @subsys_id: the subsystem of interest -- cgit v1.1 From eb95419b023abacb415e2a18fea899023ce7624d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:23 -0400 Subject: cgroup: pass around cgroup_subsys_state instead of cgroup in subsystem methods cgroup is currently in the process of transitioning to using struct cgroup_subsys_state * as the primary handle instead of struct cgroup * in subsystem implementations for the following reasons. * With unified hierarchy, subsystems will be dynamically bound and unbound from cgroups and thus css's (cgroup_subsys_state) may be created and destroyed dynamically over the lifetime of a cgroup, which is different from the current state where all css's are allocated and destroyed together with the associated cgroup. This in turn means that cgroup_css() should be synchronized and may return NULL, making it more cumbersome to use. * Differing levels of per-subsystem granularity in the unified hierarchy means that the task and descendant iterators should behave differently depending on the specific subsystem the iteration is being performed for. * In majority of the cases, subsystems only care about its part in the cgroup hierarchy - ie. the hierarchy of css's. Subsystem methods often obtain the matching css pointer from the cgroup and don't bother with the cgroup pointer itself. Passing around css fits much better. This patch converts all cgroup_subsys methods to take @css instead of @cgroup. The conversions are mostly straight-forward. A few noteworthy changes are * ->css_alloc() now takes css of the parent cgroup rather than the pointer to the new cgroup as the css for the new cgroup doesn't exist yet. Knowing the parent css is enough for all the existing subsystems. * In kernel/cgroup.c::offline_css(), unnecessary open coded css dereference is replaced with local variable access. This patch shouldn't cause any behavior differences. v2: Unnecessary explicit cgrp->subsys[] deref in css_online() replaced with local variable @css as suggested by Li Zefan. Rebased on top of new for-3.12 which includes for-3.11-fixes so that ->css_free() invocation added by da0a12caff ("cgroup: fix a leak when percpu_ref_init() fails") is converted too. Suggested by Li Zefan. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Acked-by: Vivek Goyal Acked-by: Aristeu Rozanski Acked-by: Daniel Wagner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Johannes Weiner Cc: Balbir Singh Cc: Matt Helsley Cc: Jens Axboe Cc: Steven Rostedt --- include/linux/cgroup.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 18112a3..9c2b9dd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -579,18 +579,22 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); */ struct cgroup_subsys { - struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); - int (*css_online)(struct cgroup *cgrp); - void (*css_offline)(struct cgroup *cgrp); - void (*css_free)(struct cgroup *cgrp); - - int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); - void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); + struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); + int (*css_online)(struct cgroup_subsys_state *css); + void (*css_offline)(struct cgroup_subsys_state *css); + void (*css_free)(struct cgroup_subsys_state *css); + + int (*can_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); void (*fork)(struct task_struct *task); - void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, + void (*exit)(struct cgroup_subsys_state *css, + struct cgroup_subsys_state *old_css, struct task_struct *task); - void (*bind)(struct cgroup *root); + void (*bind)(struct cgroup_subsys_state *root_css); int subsys_id; int disabled; -- cgit v1.1 From 2bb566cb68dfafad328af666ebadf0e49accd6ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:23 -0400 Subject: cgroup: add subsys backlink pointer to cftype cgroup is transitioning to using css (cgroup_subsys_state) instead of cgroup as the primary subsystem handle. The cgroupfs file interface will be converted to use css's which requires finding out the subsystem from cftype so that the matching css can be determined from the cgroup. This patch adds cftype->ss which points to the subsystem the file belongs to. The field is initialized while a cftype is being registered. This makes it unnecessary to explicitly specify the subsystem for other cftype handling functions. @ss argument dropped from various cftype handling functions. This patch shouldn't introduce any behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Vivek Goyal Cc: Jens Axboe --- include/linux/cgroup.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9c2b9dd..5db8138 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -429,6 +429,12 @@ struct cftype { /* CFTYPE_* flags */ unsigned int flags; + /* + * The subsys this file belongs to. Initialized automatically + * during registration. NULL for cgroup core files. + */ + struct cgroup_subsys *ss; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -542,7 +548,7 @@ static inline const char *cgroup_name(const struct cgroup *cgrp) } int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); -int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); -- cgit v1.1 From 67f4c36f83455b253445b2cb28ac9a2c4f85d99a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:24 -0400 Subject: cgroup: add cgroup->dummy_css cgroup subsystem API is being converted to use css (cgroup_subsys_state) as the main handle, which makes things a bit awkward for subsystem agnostic core features - the "cgroup.*" interface files and various iterations - a bit awkward as they don't have a css to use. This patch adds cgroup->dummy_css which has NULL ->ss and whose only role is pointing back to the cgroup. This will be used to support subsystem agnostic features on the coming css based API. css_parent() is updated to handle dummy_css's. Note that css will soon grow its own ->parent field and css_parent() will be made trivial. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5db8138..b0d5f53 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -225,6 +225,9 @@ struct cgroup { struct list_head pidlists; struct mutex pidlist_mutex; + /* dummy css with NULL ->ss, points back to this cgroup */ + struct cgroup_subsys_state dummy_css; + /* For css percpu_ref killing and RCU-protected deletion */ struct rcu_head rcu_head; struct work_struct destroy_work; @@ -668,7 +671,13 @@ struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) { struct cgroup *parent_cgrp = css->cgroup->parent; - return parent_cgrp ? parent_cgrp->subsys[css->ss->subsys_id] : NULL; + if (!parent_cgrp) + return NULL; + + if (css->ss) + return parent_cgrp->subsys[css->ss->subsys_id]; + else + return &parent_cgrp->dummy_css; } /** -- cgit v1.1 From 182446d087906de40e514573a92a97b203695f71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:24 -0400 Subject: cgroup: pass around cgroup_subsys_state instead of cgroup in file methods cgroup is currently in the process of transitioning to using struct cgroup_subsys_state * as the primary handle instead of struct cgroup. Please see the previous commit which converts the subsystem methods for rationale. This patch converts all cftype file operations to take @css instead of @cgroup. cftypes for the cgroup core files don't have their subsytem pointer set. These will automatically use the dummy_css added by the previous patch and can be converted the same way. Most subsystem conversions are straight forwards but there are some interesting ones. * freezer: update_if_frozen() is also converted to take @css instead of @cgroup for consistency. This will make the code look simpler too once iterators are converted to use css. * memory/vmpressure: mem_cgroup_from_css() needs to be exported to vmpressure while mem_cgroup_from_cont() can be made static. Updated accordingly. * cpu: cgroup_tg() doesn't have any user left. Removed. * cpuacct: cgroup_ca() doesn't have any user left. Removed. * hugetlb: hugetlb_cgroup_form_cgroup() doesn't have any user left. Removed. * net_cls: cgrp_cls_state() doesn't have any user left. Removed. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Acked-by: Vivek Goyal Acked-by: Aristeu Rozanski Acked-by: Daniel Wagner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Johannes Weiner Cc: Balbir Singh Cc: Matt Helsley Cc: Jens Axboe Cc: Steven Rostedt --- include/linux/cgroup.h | 24 +++++++++++++----------- include/linux/memcontrol.h | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b0d5f53..0b91436 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -439,34 +439,34 @@ struct cftype { struct cgroup_subsys *ss; int (*open)(struct inode *inode, struct file *file); - ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, + ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft, struct file *file, char __user *buf, size_t nbytes, loff_t *ppos); /* * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() */ - u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft); + u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); /* * read_s64() is a signed version of read_u64() */ - s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft); + s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); /* * read_map() is used for defining a map of key/value * pairs. It should call cb->fill(cb, key, value) for each * entry. The key/value pairs (and their ordering) should not * change between reboots. */ - int (*read_map)(struct cgroup *cgrp, struct cftype *cft, + int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft, struct cgroup_map_cb *cb); /* * read_seq_string() is used for outputting a simple sequence * using seqfile. */ - int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *m); + int (*read_seq_string)(struct cgroup_subsys_state *css, + struct cftype *cft, struct seq_file *m); - ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft, + ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft, struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos); @@ -475,18 +475,20 @@ struct cftype { * a single integer (as parsed by simple_strtoull) from * userspace. Use in place of write(); return 0 or error. */ - int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val); + int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, + u64 val); /* * write_s64() is a signed version of write_u64() */ - int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val); + int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val); /* * write_string() is passed a nul-terminated kernelspace * buffer of maximum length determined by max_write_len. * Returns 0 or -ve error code. */ - int (*write_string)(struct cgroup *cgrp, struct cftype *cft, + int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft, const char *buffer); /* * trigger() callback can be used to get some kick from the @@ -494,7 +496,7 @@ struct cftype { * at all. The private field can be used to determine the * kick type for multiplexing. */ - int (*trigger)(struct cgroup *cgrp, unsigned int event); + int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); int (*release)(struct inode *inode, struct file *file); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7b4d9d7..6c41609 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -85,7 +85,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); -extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont); +extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css); static inline bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg) -- cgit v1.1 From 3b287a505ef4024634beb12a93773254909d5dae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:24 -0400 Subject: cgroup: convert cgroup_next_sibling() to cgroup_next_child() cgroup is transitioning to using css (cgroup_subsys_state) as the main subsys interface handle instead of cgroup and the iterators will be updated to use css too. The iterators need to walk the cgroup hierarchy and return the css's matching the origin css, which is a bit cumbersome to open code. This patch converts cgroup_next_sibling() to cgroup_next_child() so that it can handle all steps of direct child iteration. This will be used to update iterators to take @css instead of @cgrp. In addition to the new iteration init handling, cgroup_next_child() is restructured so that the different branches share the end of iteration condition check. This patch doesn't change any behavior. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0b91436..5f9ba58 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -779,7 +779,7 @@ static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id) return idr_find(&ss->root->cgroup_idr, id); } -struct cgroup *cgroup_next_sibling(struct cgroup *pos); +struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp); /** * cgroup_for_each_child - iterate through children of a cgroup @@ -802,7 +802,7 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos); #define cgroup_for_each_child(pos, cgrp) \ for ((pos) = list_first_or_null_rcu(&(cgrp)->children, \ struct cgroup, sibling); \ - (pos); (pos) = cgroup_next_sibling((pos))) + (pos); (pos) = cgroup_next_child((pos), (cgrp))) struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, struct cgroup *cgroup); -- cgit v1.1 From f48e3924dca268c677c4e338e5d91ad9e6fe6b9e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:24 -0400 Subject: cgroup: always use cgroup_next_child() to walk the children list There are several places where the children list is accessed directly. This patch converts those places to use cgroup_next_child(). This will help updating the hierarchy iterators to use @css instead of @cgrp. While cgroup_next_child() can be heavy in pathological cases - e.g. a lot of dead children, this shouldn't cause any noticeable behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5f9ba58..c288bce 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -800,9 +800,8 @@ struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp); * the start of the next iteration by, for example, bumping the css refcnt. */ #define cgroup_for_each_child(pos, cgrp) \ - for ((pos) = list_first_or_null_rcu(&(cgrp)->children, \ - struct cgroup, sibling); \ - (pos); (pos) = cgroup_next_child((pos), (cgrp))) + for ((pos) = cgroup_next_child(NULL, (cgrp)); (pos); \ + (pos) = cgroup_next_child((pos), (cgrp))) struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, struct cgroup *cgroup); -- cgit v1.1 From 492eb21b98f88e411a8bb43d6edcd7d7022add10 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:25 -0400 Subject: cgroup: make hierarchy iterators deal with cgroup_subsys_state instead of cgroup cgroup is currently in the process of transitioning to using css (cgroup_subsys_state) as the primary handle instead of cgroup in subsystem API. For hierarchy iterators, this is beneficial because * In most cases, css is the only thing subsystems care about anyway. * On the planned unified hierarchy, iterations for different subsystems will need to skip over different subtrees of the hierarchy depending on which subsystems are enabled on each cgroup. Passing around css makes it unnecessary to explicitly specify the subsystem in question as css is intersection between cgroup and subsystem * For the planned unified hierarchy, css's would need to be created and destroyed dynamically independent from cgroup hierarchy. Having cgroup core manage css iteration makes enforcing deref rules a lot easier. Most subsystem conversions are straight-forward. Noteworthy changes are * blkio: cgroup_to_blkcg() is no longer used. Removed. * freezer: cgroup_freezer() is no longer used. Removed. * devices: cgroup_to_devcgroup() is no longer used. Removed. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Acked-by: Vivek Goyal Acked-by: Aristeu Rozanski Cc: Johannes Weiner Cc: Balbir Singh Cc: Matt Helsley Cc: Jens Axboe --- include/linux/cgroup.h | 88 ++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c288bce..4bc22f4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -779,68 +779,72 @@ static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id) return idr_find(&ss->root->cgroup_idr, id); } -struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp); +struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *parent); /** - * cgroup_for_each_child - iterate through children of a cgroup - * @pos: the cgroup * to use as the loop cursor - * @cgrp: cgroup whose children to walk + * css_for_each_child - iterate through children of a css + * @pos: the css * to use as the loop cursor + * @parent: css whose children to walk * - * Walk @cgrp's children. Must be called under rcu_read_lock(). A child - * cgroup which hasn't finished ->css_online() or already has finished + * Walk @parent's children. Must be called under rcu_read_lock(). A child + * css which hasn't finished ->css_online() or already has finished * ->css_offline() may show up during traversal and it's each subsystem's * responsibility to verify that each @pos is alive. * * If a subsystem synchronizes against the parent in its ->css_online() and - * before starting iterating, a cgroup which finished ->css_online() is + * before starting iterating, a css which finished ->css_online() is * guaranteed to be visible in the future iterations. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ -#define cgroup_for_each_child(pos, cgrp) \ - for ((pos) = cgroup_next_child(NULL, (cgrp)); (pos); \ - (pos) = cgroup_next_child((pos), (cgrp))) +#define css_for_each_child(pos, parent) \ + for ((pos) = css_next_child(NULL, (parent)); (pos); \ + (pos) = css_next_child((pos), (parent))) -struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, - struct cgroup *cgroup); -struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); +struct cgroup_subsys_state * +css_next_descendant_pre(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *css); + +struct cgroup_subsys_state * +css_rightmost_descendant(struct cgroup_subsys_state *pos); /** - * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants - * @pos: the cgroup * to use as the loop cursor - * @cgroup: cgroup whose descendants to walk + * css_for_each_descendant_pre - pre-order walk of a css's descendants + * @pos: the css * to use as the loop cursor + * @root: css whose descendants to walk * - * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A - * descendant cgroup which hasn't finished ->css_online() or already has + * Walk @root's descendants. Must be called under rcu_read_lock(). A + * descendant css which hasn't finished ->css_online() or already has * finished ->css_offline() may show up during traversal and it's each * subsystem's responsibility to verify that each @pos is alive. * * If a subsystem synchronizes against the parent in its ->css_online() and * before starting iterating, and synchronizes against @pos on each - * iteration, any descendant cgroup which finished ->css_online() is + * iteration, any descendant css which finished ->css_online() is * guaranteed to be visible in the future iterations. * * In other words, the following guarantees that a descendant can't escape * state updates of its ancestors. * - * my_online(@cgrp) + * my_online(@css) * { - * Lock @cgrp->parent and @cgrp; - * Inherit state from @cgrp->parent; + * Lock @css's parent and @css; + * Inherit state from the parent; * Unlock both. * } * - * my_update_state(@cgrp) + * my_update_state(@css) * { - * Lock @cgrp; - * Update @cgrp's state; - * Unlock @cgrp; + * Lock @css; + * Update @css's state; + * Unlock @css; * - * cgroup_for_each_descendant_pre(@pos, @cgrp) { + * css_for_each_descendant_pre(@pos, @css) { * Lock @pos; - * Verify @pos is alive and inherit state from @pos->parent; + * Verify @pos is alive and inherit state from @pos's parent; * Unlock @pos; * } * } @@ -851,8 +855,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); * visible by walking order and, as long as inheriting operations to the * same @pos are atomic to each other, multiple updates racing each other * still result in the correct state. It's guaranateed that at least one - * inheritance happens for any cgroup after the latest update to its - * parent. + * inheritance happens for any css after the latest update to its parent. * * If checking parent's state requires locking the parent, each inheriting * iteration should lock and unlock both @pos->parent and @pos. @@ -865,25 +868,26 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ -#define cgroup_for_each_descendant_pre(pos, cgroup) \ - for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ - pos = cgroup_next_descendant_pre((pos), (cgroup))) +#define css_for_each_descendant_pre(pos, css) \ + for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ + (pos) = css_next_descendant_pre((pos), (css))) -struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, - struct cgroup *cgroup); +struct cgroup_subsys_state * +css_next_descendant_post(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *css); /** - * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants - * @pos: the cgroup * to use as the loop cursor - * @cgroup: cgroup whose descendants to walk + * css_for_each_descendant_post - post-order walk of a css's descendants + * @pos: the css * to use as the loop cursor + * @css: css whose descendants to walk * - * Similar to cgroup_for_each_descendant_pre() but performs post-order + * Similar to css_for_each_descendant_pre() but performs post-order * traversal instead. Note that the walk visibility guarantee described in * pre-order walk doesn't apply the same to post-order walks. */ -#define cgroup_for_each_descendant_post(pos, cgroup) \ - for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \ - pos = cgroup_next_descendant_post((pos), (cgroup))) +#define css_for_each_descendant_post(pos, css) \ + for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ + (pos) = css_next_descendant_post((pos), (css))) /* A cgroup_iter should be treated as an opaque object */ struct cgroup_iter { -- cgit v1.1 From 0942eeeef68f9493c1bcb1a52baf612b73fcf9fb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:26 -0400 Subject: cgroup: rename cgroup_iter to cgroup_task_iter cgroup now has multiple iterators and it's quite confusing to have something which walks over tasks of a single cgroup named cgroup_iter. Let's rename it to cgroup_task_iter. While at it, reformat / update comments and replace the overview comment above the interface function decls with proper function comments. Such overview can be useful but function comments should be more than enough here. This is pure rename and doesn't introduce any functional changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Cc: Matt Helsley Cc: Johannes Weiner Cc: Balbir Singh --- include/linux/cgroup.h | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4bc22f4..ea43979 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -889,31 +889,16 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) -/* A cgroup_iter should be treated as an opaque object */ -struct cgroup_iter { - struct list_head *cset_link; - struct list_head *task; +/* A cgroup_task_iter should be treated as an opaque object */ +struct cgroup_task_iter { + struct list_head *cset_link; + struct list_head *task; }; -/* - * To iterate across the tasks in a cgroup: - * - * 1) call cgroup_iter_start to initialize an iterator - * - * 2) call cgroup_iter_next() to retrieve member tasks until it - * returns NULL or until you want to end the iteration - * - * 3) call cgroup_iter_end() to destroy the iterator. - * - * Or, call cgroup_scan_tasks() to iterate through every task in a - * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling - * the test_task() callback, but not while calling the process_task() - * callback. - */ -void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it); -struct task_struct *cgroup_iter_next(struct cgroup *cgrp, - struct cgroup_iter *it); -void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); +void cgroup_task_iter_start(struct cgroup *cgrp, struct cgroup_task_iter *it); +struct task_struct *cgroup_task_iter_next(struct cgroup *cgrp, + struct cgroup_task_iter *it); +void cgroup_task_iter_end(struct cgroup *cgrp, struct cgroup_task_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -- cgit v1.1 From c59cd3d840b1b0a8f996cbbd9132128dcaabbeb9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:26 -0400 Subject: cgroup: make cgroup_task_iter remember the cgroup being iterated Currently all cgroup_task_iter functions require @cgrp to be passed in, which is superflous and increases chance of usage error. Make cgroup_task_iter remember the cgroup being iterated and drop @cgrp argument from next and end functions. This patch doesn't introduce any behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Cc: Matt Helsley Cc: Johannes Weiner Cc: Balbir Singh --- include/linux/cgroup.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ea43979..0287fcc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -891,14 +891,14 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, /* A cgroup_task_iter should be treated as an opaque object */ struct cgroup_task_iter { + struct cgroup *origin_cgrp; struct list_head *cset_link; struct list_head *task; }; void cgroup_task_iter_start(struct cgroup *cgrp, struct cgroup_task_iter *it); -struct task_struct *cgroup_task_iter_next(struct cgroup *cgrp, - struct cgroup_task_iter *it); -void cgroup_task_iter_end(struct cgroup *cgrp, struct cgroup_task_iter *it); +struct task_struct *cgroup_task_iter_next(struct cgroup_task_iter *it); +void cgroup_task_iter_end(struct cgroup_task_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -- cgit v1.1 From e535837b1dae17b5a2d76ea1bc22ac1a79354624 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:26 -0400 Subject: cgroup: remove struct cgroup_scanner cgroup_scan_tasks() takes a pointer to struct cgroup_scanner as its sole argument and the only function of that struct is packing the arguments of the function call which are consisted of five fields. It's not too unusual to pack parameters into a struct when the number of arguments gets excessive or the whole set needs to be passed around a lot, but neither holds here making it just weird. Drop struct cgroup_scanner and pass the params directly to cgroup_scan_tasks(). Note that struct cpuset_change_nodemask_arg was added to cpuset.c to pass both ->cs and ->newmems pointer to cpuset_change_nodemask() using single data pointer. This doesn't make any functional differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0287fcc..8472ed5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -528,15 +528,6 @@ struct cftype_set { struct cftype *cfts; }; -struct cgroup_scanner { - struct cgroup *cgrp; - int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan); - void (*process_task)(struct task_struct *p, - struct cgroup_scanner *scan); - struct ptr_heap *heap; - void *data; -}; - /* * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This * function can be called as long as @cgrp is accessible. @@ -899,7 +890,12 @@ struct cgroup_task_iter { void cgroup_task_iter_start(struct cgroup *cgrp, struct cgroup_task_iter *it); struct task_struct *cgroup_task_iter_next(struct cgroup_task_iter *it); void cgroup_task_iter_end(struct cgroup_task_iter *it); -int cgroup_scan_tasks(struct cgroup_scanner *scan); + +int cgroup_scan_tasks(struct cgroup *cgrp, + bool (*test)(struct task_struct *, void *), + void (*process)(struct task_struct *, void *), + void *data, struct ptr_heap *heap); + int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -- cgit v1.1 From 72ec7029937f0518eff21b8762743c31591684f5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:26 -0400 Subject: cgroup: make task iterators deal with cgroup_subsys_state instead of cgroup cgroup is in the process of converting to css (cgroup_subsys_state) from cgroup as the principal subsystem interface handle. This is mostly to prepare for the unified hierarchy support where css's will be created and destroyed dynamically but also helps cleaning up subsystem implementations as css is usually what they are interested in anyway. This patch converts task iterators to deal with css instead of cgroup. Note that under unified hierarchy, different sets of tasks will be considered belonging to a given cgroup depending on the subsystem in question and making the iterators deal with css instead cgroup provides them with enough information about the iteration. While at it, fix several function comment formats in cpuset.c. This patch doesn't introduce any behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh Cc: Matt Helsley --- include/linux/cgroup.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8472ed5..cd105fc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -880,21 +880,22 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) -/* A cgroup_task_iter should be treated as an opaque object */ -struct cgroup_task_iter { - struct cgroup *origin_cgrp; +/* A css_task_iter should be treated as an opaque object */ +struct css_task_iter { + struct cgroup_subsys_state *origin_css; struct list_head *cset_link; struct list_head *task; }; -void cgroup_task_iter_start(struct cgroup *cgrp, struct cgroup_task_iter *it); -struct task_struct *cgroup_task_iter_next(struct cgroup_task_iter *it); -void cgroup_task_iter_end(struct cgroup_task_iter *it); +void css_task_iter_start(struct cgroup_subsys_state *css, + struct css_task_iter *it); +struct task_struct *css_task_iter_next(struct css_task_iter *it); +void css_task_iter_end(struct css_task_iter *it); -int cgroup_scan_tasks(struct cgroup *cgrp, - bool (*test)(struct task_struct *, void *), - void (*process)(struct task_struct *, void *), - void *data, struct ptr_heap *heap); +int css_scan_tasks(struct cgroup_subsys_state *css, + bool (*test)(struct task_struct *, void *), + void (*process)(struct task_struct *, void *), + void *data, struct ptr_heap *heap); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -- cgit v1.1 From 81eeaf0411204f52af8ef78ff107cfca2fcfec1d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:26 -0400 Subject: cgroup: make cftype->[un]register_event() deal with cgroup_subsys_state instead of cgroup cgroup is in the process of converting to css (cgroup_subsys_state) from cgroup as the principal subsystem interface handle. This is mostly to prepare for the unified hierarchy support where css's will be created and destroyed dynamically but also helps cleaning up subsystem implementations as css is usually what they are interested in anyway. cftype->[un]register_event() is among the remaining couple interfaces which still use struct cgroup. Convert it to cgroup_subsys_state. The conversion is mostly mechanical and removes the last users of mem_cgroup_from_cont() and cg_to_vmpressure(), which are removed. v2: indentation update as suggested by Li Zefan. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh --- include/linux/cgroup.h | 10 ++++++---- include/linux/vmpressure.h | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index cd105fc..b065d24 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -506,16 +506,18 @@ struct cftype { * you want to provide this functionality. Use eventfd_signal() * on eventfd to send notification to userspace. */ - int (*register_event)(struct cgroup *cgrp, struct cftype *cft, - struct eventfd_ctx *eventfd, const char *args); + int (*register_event)(struct cgroup_subsys_state *css, + struct cftype *cft, struct eventfd_ctx *eventfd, + const char *args); /* * unregister_event() callback will be called when userspace * closes the eventfd or on cgroup removing. * This callback must be implemented, if you want provide * notification functionality. */ - void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft, - struct eventfd_ctx *eventfd); + void (*unregister_event)(struct cgroup_subsys_state *css, + struct cftype *cft, + struct eventfd_ctx *eventfd); }; /* diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 76be077..b239482 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -33,10 +33,12 @@ extern void vmpressure_init(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); -extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, +extern int vmpressure_register_event(struct cgroup_subsys_state *css, + struct cftype *cft, struct eventfd_ctx *eventfd, const char *args); -extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, +extern void vmpressure_unregister_event(struct cgroup_subsys_state *css, + struct cftype *cft, struct eventfd_ctx *eventfd); #else static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, -- cgit v1.1 From d99c8727e7bbc01b70e2c57e6127bfab26b868fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:27 -0400 Subject: cgroup: make cgroup_taskset deal with cgroup_subsys_state instead of cgroup cgroup is in the process of converting to css (cgroup_subsys_state) from cgroup as the principal subsystem interface handle. This is mostly to prepare for the unified hierarchy support where css's will be created and destroyed dynamically but also helps cleaning up subsystem implementations as css is usually what they are interested in anyway. cgroup_taskset which is used by the subsystem attach methods is the last cgroup subsystem API which isn't using css as the handle. Update cgroup_taskset_cur_cgroup() to cgroup_taskset_cur_css() and cgroup_taskset_for_each() to take @skip_css instead of @skip_cgrp. The conversions are pretty mechanical. One exception is cpuset::cgroup_cs(), which lost its last user and got removed. This patch shouldn't introduce any functional changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Daniel Wagner Cc: Ingo Molnar Cc: Matt Helsley Cc: Steven Rostedt --- include/linux/cgroup.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b065d24..d9a9705 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -562,20 +562,22 @@ int cgroup_task_count(const struct cgroup *cgrp); struct cgroup_taskset; struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); -struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset); +struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset, + int subsys_id); int cgroup_taskset_size(struct cgroup_taskset *tset); /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor - * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all + * @skip_css: skip if task's css matches this, %NULL to iterate through all * @tset: taskset to iterate */ -#define cgroup_taskset_for_each(task, skip_cgrp, tset) \ +#define cgroup_taskset_for_each(task, skip_css, tset) \ for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) \ - if (!(skip_cgrp) || \ - cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp)) + if (!(skip_css) || \ + cgroup_taskset_cur_css((tset), \ + (skip_css)->ss->subsys_id) != (skip_css)) /* * Control Group subsystem type. -- cgit v1.1 From 95109b627ba6a043c181fa5fa45d1c754dd44fbc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:27 -0400 Subject: cgroup: unexport cgroup_css() cgroup_css() no longer has any user left outside cgroup.c proper and we don't want subsystems to grow new usages of the function. cgroup core should always provide the css to use to the subsystems, which will make dynamic creation and destruction of css's across the lifetime of a cgroup much more manageable than exposing the cgroup directly to subsystems and let them dereference css's from it. Make cgroup_css() a static function in cgroup.c. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d9a9705..c40e508 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -678,19 +678,6 @@ struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) } /** - * cgroup_css - obtain a cgroup's css for the specified subsystem - * @cgrp: the cgroup of interest - * @subsys_id: the subsystem of interest - * - * Return @cgrp's css (cgroup_subsys_state) associated with @subsys_id. - */ -static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, - int subsys_id) -{ - return cgrp->subsys[subsys_id]; -} - -/** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for * @__c: extra condition expression to be passed to rcu_dereference_check() -- cgit v1.1 From bd8815a6d802fc16a7a106e170593aa05dc17e72 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Aug 2013 20:11:27 -0400 Subject: cgroup: make css_for_each_descendant() and friends include the origin css in the iteration Previously, all css descendant iterators didn't include the origin (root of subtree) css in the iteration. The reasons were maintaining consistency with css_for_each_child() and that at the time of introduction more use cases needed skipping the origin anyway; however, given that css_is_descendant() considers self to be a descendant, omitting the origin css has become more confusing and looking at the accumulated use cases rather clearly indicates that including origin would result in simpler code overall. While this is a change which can easily lead to subtle bugs, cgroup API including the iterators has recently gone through major restructuring and no out-of-tree changes will be applicable without adjustments making this a relatively acceptable opportunity for this type of change. The conversions are mostly straight-forward. If the iteration block had explicit origin handling before or after, it's moved inside the iteration. If not, if (pos == origin) continue; is added. Some conversions add extra reference get/put around origin handling by consolidating origin handling and the rest. While the extra ref operations aren't strictly necessary, this shouldn't cause any noticeable difference. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Vivek Goyal Acked-by: Aristeu Rozanski Acked-by: Michal Hocko Cc: Jens Axboe Cc: Matt Helsley Cc: Johannes Weiner Cc: Balbir Singh --- include/linux/cgroup.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c40e508..8ec5b0f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -798,7 +798,8 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos); * @pos: the css * to use as the loop cursor * @root: css whose descendants to walk * - * Walk @root's descendants. Must be called under rcu_read_lock(). A + * Walk @root's descendants. @root is included in the iteration and the + * first node to be visited. Must be called under rcu_read_lock(). A * descendant css which hasn't finished ->css_online() or already has * finished ->css_offline() may show up during traversal and it's each * subsystem's responsibility to verify that each @pos is alive. @@ -820,13 +821,12 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos); * * my_update_state(@css) * { - * Lock @css; - * Update @css's state; - * Unlock @css; - * * css_for_each_descendant_pre(@pos, @css) { * Lock @pos; - * Verify @pos is alive and inherit state from @pos's parent; + * if (@pos == @css) + * Update @css's state; + * else + * Verify @pos is alive and inherit state from its parent; * Unlock @pos; * } * } @@ -864,8 +864,9 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, * @css: css whose descendants to walk * * Similar to css_for_each_descendant_pre() but performs post-order - * traversal instead. Note that the walk visibility guarantee described in - * pre-order walk doesn't apply the same to post-order walks. + * traversal instead. @root is included in the iteration and the last + * node to be visited. Note that the walk visibility guarantee described + * in pre-order walk doesn't apply the same to post-order walks. */ #define css_for_each_descendant_post(pos, css) \ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ -- cgit v1.1 From 35ef10da65d43211f4cd7e7822cbb3becdfc0ae1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Aug 2013 11:01:54 -0400 Subject: cgroup: rename cgroup_subsys_state->dput_work and its callback function css (cgroup_subsys_state) will become RCU protected and there will be two stages which require punting to work item during release. To prepare for using the work item for multiple times, rename css->dput_work to css->destroy_work and css_dput_fn() to css_free_work_fn() and move work item initialization from css init to right before the actual usage. This reorganization doesn't introduce any behavior change. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8ec5b0f..12d66fe 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -80,7 +80,7 @@ struct cgroup_subsys_state { struct css_id __rcu *id; /* Used to put @cgroup->dentry on the last css_put() */ - struct work_struct dput_work; + struct work_struct destroy_work; }; /* bits in struct cgroup_subsys_state flags field */ -- cgit v1.1 From 0ae78e0bf10ac38ab53548e18383afc9997eca22 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Aug 2013 11:01:54 -0400 Subject: cgroup: add cgroup_subsys_state->parent With the planned unified hierarchy, css's (cgroup_subsys_state) will be RCU protected and allowed to be attached and detached dynamically over the course of a cgroup's lifetime. This means that css's will stay accessible after being detached from its cgroup - the matching pointer in cgroup->subsys[] cleared - for ref draining and RCU grace period. cgroup core still wants to guarantee that the parent css is never destroyed before its children and css_parent() always returns the parent regardless of the state of the child css as long as it's accessible. This patch makes css's hold onto their parents and adds css->parent so that the parent css is never detroyed before its children and can be determined without consulting the cgroups. cgroup->dummy_css is also updated to point to the parent dummy_css; however, it doesn't need to worry about object lifetime as the parent cgroup is already pinned by the child. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 12d66fe..8a5dc91 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,6 +75,9 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; + /* the parent css */ + struct cgroup_subsys_state *parent; + unsigned long flags; /* ID for this css, if possible */ struct css_id __rcu *id; @@ -666,15 +669,7 @@ struct cgroup_subsys { static inline struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) { - struct cgroup *parent_cgrp = css->cgroup->parent; - - if (!parent_cgrp) - return NULL; - - if (css->ss) - return parent_cgrp->subsys[css->ss->subsys_id]; - else - return &parent_cgrp->dummy_css; + return css->parent; } /** -- cgit v1.1 From 73e80ed8007fc48a6deeb295ba37159fad274bd2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Aug 2013 11:01:55 -0400 Subject: cgroup: add __rcu modifier to cgroup->subsys[] For the planned unified hierarchy, each css (cgroup_subsys_state) will be RCU protected so that it can be created and destroyed individually while allowing RCU accesses. Previous changes ensured that all cgroup->subsys[] accesses use the cgroup_css() accessor. This patch adds __rcu modifier to cgroup->subsys[], add matching RCU dereference in cgroup_css() and convert all assignments to either rcu_assign_pointer() or RCU_INIT_POINTER(). This change prepares for the actual RCUfication of css's and doesn't introduce any visible behavior change. The conversion is verified with sparse and all accesses are properly RCU annotated. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8a5dc91..eb200b5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -204,7 +204,7 @@ struct cgroup { struct cgroup_name __rcu *name; /* Private pointers for each registered subsystem */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; struct cgroupfs_root *root; -- cgit v1.1 From f20104de55a212a9742d8df1807f1f29dc95b748 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Aug 2013 20:22:50 -0400 Subject: cgroup: replace cgroup->css_kill_cnt with ->nr_css Currently, css (cgroup_subsys_state) lifetime is tied to that of the associated cgroup. With the planned unified hierarchy, css's will be dynamically created and destroyed within the lifetime of a cgroup. To enable such usages, css's will be individually RCU protected instead of being tied to the cgroup. cgroup->css_kill_cnt is used during cgroup destruction to wait for css reference count disable; however, this model doesn't work once css's lifetimes are managed separately from cgroup's. This patch replaces it with cgroup->nr_css which is an cgroup_mutex protected integer counting the number of attached css's. The count is incremented from online_css() and decremented after refcnt kill is confirmed. If the count reaches zero and the cgroup is marked dead, the second stage of cgroup destruction is kicked off. If a cgroup doesn't have any css attached at the time of rmdir, cgroup_destroy_locked() now invokes the second stage directly as no css kill confirmation would happen. cgroup_offline_fn() - the second step of cgroup destruction - is renamed to cgroup_destroy_css_killed() and now expects to be called with cgroup_mutex held. While this patch changes how css destruction is punted to work items, it shouldn't change any visible behavior. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index eb200b5..80dca87 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -171,6 +171,9 @@ struct cgroup { */ int id; + /* the number of attached css's */ + int nr_css; + /* * We link our 'sibling' struct into our parent's 'children'. * Our children link their 'sibling' into our 'children'. @@ -234,7 +237,6 @@ struct cgroup { /* For css percpu_ref killing and RCU-protected deletion */ struct rcu_head rcu_head; struct work_struct destroy_work; - atomic_t css_kill_cnt; /* List of events which userspace want to receive */ struct list_head event_list; -- cgit v1.1 From 09a503ea3a816b285b0b402b7f785eaec0c7a7e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Aug 2013 20:22:50 -0400 Subject: cgroup: decouple cgroup_subsys_state destruction from cgroup destruction Currently, css (cgroup_subsys_state) lifetime is tied to that of the associated cgroup. css's are created when the associated cgroup is created and destroyed when it gets destroyed. Also, individual css's aren't RCU protected but the whole cgroup is. With the planned unified hierarchy, css's will need to be dynamically created and destroyed within the lifetime of a cgroup. To enable such usages, this patch decouples css destruction from cgroup destruction - offline_css() invocation and the final css_put() are moved from cgroup_destroy_css_killed() to css_killed_work_fn(). Now each css is individually offlined and put as its reference count is killed instead of waiting for all css's attached to the cgroup to finish refcnt killing and then proceeding to offlining and putting them together. While this changes the order of destruction operations, the changes shouldn't be noticeable to cgroup subsystems or userland. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 80dca87..71e77e7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -82,7 +82,7 @@ struct cgroup_subsys_state { /* ID for this css, if possible */ struct css_id __rcu *id; - /* Used to put @cgroup->dentry on the last css_put() */ + /* percpu_ref killing and putting dentry on the last css_put() */ struct work_struct destroy_work; }; -- cgit v1.1 From 0c21ead136a900c36f1ab74fd7d09a306dc31324 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Aug 2013 20:22:51 -0400 Subject: cgroup: RCU protect each cgroup_subsys_state release With the planned unified hierarchy, individual css's will be created and destroyed dynamically across the lifetime of a cgroup. To enable such usages, css destruction is being decoupled from cgroup destruction. Most of the destruction path has been decoupled but the actual free of css still depends on cgroup free path. When all css refs are drained, css_release() kicks off css_free_work_fn() which puts the cgroup. When the cgroup refcnt reaches zero, cgroup_diput() is invoked which in turn schedules RCU free of the cgroup. After a grace period, all css's are freed along with the cgroup itself. This patch moves the RCU grace period and css freeing from cgroup release path to css release path. css_release(), instead of kicking off css_free_work_fn() directly, schedules RCU callback css_free_rcu_fn() which in turn kicks off css_free_work_fn() after a RCU grace period. css_free_work_fn() is updated to free the css directly. The five-way punting - percpu ref kill confirmation, a work item, percpu ref release, RCU grace period, and again a work item - is quite hairy but the work items are there only to provide process context and the actual sequence is kill confirm -> release -> RCU free, which isn't simple but not too crazy. This removes cgroup_css() usage after offline_css() allowing clearing cgroup->subsys[] from offline_css(), which makes it consistent with online_css() and brings it closer to proper lifetime management for individual css's. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 71e77e7..c24bd0b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -82,7 +82,8 @@ struct cgroup_subsys_state { /* ID for this css, if possible */ struct css_id __rcu *id; - /* percpu_ref killing and putting dentry on the last css_put() */ + /* percpu_ref killing and RCU release */ + struct rcu_head rcu_head; struct work_struct destroy_work; }; -- cgit v1.1 From 1cb650b91ba582f6737457b7d22e368585596d2c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 19 Aug 2013 10:05:24 +0800 Subject: cgroup: change cgroup_from_id() to css_from_id() Now we want cgroup core to always provide the css to use to the subsystems, so change this API to css_from_id(). Uninline css_from_id(), because it's getting bigger and cgroup_css() has been unexported. While at it, remove the #ifdef, and shuffle the order of the args. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c24bd0b..b685955 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -741,27 +741,11 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } -/** - * cgroup_from_id - lookup cgroup by id - * @ss: cgroup subsys to be looked into - * @id: the cgroup id - * - * Returns the cgroup if there's valid one with @id, otherwise returns NULL. - * Should be called under rcu_read_lock(). - */ -static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id) -{ -#ifdef CONFIG_PROVE_RCU - rcu_lockdep_assert(rcu_read_lock_held() || - lockdep_is_held(&cgroup_mutex), - "cgroup_from_id() needs proper protection"); -#endif - return idr_find(&ss->root->cgroup_idr, id); -} - struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *parent); +struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); + /** * css_for_each_child - iterate through children of a css * @pos: the css * to use as the loop cursor -- cgit v1.1 From 35cf083619da5677f83e9a8eae813f0b413d7082 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Aug 2013 18:40:56 -0400 Subject: cgroup: rename cgroup_css_from_dir() to css_from_dir() and update its syntax cgroup_css_from_dir() will grow another user. In preparation, make the following changes. * All css functions are prefixed with just "css_", rename it to css_from_dir(). * Take dentry * instead of file * as dentry is what ultimately identifies a cgroup and file may not always be available. Note that the function now checkes whether @dentry->d_inode is NULL as the caller now may specify a negative dentry. * Make it take cgroup_subsys * instead of integer subsys_id. This simplifies the function and allows specifying no subsystem for cgroup->dummy_css. * Make return section a bit less verbose. This patch doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar --- include/linux/cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b685955..21ba298 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -903,7 +903,8 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg, /* Get id and depth of css */ unsigned short css_id(struct cgroup_subsys_state *css); -struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); +struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss); #else /* !CONFIG_CGROUPS */ -- cgit v1.1 From 9fa4db334c7d9570aec7a5121e84fae99aae1d04 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Aug 2013 18:40:56 -0400 Subject: cgroup: implement CFTYPE_NO_PREFIX When cgroup files are created, cgroup core automatically prepends the name of the subsystem as prefix. This patch adds CFTYPE_NO_ which disables the automatic prefix. This is to work around historical baggages and shouldn't be used for new files. This will be used to move "cgroup.event_control" from cgroup core to memcg. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Cc: Glauber Costa --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 21ba298..3561d30 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -411,6 +411,7 @@ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ + CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ }; #define MAX_CFTYPE_NAME 64 -- cgit v1.1