diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/cgroup.h | 167 | ||||
-rw-r--r-- | include/linux/freezer.h | 57 | ||||
-rw-r--r-- | include/net/netprio_cgroup.h | 11 |
3 files changed, 163 insertions, 72 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f8a030c..7d73905 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -12,6 +12,7 @@ #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/rcupdate.h> +#include <linux/rculist.h> #include <linux/cgroupstats.h> #include <linux/prio_heap.h> #include <linux/rwsem.h> @@ -34,7 +35,6 @@ extern int cgroup_lock_is_held(void); extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); -extern void cgroup_fork_callbacks(struct task_struct *p); extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern int cgroupstats_build(struct cgroupstats *stats, @@ -66,7 +66,7 @@ struct cgroup_subsys_state { /* * State maintained by the cgroup system to allow subsystems * to be "busy". Should be accessed via css_get(), - * css_tryget() and and css_put(). + * css_tryget() and css_put(). */ atomic_t refcnt; @@ -81,9 +81,8 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { - CSS_ROOT, /* This CSS is the root of the subsystem */ - CSS_REMOVED, /* This CSS is dead */ - CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */ + CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */ + CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ }; /* Caller must verify that the css is not for root cgroup */ @@ -102,15 +101,10 @@ static inline void __css_get(struct cgroup_subsys_state *css, int count) static inline void css_get(struct cgroup_subsys_state *css) { /* We don't need to reference count the root state */ - if (!test_bit(CSS_ROOT, &css->flags)) + if (!(css->flags & CSS_ROOT)) __css_get(css, 1); } -static inline bool css_is_removed(struct cgroup_subsys_state *css) -{ - return test_bit(CSS_REMOVED, &css->flags); -} - /* * Call css_tryget() to take a reference on a css if your existing * (known-valid) reference isn't already ref-counted. Returns false if @@ -120,7 +114,7 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css) extern bool __css_tryget(struct cgroup_subsys_state *css); static inline bool css_tryget(struct cgroup_subsys_state *css) { - if (test_bit(CSS_ROOT, &css->flags)) + if (css->flags & CSS_ROOT) return true; return __css_tryget(css); } @@ -133,7 +127,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) extern void __css_put(struct cgroup_subsys_state *css); static inline void css_put(struct cgroup_subsys_state *css) { - if (!test_bit(CSS_ROOT, &css->flags)) + if (!(css->flags & CSS_ROOT)) __css_put(css); } @@ -149,13 +143,11 @@ enum { /* Control Group requires release notifications to userspace */ CGRP_NOTIFY_ON_RELEASE, /* - * A thread in rmdir() is wating for this cgroup. - */ - CGRP_WAIT_ON_RMDIR, - /* - * Clone cgroup values when creating a new child cgroup + * Clone the parent's configuration when creating a new child + * cpuset cgroup. For historical reasons, this option can be + * specified at mount time and thus is implemented here. */ - CGRP_CLONE_CHILDREN, + CGRP_CPUSET_CLONE_CHILDREN, }; struct cgroup { @@ -167,6 +159,8 @@ struct cgroup { */ atomic_t count; + int id; /* ida allocated in-hierarchy ID */ + /* * We link our 'sibling' struct into our parent's 'children'. * Our children link their 'sibling' into our 'children'. @@ -176,7 +170,7 @@ struct cgroup { struct list_head files; /* my files */ struct cgroup *parent; /* my parent */ - struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ + struct dentry *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; @@ -282,7 +276,7 @@ struct cgroup_map_cb { /* cftype->flags */ #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ -#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create onp root cg */ +#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ #define MAX_CFTYPE_NAME 64 @@ -422,23 +416,6 @@ int cgroup_task_count(const struct cgroup *cgrp); int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); /* - * When the subsys has to access css and may add permanent refcnt to css, - * it should take care of racy conditions with rmdir(). Following set of - * functions, is for stop/restart rmdir if necessary. - * Because these will call css_get/put, "css" should be alive css. - * - * cgroup_exclude_rmdir(); - * ...do some jobs which may access arbitrary empty cgroup - * cgroup_release_and_wakeup_rmdir(); - * - * When someone removes a cgroup while cgroup_exclude_rmdir() holds it, - * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up. - */ - -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css); -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); - -/* * Control Group taskset, used to pass around set of tasks to cgroup_subsys * methods. */ @@ -466,16 +443,17 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); */ struct cgroup_subsys { - struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); - int (*pre_destroy)(struct cgroup *cgrp); - void (*destroy)(struct cgroup *cgrp); + struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); + int (*css_online)(struct cgroup *cgrp); + void (*css_offline)(struct cgroup *cgrp); + void (*css_free)(struct cgroup *cgrp); + int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); void (*fork)(struct task_struct *task); void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, struct task_struct *task); - void (*post_clone)(struct cgroup *cgrp); void (*bind)(struct cgroup *root); int subsys_id; @@ -489,17 +467,6 @@ struct cgroup_subsys { bool use_id; /* - * If %true, cgroup removal will try to clear css refs by retrying - * ss->pre_destroy() until there's no css ref left. This behavior - * is strictly for backward compatibility and will be removed as - * soon as the current user (memcg) is updated. - * - * If %false, ss->pre_destroy() can't fail and cgroup removal won't - * wait for css refs to drop to zero before proceeding. - */ - bool __DEPRECATED_clear_css_refs; - - /* * If %false, this subsystem is properly hierarchical - * configuration, resource accounting and restriction on a parent * cgroup cover those of its children. If %true, hierarchy support @@ -572,6 +539,100 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, return task_subsys_state(task, subsys_id)->cgroup; } +/** + * cgroup_for_each_child - iterate through children of a cgroup + * @pos: the cgroup * to use as the loop cursor + * @cgroup: cgroup whose children to walk + * + * Walk @cgroup's children. Must be called under rcu_read_lock(). A child + * cgroup which hasn't finished ->css_online() or already has finished + * ->css_offline() may show up during traversal and it's each subsystem's + * responsibility to verify that each @pos is alive. + * + * If a subsystem synchronizes against the parent in its ->css_online() and + * before starting iterating, a cgroup which finished ->css_online() is + * guaranteed to be visible in the future iterations. + */ +#define cgroup_for_each_child(pos, cgroup) \ + list_for_each_entry_rcu(pos, &(cgroup)->children, sibling) + +struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, + struct cgroup *cgroup); + +/** + * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants + * @pos: the cgroup * to use as the loop cursor + * @cgroup: cgroup whose descendants to walk + * + * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A + * descendant cgroup which hasn't finished ->css_online() or already has + * finished ->css_offline() may show up during traversal and it's each + * subsystem's responsibility to verify that each @pos is alive. + * + * If a subsystem synchronizes against the parent in its ->css_online() and + * before starting iterating, and synchronizes against @pos on each + * iteration, any descendant cgroup which finished ->css_offline() is + * guaranteed to be visible in the future iterations. + * + * In other words, the following guarantees that a descendant can't escape + * state updates of its ancestors. + * + * my_online(@cgrp) + * { + * Lock @cgrp->parent and @cgrp; + * Inherit state from @cgrp->parent; + * Unlock both. + * } + * + * my_update_state(@cgrp) + * { + * Lock @cgrp; + * Update @cgrp's state; + * Unlock @cgrp; + * + * cgroup_for_each_descendant_pre(@pos, @cgrp) { + * Lock @pos; + * Verify @pos is alive and inherit state from @pos->parent; + * Unlock @pos; + * } + * } + * + * As long as the inheriting step, including checking the parent state, is + * enclosed inside @pos locking, double-locking the parent isn't necessary + * while inheriting. The state update to the parent is guaranteed to be + * visible by walking order and, as long as inheriting operations to the + * same @pos are atomic to each other, multiple updates racing each other + * still result in the correct state. It's guaranateed that at least one + * inheritance happens for any cgroup after the latest update to its + * parent. + * + * If checking parent's state requires locking the parent, each inheriting + * iteration should lock and unlock both @pos->parent and @pos. + * + * Alternatively, a subsystem may choose to use a single global lock to + * synchronize ->css_online() and ->css_offline() against tree-walking + * operations. + */ +#define cgroup_for_each_descendant_pre(pos, cgroup) \ + for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ + pos = cgroup_next_descendant_pre((pos), (cgroup))) + +struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, + struct cgroup *cgroup); + +/** + * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants + * @pos: the cgroup * to use as the loop cursor + * @cgroup: cgroup whose descendants to walk + * + * Similar to cgroup_for_each_descendant_pre() but performs post-order + * traversal instead. Note that the walk visibility guarantee described in + * pre-order walk doesn't apply the same to post-order walks. + */ +#define cgroup_for_each_descendant_post(pos, cgroup) \ + for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \ + pos = cgroup_next_descendant_post((pos), (cgroup))) + /* A cgroup_iter should be treated as an opaque object */ struct cgroup_iter { struct list_head *cg_link; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index b90091a..e4238ce 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -75,35 +75,68 @@ static inline bool cgroup_freezing(struct task_struct *task) */ -/* Tell the freezer not to count the current task as freezable. */ +/** + * freezer_do_not_count - tell freezer to ignore %current + * + * Tell freezers to ignore the current task when determining whether the + * target frozen state is reached. IOW, the current task will be + * considered frozen enough by freezers. + * + * The caller shouldn't do anything which isn't allowed for a frozen task + * until freezer_cont() is called. Usually, freezer[_do_not]_count() pair + * wrap a scheduling operation and nothing much else. + */ static inline void freezer_do_not_count(void) { current->flags |= PF_FREEZER_SKIP; } -/* - * Tell the freezer to count the current task as freezable again and try to - * freeze it. +/** + * freezer_count - tell freezer to stop ignoring %current + * + * Undo freezer_do_not_count(). It tells freezers that %current should be + * considered again and tries to freeze if freezing condition is already in + * effect. */ static inline void freezer_count(void) { current->flags &= ~PF_FREEZER_SKIP; + /* + * If freezing is in progress, the following paired with smp_mb() + * in freezer_should_skip() ensures that either we see %true + * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP. + */ + smp_mb(); try_to_freeze(); } -/* - * Check if the task should be counted as freezable by the freezer +/** + * freezer_should_skip - whether to skip a task when determining frozen + * state is reached + * @p: task in quesion + * + * This function is used by freezers after establishing %true freezing() to + * test whether a task should be skipped when determining the target frozen + * state is reached. IOW, if this function returns %true, @p is considered + * frozen enough. */ -static inline int freezer_should_skip(struct task_struct *p) +static inline bool freezer_should_skip(struct task_struct *p) { - return !!(p->flags & PF_FREEZER_SKIP); + /* + * The following smp_mb() paired with the one in freezer_count() + * ensures that either freezer_count() sees %true freezing() or we + * see cleared %PF_FREEZER_SKIP and return %false. This makes it + * impossible for a task to slip frozen state testing after + * clearing %PF_FREEZER_SKIP. + */ + smp_mb(); + return p->flags & PF_FREEZER_SKIP; } /* - * These macros are intended to be used whenever you want allow a task that's - * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note - * that neither return any clear indication of whether a freeze event happened - * while in this function. + * These macros are intended to be used whenever you want allow a sleeping + * task to be frozen. Note that neither return any clear indication of + * whether a freeze event happened while in this function. */ /* Like schedule(), but should not block the freezer. */ diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 2760f4f..1d04b6f 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -27,7 +27,6 @@ struct netprio_map { struct cgroup_netprio_state { struct cgroup_subsys_state css; - u32 prioidx; }; extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); @@ -36,13 +35,12 @@ extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); static inline u32 task_netprioidx(struct task_struct *p) { - struct cgroup_netprio_state *state; + struct cgroup_subsys_state *css; u32 idx; rcu_read_lock(); - state = container_of(task_subsys_state(p, net_prio_subsys_id), - struct cgroup_netprio_state, css); - idx = state->prioidx; + css = task_subsys_state(p, net_prio_subsys_id); + idx = css->cgroup->id; rcu_read_unlock(); return idx; } @@ -57,8 +55,7 @@ static inline u32 task_netprioidx(struct task_struct *p) rcu_read_lock(); css = task_subsys_state(p, net_prio_subsys_id); if (css) - idx = container_of(css, - struct cgroup_netprio_state, css)->prioidx; + idx = css->cgroup->id; rcu_read_unlock(); return idx; } |