diff options
-rw-r--r-- | block/blk-throttle.c | 6 | ||||
-rw-r--r-- | include/linux/cgroup.h | 125 | ||||
-rw-r--r-- | kernel/cgroup.c | 19 | ||||
-rw-r--r-- | kernel/cpuset.c | 33 | ||||
-rw-r--r-- | mm/memcontrol.c | 7 |
5 files changed, 86 insertions, 104 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 3fdb21a..9273d09 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -412,13 +412,13 @@ static void throtl_pd_init(struct blkcg_gq *blkg) int rw; /* - * If sane_hierarchy is enabled, we switch to properly hierarchical + * If on the default hierarchy, we switch to properly hierarchical * behavior where limits on a given throtl_grp are applied to the * whole subtree rather than just the group itself. e.g. If 16M * read_bps limit is set on the root group, the whole system can't * exceed 16M for the device. * - * If sane_hierarchy is not enabled, the broken flat hierarchy + * If not on the default hierarchy, the broken flat hierarchy * behavior is retained where all throtl_grps are treated as if * they're all separate root groups right below throtl_data. * Limits of a group don't interact with limits of other groups @@ -426,7 +426,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg) */ parent_sq = &td->service_queue; - if (cgroup_sane_behavior(blkg->blkcg->css.cgroup) && blkg->parent) + if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent) parent_sq = &blkg_to_tg(blkg->parent)->service_queue; throtl_service_queue_init(&tg->service_queue, parent_sq); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c4901c1..7bb2744 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -256,68 +256,7 @@ struct cgroup { /* cgroup_root->flags */ enum { - /* - * Unfortunately, cgroup core and various controllers are riddled - * with idiosyncrasies and pointless options. The following flag, - * when set, will force sane behavior - some options are forced on, - * others are disallowed, and some controllers will change their - * hierarchical or other behaviors. - * - * The set of behaviors affected by this flag are still being - * determined and developed and the mount option for this flag is - * prefixed with __DEVEL__. The prefix will be dropped once we - * reach the point where all behaviors are compatible with the - * planned unified hierarchy, which will automatically turn on this - * flag. - * - * The followings are the behaviors currently affected this flag. - * - * - Mount options "noprefix", "xattr", "clone_children", - * "release_agent" and "name" are disallowed. - * - * - When mounting an existing superblock, mount options should - * match. - * - * - Remount is disallowed. - * - * - rename(2) is disallowed. - * - * - "tasks" is removed. Everything should be at process - * granularity. Use "cgroup.procs" instead. - * - * - "cgroup.procs" is not sorted. pids will be unique unless they - * got recycled inbetween reads. - * - * - "release_agent" and "notify_on_release" are removed. - * Replacement notification mechanism will be implemented. - * - * - "cgroup.clone_children" is removed. - * - * - "cgroup.subtree_populated" is available. Its value is 0 if - * the cgroup and its descendants contain no task; otherwise, 1. - * The file also generates kernfs notification which can be - * monitored through poll and [di]notify when the value of the - * file changes. - * - * - If mount is requested with sane_behavior but without any - * subsystem, the default unified hierarchy is mounted. - * - * - cpuset: tasks will be kept in empty cpusets when hotplug happens - * and take masks of ancestors with non-empty cpus/mems, instead of - * being moved to an ancestor. - * - * - cpuset: a task can be moved into an empty cpuset, and again it - * takes masks of ancestors. - * - * - memcg: use_hierarchy is on by default and the cgroup file for - * the flag is not created. - * - * - blkcg: blk-throttle becomes properly hierarchical. - * - * - debug: disallowed on the default hierarchy. - */ - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), - + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ }; @@ -531,20 +470,64 @@ struct cftype { extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +/** + * cgroup_on_dfl - test whether a cgroup is on the default hierarchy + * @cgrp: the cgroup of interest + * + * The default hierarchy is the v2 interface of cgroup and this function + * can be used to test whether a cgroup is on the default hierarchy for + * cases where a subsystem should behave differnetly depending on the + * interface version. + * + * The set of behaviors which change on the default hierarchy are still + * being determined and the mount option is prefixed with __DEVEL__. + * + * List of changed behaviors: + * + * - Mount options "noprefix", "xattr", "clone_children", "release_agent" + * and "name" are disallowed. + * + * - When mounting an existing superblock, mount options should match. + * + * - Remount is disallowed. + * + * - rename(2) is disallowed. + * + * - "tasks" is removed. Everything should be at process granularity. Use + * "cgroup.procs" instead. + * + * - "cgroup.procs" is not sorted. pids will be unique unless they got + * recycled inbetween reads. + * + * - "release_agent" and "notify_on_release" are removed. Replacement + * notification mechanism will be implemented. + * + * - "cgroup.clone_children" is removed. + * + * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup + * and its descendants contain no task; otherwise, 1. The file also + * generates kernfs notification which can be monitored through poll and + * [di]notify when the value of the file changes. + * + * - cpuset: tasks will be kept in empty cpusets when hotplug happens and + * take masks of ancestors with non-empty cpus/mems, instead of being + * moved to an ancestor. + * + * - cpuset: a task can be moved into an empty cpuset, and again it takes + * masks of ancestors. + * + * - memcg: use_hierarchy is on by default and the cgroup file for the flag + * is not created. + * + * - blkcg: blk-throttle becomes properly hierarchical. + * + * - debug: disallowed on the default hierarchy. + */ static inline bool cgroup_on_dfl(const struct cgroup *cgrp) { return cgrp->root == &cgrp_dfl_root; } -/* - * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This - * function can be called as long as @cgrp is accessible. - */ -static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) -{ - return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; -} - /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_has_tasks(struct cgroup *cgrp) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0ea54af..fb07c6d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1470,8 +1470,8 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) struct cgroup_sb_opts opts; unsigned int added_mask, removed_mask; - if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) { - pr_err("sane_behavior: remount is not allowed\n"); + if (root == &cgrp_dfl_root) { + pr_err("remount is not allowed\n"); return -EINVAL; } @@ -2943,9 +2943,9 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, /* * This isn't a proper migration and its usefulness is very - * limited. Disallow if sane_behavior. + * limited. Disallow on the default hierarchy. */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return -EPERM; /* @@ -3031,7 +3031,7 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], /* does cft->flags tell us to skip this file on @cgrp? */ if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; - if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) + if ((cft->flags & CFTYPE_INSANE) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; @@ -3764,8 +3764,9 @@ after: * * All this extra complexity was caused by the original implementation * committing to an entirely unnecessary property. In the long term, we - * want to do away with it. Explicitly scramble sort order if - * sane_behavior so that no such expectation exists in the new interface. + * want to do away with it. Explicitly scramble sort order if on the + * default hierarchy so that no such expectation exists in the new + * interface. * * Scrambling is done by swapping every two consecutive bits, which is * non-identity one-to-one mapping which disturbs sort order sufficiently. @@ -3780,7 +3781,7 @@ static pid_t pid_fry(pid_t pid) static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid) { - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return pid_fry(pid); else return pid; @@ -3883,7 +3884,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, css_task_iter_end(&it); length = n; /* now sort & (if procs) strip out duplicates */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) sort(array, length, sizeof(pid_t), fried_cmppid, NULL); else sort(array, length, sizeof(pid_t), cmppid, NULL); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f6b33c6..f9d4807 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1383,12 +1383,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, mutex_lock(&cpuset_mutex); - /* - * We allow to move tasks into an empty cpuset if sane_behavior - * flag is set. - */ + /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; - if (!cgroup_sane_behavior(css->cgroup) && + if (!cgroup_on_dfl(css->cgroup) && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; @@ -2030,7 +2027,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) static cpumask_t off_cpus; static nodemask_t off_mems; bool is_empty; - bool sane = cgroup_sane_behavior(cs->css.cgroup); + bool on_dfl = cgroup_on_dfl(cs->css.cgroup); retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2054,12 +2051,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' cpumask - * for empty cpuset to take on ancestor's cpumask. Otherwise, don't - * call update_tasks_cpumask() if the cpuset becomes empty, as - * the tasks in it will be migrated to an ancestor. + * If on_dfl, we need to update tasks' cpumask for empty cpuset to + * take on ancestor's cpumask. Otherwise, don't call + * update_tasks_cpumask() if the cpuset becomes empty, as the tasks + * in it will be migrated to an ancestor. */ - if ((sane && cpumask_empty(cs->cpus_allowed)) || + if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) update_tasks_cpumask(cs); @@ -2068,12 +2065,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' nodemask - * for empty cpuset to take on ancestor's nodemask. Otherwise, don't - * call update_tasks_nodemask() if the cpuset becomes empty, as - * the tasks in it will be migratd to an ancestor. + * If on_dfl, we need to update tasks' nodemask for empty cpuset to + * take on ancestor's nodemask. Otherwise, don't call + * update_tasks_nodemask() if the cpuset becomes empty, as the + * tasks in it will be migratd to an ancestor. */ - if ((sane && nodes_empty(cs->mems_allowed)) || + if ((on_dfl && nodes_empty(cs->mems_allowed)) || (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) update_tasks_nodemask(cs); @@ -2083,13 +2080,13 @@ retry: mutex_unlock(&cpuset_mutex); /* - * If sane_behavior flag is set, we'll keep tasks in empty cpusets. + * If on_dfl, we'll keep tasks in empty cpusets. * * Otherwise move tasks to the nearest ancestor with execution * resources. This is full cgroup operation which will * also call back into cpuset. Should be done outside any lock. */ - if (!sane && is_empty) + if (!on_dfl && is_empty) remove_tasks_in_empty_cpuset(cs); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index db536e9..a2a4bd6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7024,16 +7024,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css, /* * Cgroup retains root cgroups across [un]mount cycles making it necessary - * to verify sane_behavior flag on each mount attempt. + * to verify whether we're attached to the default hierarchy on each mount + * attempt. */ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) { /* - * use_hierarchy is forced with sane_behavior. cgroup core + * use_hierarchy is forced on the default hierarchy. cgroup core * guarantees that @root doesn't have any children, so turning it * on for the root memcg is enough. */ - if (cgroup_sane_behavior(root_css->cgroup)) + if (cgroup_on_dfl(root_css->cgroup)) mem_cgroup_from_css(root_css)->use_hierarchy = true; } |