From 667c24917144e34880f821486bf0a6e4d05a3a14 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:56 -0400 Subject: cgroup: introduce cgroup->subtree_control cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". Previously, cgroup->child_subsys_mask directly reflected "cgroup.subtree_control" and the enabled subsystems in the child cgroups. This patch adds cgroup->subtree_control which "cgroup.subtree_control" operates on. cgroup->child_subsys_mask is now calculated from cgroup->subtree_control by cgroup_refresh_child_subsys_mask(), which sets it identical to cgroup->subtree_control for now. This will allow using cgroup->child_subsys_mask for all the enabled subsystems including the implicit ones and ->subtree_control for tracking the explicitly requested ones. This patch keeps the two masks identical and doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- include/linux/cgroup.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8a111dd42d7a..8d52c8e5b510 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -203,7 +203,13 @@ struct cgroup { struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ - /* the bitmask of subsystems enabled on the child cgroups */ + /* + * The bitmask of subsystems enabled on the child cgroups. + * ->subtree_control is the one configured through + * "cgroup.subtree_control" while ->child_subsys_mask is the + * effective one which may have more subsystems enabled. + */ + unsigned int subtree_control; unsigned int child_subsys_mask; /* Private pointers for each registered subsystem */ -- cgit v1.2.3 From f63070d350e3562baa6196f1043e01cd8da2509a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: make interface files visible iff enabled on cgroup->subtree_control cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". The preceding patch distinguished cgroup->subtree_control and ->child_subsys_mask where the former is the subsystems explicitly configured by the userland and the latter is all enabled subsystems currently is equal to the former but will include subsystems implicitly enabled through dependency. Subsystems which are enabled due to dependency shouldn't be visible to userland. This patch updates cgroup_subtree_control_write() and create_css() such that interface files are not created for implicitly enabled subsytems. * @visible paramter is added to create_css(). Interface files are created only when true. * If an already implicitly enabled subsystem is turned on through "cgroup.subtree_control", the existing css should be used. css draining is skipped. * cgroup_subtree_control_write() computes the new target cgroup->child_subsys_mask and create/kill or show/hide csses accordingly. As the two subsystem masks are still kept identical, this patch doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- include/linux/cgroup.h | 2 ++ kernel/cgroup.c | 78 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 14 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8d52c8e5b510..5287f931680a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -208,6 +208,8 @@ struct cgroup { * ->subtree_control is the one configured through * "cgroup.subtree_control" while ->child_subsys_mask is the * effective one which may have more subsystems enabled. + * Controller knobs are made available iff it's enabled in + * ->subtree_control. */ unsigned int subtree_control; unsigned int child_subsys_mask; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 14a9d88eacf9..331fa296c7e0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,7 +186,8 @@ static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss); +static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, + bool visible); static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], @@ -2577,6 +2578,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, loff_t off) { unsigned int enable = 0, disable = 0; + unsigned int css_enable, css_disable, old_ctrl, new_ctrl; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -2629,6 +2631,13 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } + /* + * @ss is already enabled through dependency and + * we'll just make it visible. Skip draining. + */ + if (cgrp->child_subsys_mask & (1 << ssid)) + continue; + /* * Because css offlining is asynchronous, userland * might try to re-enable the same controller while @@ -2681,17 +2690,39 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } + /* + * Update subsys masks and calculate what needs to be done. More + * subsystems than specified may need to be enabled or disabled + * depending on subsystem dependencies. + */ cgrp->subtree_control |= enable; cgrp->subtree_control &= ~disable; + + old_ctrl = cgrp->child_subsys_mask; cgroup_refresh_child_subsys_mask(cgrp); + new_ctrl = cgrp->child_subsys_mask; + + css_enable = ~old_ctrl & new_ctrl; + css_disable = old_ctrl & ~new_ctrl; + enable |= css_enable; + disable |= css_disable; - /* create new csses */ + /* + * Create new csses or make the existing ones visible. A css is + * created invisible if it's being implicitly enabled through + * dependency. An invisible css is made visible when the userland + * explicitly enables it. + */ for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) continue; cgroup_for_each_live_child(child, cgrp) { - ret = create_css(child, ss); + if (css_enable & (1 << ssid)) + ret = create_css(child, ss, + cgrp->subtree_control & (1 << ssid)); + else + ret = cgroup_populate_dir(child, 1 << ssid); if (ret) goto err_undo_css; } @@ -2706,13 +2737,21 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, if (ret) goto err_undo_css; - /* all tasks are now migrated away from the old csses, kill them */ + /* + * All tasks are migrated out of disabled csses. Kill or hide + * them. A css is hidden when the userland requests it to be + * disabled while other subsystems are still depending on it. + */ for_each_subsys(ss, ssid) { if (!(disable & (1 << ssid))) continue; - cgroup_for_each_live_child(child, cgrp) - kill_css(cgroup_css(child, ss)); + cgroup_for_each_live_child(child, cgrp) { + if (css_disable & (1 << ssid)) + kill_css(cgroup_css(child, ss)); + else + cgroup_clear_dir(child, 1 << ssid); + } } kernfs_activate(cgrp->kn); @@ -2732,8 +2771,14 @@ err_undo_css: cgroup_for_each_live_child(child, cgrp) { struct cgroup_subsys_state *css = cgroup_css(child, ss); - if (css) + + if (!css) + continue; + + if (css_enable & (1 << ssid)) kill_css(css); + else + cgroup_clear_dir(child, 1 << ssid); } } goto out_unlock; @@ -4282,12 +4327,14 @@ static void offline_css(struct cgroup_subsys_state *css) * create_css - create a cgroup_subsys_state * @cgrp: the cgroup new css will be associated with * @ss: the subsys of new css + * @visible: whether to create control knobs for the new css or not * * Create a new css associated with @cgrp - @ss pair. On success, the new - * css is online and installed in @cgrp with all interface files created. - * Returns 0 on success, -errno on failure. + * css is online and installed in @cgrp with all interface files created if + * @visible. Returns 0 on success, -errno on failure. */ -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) +static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, + bool visible) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss); @@ -4311,9 +4358,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) goto err_free_percpu_ref; css->id = err; - err = cgroup_populate_dir(cgrp, 1 << ss->id); - if (err) - goto err_free_id; + if (visible) { + err = cgroup_populate_dir(cgrp, 1 << ss->id); + if (err) + goto err_free_id; + } /* @css is ready to be brought online now, make it visible */ list_add_tail_rcu(&css->sibling, &parent_css->children); @@ -4430,7 +4479,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* let's create and online css's */ for_each_subsys(ss, ssid) { if (parent->child_subsys_mask & (1 << ssid)) { - ret = create_css(cgrp, ss); + ret = create_css(cgrp, ss, + parent->subtree_control & (1 << ssid)); if (ret) goto out_destroy; } -- cgit v1.2.3 From b4536f0cab2b18414e26101a2b9d484c5cbea0c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: implement cgroup_subsys->css_reset() cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". The previous patches added support for explicitly and implicitly enabled subsystems and showing/hiding their interface files. An explicitly enabled subsystem may become implicitly enabled if it's turned off through "cgroup.subtree_control" but there are subsystems depending on it. In such cases, the subsystem, as it's turned off when seen from userland, shouldn't enforce any resource control. Also, the subsystem may be explicitly turned on later again and its interface files should be as close to the intial state as possible. This patch adds cgroup_subsys->css_reset() which is invoked when a css is hidden. The callback should disable resource control and reset the state to the vanilla state. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- Documentation/cgroups/cgroups.txt | 14 ++++++++++++++ include/linux/cgroup.h | 1 + kernel/cgroup.c | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 821de56d1580..10c949b293e4 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -599,6 +599,20 @@ fork. If this method returns 0 (success) then this should remain valid while the caller holds cgroup_mutex and it is ensured that either attach() or cancel_attach() will be called in future. +void css_reset(struct cgroup_subsys_state *css) +(cgroup_mutex held by caller) + +An optional operation which should restore @css's configuration to the +initial state. This is currently only used on the unified hierarchy +when a subsystem is disabled on a cgroup through +"cgroup.subtree_control" but should remain enabled because other +subsystems depend on it. cgroup core makes such a css invisible by +removing the associated interface files and invokes this callback so +that the hidden subsystem can return to the initial neutral state. +This prevents unexpected resource control from a hidden css and +ensures that the configuration is in the initial state when it is made +visible again later. + void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5287f931680a..db99e3b923b1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -642,6 +642,7 @@ struct cgroup_subsys { int (*css_online)(struct cgroup_subsys_state *css); void (*css_offline)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); + void (*css_reset)(struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_subsys_state *css, struct cgroup_taskset *tset); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 331fa296c7e0..3a6b77d7ba4a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2740,17 +2740,25 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, /* * All tasks are migrated out of disabled csses. Kill or hide * them. A css is hidden when the userland requests it to be - * disabled while other subsystems are still depending on it. + * disabled while other subsystems are still depending on it. The + * css must not actively control resources and be in the vanilla + * state if it's made visible again later. Controllers which may + * be depended upon should provide ->css_reset() for this purpose. */ for_each_subsys(ss, ssid) { if (!(disable & (1 << ssid))) continue; cgroup_for_each_live_child(child, cgrp) { - if (css_disable & (1 << ssid)) - kill_css(cgroup_css(child, ss)); - else + struct cgroup_subsys_state *css = cgroup_css(child, ss); + + if (css_disable & (1 << ssid)) { + kill_css(css); + } else { cgroup_clear_dir(child, 1 << ssid); + if (ss->css_reset) + ss->css_reset(css); + } } } -- cgit v1.2.3 From af0ba6789c8e43518635606d0af1ff475ba7471a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: implement cgroup_subsys->depends_on Currently, the blkio subsystem attributes all of writeback IOs to the root. One of the issues is that there's no way to tell who originated a writeback IO from block layer. Those IOs are usually issued asynchronously from a task which didn't have anything to do with actually generating the dirty pages. The memory subsystem, when enabled, already keeps track of the ownership of each dirty page and it's desirable for blkio to piggyback instead of adding its own per-page tag. blkio piggybacking on memory is an implementation detail which preferably should be handled automatically without requiring explicit userland action. To achieve that, this patch implements cgroup_subsys->depends_on which contains the mask of subsystems which should be enabled together when the subsystem is enabled. The previous patches already implemented the support for enabled but invisible subsystems and cgroup_subsys->depends_on can be easily implemented by updating cgroup_refresh_child_subsys_mask() so that it calculates cgroup->child_subsys_mask considering cgroup_subsys->depends_on of the explicitly enabled subsystems. Documentation/cgroups/unified-hierarchy.txt is updated to explain that subsystems may not become immediately available after being unused from userland and that dependency could be a factor in it. As subsystems may already keep residual references, this doesn't significantly change how subsystem rebinding can be used. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- Documentation/cgroups/unified-hierarchy.txt | 23 ++++++++++++-- include/linux/cgroup.h | 9 ++++++ kernel/cgroup.c | 49 ++++++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 4 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 324b182e6000..a7a2205539a7 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -97,9 +97,26 @@ change soon. All controllers which are not bound to other hierarchies are automatically bound to unified hierarchy and show up at the root of it. Controllers which are enabled only in the root of unified -hierarchy can be bound to other hierarchies at any time. This allows -mixing unified hierarchy with the traditional multiple hierarchies in -a fully backward compatible way. +hierarchy can be bound to other hierarchies. This allows mixing +unified hierarchy with the traditional multiple hierarchies in a fully +backward compatible way. + +A controller can be moved across hierarchies only after the controller +is no longer referenced in its current hierarchy. Because per-cgroup +controller states are destroyed asynchronously and controllers may +have lingering references, a controller may not show up immediately on +the unified hierarchy after the final umount of the previous +hierarchy. Similarly, a controller should be fully disabled to be +moved out of the unified hierarchy and it may take some time for the +disabled controller to become available for other hierarchies; +furthermore, due to dependencies among controllers, other controllers +may need to be disabled too. + +While useful for development and manual configurations, dynamically +moving controllers between the unified and other hierarchies is +strongly discouraged for production use. It is recommended to decide +the hierarchies and controller associations before starting using the +controllers. 2-2. cgroup.subtree_control diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index db99e3b923b1..28853e771f3b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -693,6 +693,15 @@ struct cgroup_subsys { /* base cftypes, automatically registered with subsys itself */ struct cftype *base_cftypes; + + /* + * A subsystem may depend on other subsystems. When such subsystem + * is enabled on a cgroup, the depended-upon subsystems are enabled + * together if available. Subsystems enabled due to dependency are + * not visible to userland until explicitly enabled. The following + * specifies the mask of subsystems that this one depends on. + */ + unsigned int depends_on; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3a6b77d7ba4a..cd02e99d5d3b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1037,9 +1037,56 @@ static void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +/** + * cgroup_refresh_child_subsys_mask - update child_subsys_mask + * @cgrp: the target cgroup + * + * On the default hierarchy, a subsystem may request other subsystems to be + * enabled together through its ->depends_on mask. In such cases, more + * subsystems than specified in "cgroup.subtree_control" may be enabled. + * + * This function determines which subsystems need to be enabled given the + * current @cgrp->subtree_control and records it in + * @cgrp->child_subsys_mask. The resulting mask is always a superset of + * @cgrp->subtree_control and follows the usual hierarchy rules. + */ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) { - cgrp->child_subsys_mask = cgrp->subtree_control; + struct cgroup *parent = cgroup_parent(cgrp); + unsigned int cur_ss_mask = cgrp->subtree_control; + struct cgroup_subsys *ss; + int ssid; + + lockdep_assert_held(&cgroup_mutex); + + if (!cgroup_on_dfl(cgrp)) { + cgrp->child_subsys_mask = cur_ss_mask; + return; + } + + while (true) { + unsigned int new_ss_mask = cur_ss_mask; + + for_each_subsys(ss, ssid) + if (cur_ss_mask & (1 << ssid)) + new_ss_mask |= ss->depends_on; + + /* + * Mask out subsystems which aren't available. This can + * happen only if some depended-upon subsystems were bound + * to non-default hierarchies. + */ + if (parent) + new_ss_mask &= parent->child_subsys_mask; + else + new_ss_mask &= cgrp->root->subsys_mask; + + if (new_ss_mask == cur_ss_mask) + break; + cur_ss_mask = new_ss_mask; + } + + cgrp->child_subsys_mask = cur_ss_mask; } /** -- cgit v1.2.3 From 7450e90bbb8d834c190cc8100d1cc41888358c7c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:07 -0400 Subject: cgroup: remove CGRP_ROOT_OPTION_MASK cgroup_root->flags only contains CGRP_ROOT_* flags and there's no reason to mask the flags. Remove CGRP_ROOT_OPTION_MASK. This doesn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 3 --- kernel/cgroup.c | 7 +++---- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28853e771f3b..c4901c19668b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -320,9 +320,6 @@ enum { CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ - - /* mount options live below bit 16 */ - CGRP_ROOT_OPTION_MASK = (1 << 16) - 1, }; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cd02e99d5d3b..5411fffa4b70 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1490,11 +1490,10 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) removed_mask = root->subsys_mask & ~opts.subsys_mask; /* Don't allow flags or name to change at remount */ - if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) || + if ((opts.flags ^ root->flags) || (opts.name && strcmp(opts.name, root->name))) { pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", - opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "", - root->flags & CGRP_ROOT_OPTION_MASK, root->name); + opts.flags, opts.name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; } @@ -1762,7 +1761,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_unlock; } - if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) { + if (root->flags ^ opts.flags) { if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { pr_err("sane_behavior: new mount options should match the existing superblock\n"); ret = -EINVAL; -- cgit v1.2.3 From aa6ec29bee8692ce232132f1a1ea2a1f9196610e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:08 -0400 Subject: cgroup: remove sane_behavior support on non-default hierarchies sane_behavior has been used as a development vehicle for the default unified hierarchy. Now that the default hierarchy is in place, the flag became redundant and confusing as its usage is allowed on all hierarchies. There are gonna be either the default hierarchy or legacy ones. Let's make that clear by removing sane_behavior support on non-default hierarchies. This patch replaces cgroup_sane_behavior() with cgroup_on_dfl(). The comment on top of CGRP_ROOT_SANE_BEHAVIOR is moved to on top of cgroup_on_dfl() with sane_behavior specific part dropped. On the default and legacy hierarchies w/o sane_behavior, this shouldn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- block/blk-throttle.c | 6 +-- include/linux/cgroup.h | 125 +++++++++++++++++++++---------------------------- kernel/cgroup.c | 19 ++++---- kernel/cpuset.c | 33 ++++++------- mm/memcontrol.c | 7 +-- 5 files changed, 86 insertions(+), 104 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 3fdb21a390c1..9273d0969ebd 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -412,13 +412,13 @@ static void throtl_pd_init(struct blkcg_gq *blkg) int rw; /* - * If sane_hierarchy is enabled, we switch to properly hierarchical + * If on the default hierarchy, we switch to properly hierarchical * behavior where limits on a given throtl_grp are applied to the * whole subtree rather than just the group itself. e.g. If 16M * read_bps limit is set on the root group, the whole system can't * exceed 16M for the device. * - * If sane_hierarchy is not enabled, the broken flat hierarchy + * If not on the default hierarchy, the broken flat hierarchy * behavior is retained where all throtl_grps are treated as if * they're all separate root groups right below throtl_data. * Limits of a group don't interact with limits of other groups @@ -426,7 +426,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg) */ parent_sq = &td->service_queue; - if (cgroup_sane_behavior(blkg->blkcg->css.cgroup) && blkg->parent) + if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent) parent_sq = &blkg_to_tg(blkg->parent)->service_queue; throtl_service_queue_init(&tg->service_queue, parent_sq); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c4901c19668b..7bb274487c89 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -256,68 +256,7 @@ struct cgroup { /* cgroup_root->flags */ enum { - /* - * Unfortunately, cgroup core and various controllers are riddled - * with idiosyncrasies and pointless options. The following flag, - * when set, will force sane behavior - some options are forced on, - * others are disallowed, and some controllers will change their - * hierarchical or other behaviors. - * - * The set of behaviors affected by this flag are still being - * determined and developed and the mount option for this flag is - * prefixed with __DEVEL__. The prefix will be dropped once we - * reach the point where all behaviors are compatible with the - * planned unified hierarchy, which will automatically turn on this - * flag. - * - * The followings are the behaviors currently affected this flag. - * - * - Mount options "noprefix", "xattr", "clone_children", - * "release_agent" and "name" are disallowed. - * - * - When mounting an existing superblock, mount options should - * match. - * - * - Remount is disallowed. - * - * - rename(2) is disallowed. - * - * - "tasks" is removed. Everything should be at process - * granularity. Use "cgroup.procs" instead. - * - * - "cgroup.procs" is not sorted. pids will be unique unless they - * got recycled inbetween reads. - * - * - "release_agent" and "notify_on_release" are removed. - * Replacement notification mechanism will be implemented. - * - * - "cgroup.clone_children" is removed. - * - * - "cgroup.subtree_populated" is available. Its value is 0 if - * the cgroup and its descendants contain no task; otherwise, 1. - * The file also generates kernfs notification which can be - * monitored through poll and [di]notify when the value of the - * file changes. - * - * - If mount is requested with sane_behavior but without any - * subsystem, the default unified hierarchy is mounted. - * - * - cpuset: tasks will be kept in empty cpusets when hotplug happens - * and take masks of ancestors with non-empty cpus/mems, instead of - * being moved to an ancestor. - * - * - cpuset: a task can be moved into an empty cpuset, and again it - * takes masks of ancestors. - * - * - memcg: use_hierarchy is on by default and the cgroup file for - * the flag is not created. - * - * - blkcg: blk-throttle becomes properly hierarchical. - * - * - debug: disallowed on the default hierarchy. - */ - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), - + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ }; @@ -531,20 +470,64 @@ struct cftype { extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +/** + * cgroup_on_dfl - test whether a cgroup is on the default hierarchy + * @cgrp: the cgroup of interest + * + * The default hierarchy is the v2 interface of cgroup and this function + * can be used to test whether a cgroup is on the default hierarchy for + * cases where a subsystem should behave differnetly depending on the + * interface version. + * + * The set of behaviors which change on the default hierarchy are still + * being determined and the mount option is prefixed with __DEVEL__. + * + * List of changed behaviors: + * + * - Mount options "noprefix", "xattr", "clone_children", "release_agent" + * and "name" are disallowed. + * + * - When mounting an existing superblock, mount options should match. + * + * - Remount is disallowed. + * + * - rename(2) is disallowed. + * + * - "tasks" is removed. Everything should be at process granularity. Use + * "cgroup.procs" instead. + * + * - "cgroup.procs" is not sorted. pids will be unique unless they got + * recycled inbetween reads. + * + * - "release_agent" and "notify_on_release" are removed. Replacement + * notification mechanism will be implemented. + * + * - "cgroup.clone_children" is removed. + * + * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup + * and its descendants contain no task; otherwise, 1. The file also + * generates kernfs notification which can be monitored through poll and + * [di]notify when the value of the file changes. + * + * - cpuset: tasks will be kept in empty cpusets when hotplug happens and + * take masks of ancestors with non-empty cpus/mems, instead of being + * moved to an ancestor. + * + * - cpuset: a task can be moved into an empty cpuset, and again it takes + * masks of ancestors. + * + * - memcg: use_hierarchy is on by default and the cgroup file for the flag + * is not created. + * + * - blkcg: blk-throttle becomes properly hierarchical. + * + * - debug: disallowed on the default hierarchy. + */ static inline bool cgroup_on_dfl(const struct cgroup *cgrp) { return cgrp->root == &cgrp_dfl_root; } -/* - * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This - * function can be called as long as @cgrp is accessible. - */ -static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) -{ - return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; -} - /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_has_tasks(struct cgroup *cgrp) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0ea54af6b133..fb07c6d43aff 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1470,8 +1470,8 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) struct cgroup_sb_opts opts; unsigned int added_mask, removed_mask; - if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) { - pr_err("sane_behavior: remount is not allowed\n"); + if (root == &cgrp_dfl_root) { + pr_err("remount is not allowed\n"); return -EINVAL; } @@ -2943,9 +2943,9 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, /* * This isn't a proper migration and its usefulness is very - * limited. Disallow if sane_behavior. + * limited. Disallow on the default hierarchy. */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return -EPERM; /* @@ -3031,7 +3031,7 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], /* does cft->flags tell us to skip this file on @cgrp? */ if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; - if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) + if ((cft->flags & CFTYPE_INSANE) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; @@ -3764,8 +3764,9 @@ after: * * All this extra complexity was caused by the original implementation * committing to an entirely unnecessary property. In the long term, we - * want to do away with it. Explicitly scramble sort order if - * sane_behavior so that no such expectation exists in the new interface. + * want to do away with it. Explicitly scramble sort order if on the + * default hierarchy so that no such expectation exists in the new + * interface. * * Scrambling is done by swapping every two consecutive bits, which is * non-identity one-to-one mapping which disturbs sort order sufficiently. @@ -3780,7 +3781,7 @@ static pid_t pid_fry(pid_t pid) static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid) { - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return pid_fry(pid); else return pid; @@ -3883,7 +3884,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, css_task_iter_end(&it); length = n; /* now sort & (if procs) strip out duplicates */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) sort(array, length, sizeof(pid_t), fried_cmppid, NULL); else sort(array, length, sizeof(pid_t), cmppid, NULL); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f6b33c696224..f9d4807c869f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1383,12 +1383,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, mutex_lock(&cpuset_mutex); - /* - * We allow to move tasks into an empty cpuset if sane_behavior - * flag is set. - */ + /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; - if (!cgroup_sane_behavior(css->cgroup) && + if (!cgroup_on_dfl(css->cgroup) && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; @@ -2030,7 +2027,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) static cpumask_t off_cpus; static nodemask_t off_mems; bool is_empty; - bool sane = cgroup_sane_behavior(cs->css.cgroup); + bool on_dfl = cgroup_on_dfl(cs->css.cgroup); retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2054,12 +2051,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' cpumask - * for empty cpuset to take on ancestor's cpumask. Otherwise, don't - * call update_tasks_cpumask() if the cpuset becomes empty, as - * the tasks in it will be migrated to an ancestor. + * If on_dfl, we need to update tasks' cpumask for empty cpuset to + * take on ancestor's cpumask. Otherwise, don't call + * update_tasks_cpumask() if the cpuset becomes empty, as the tasks + * in it will be migrated to an ancestor. */ - if ((sane && cpumask_empty(cs->cpus_allowed)) || + if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) update_tasks_cpumask(cs); @@ -2068,12 +2065,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' nodemask - * for empty cpuset to take on ancestor's nodemask. Otherwise, don't - * call update_tasks_nodemask() if the cpuset becomes empty, as - * the tasks in it will be migratd to an ancestor. + * If on_dfl, we need to update tasks' nodemask for empty cpuset to + * take on ancestor's nodemask. Otherwise, don't call + * update_tasks_nodemask() if the cpuset becomes empty, as the + * tasks in it will be migratd to an ancestor. */ - if ((sane && nodes_empty(cs->mems_allowed)) || + if ((on_dfl && nodes_empty(cs->mems_allowed)) || (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) update_tasks_nodemask(cs); @@ -2083,13 +2080,13 @@ retry: mutex_unlock(&cpuset_mutex); /* - * If sane_behavior flag is set, we'll keep tasks in empty cpusets. + * If on_dfl, we'll keep tasks in empty cpusets. * * Otherwise move tasks to the nearest ancestor with execution * resources. This is full cgroup operation which will * also call back into cpuset. Should be done outside any lock. */ - if (!sane && is_empty) + if (!on_dfl && is_empty) remove_tasks_in_empty_cpuset(cs); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index db536e90c8ee..a2a4bd69a7ae 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7024,16 +7024,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css, /* * Cgroup retains root cgroups across [un]mount cycles making it necessary - * to verify sane_behavior flag on each mount attempt. + * to verify whether we're attached to the default hierarchy on each mount + * attempt. */ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) { /* - * use_hierarchy is forced with sane_behavior. cgroup core + * use_hierarchy is forced on the default hierarchy. cgroup core * guarantees that @root doesn't have any children, so turning it * on for the root memcg is enough. */ - if (cgroup_sane_behavior(root_css->cgroup)) + if (cgroup_on_dfl(root_css->cgroup)) mem_cgroup_from_css(root_css)->use_hierarchy = true; } -- cgit v1.2.3 From 5577964e64692e17cc498854b7e0833e6532cd64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: rename cgroup_subsys->base_cftypes to ->legacy_cftypes Currently, cgroup_subsys->base_cftypes is used for both the unified default hierarchy and legacy ones and subsystems can mark each file with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE if it has to appear only on one of them. This is quite hairy and error-prone. Also, we may end up exposing interface files to the default hierarchy without thinking it through. cgroup_subsys will grow two separate cftype arrays and apply each only on the hierarchies of the matching type. This will allow organizing cftypes in a lot clearer way and encourage subsystems to scrutinize the interface which is being exposed in the new default hierarchy. In preparation, this patch renames cgroup_subsys->base_cftypes to cgroup_subsys->legacy_cftypes. This patch is pure rename. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vivek Goyal Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Aneesh Kumar K.V --- block/blk-cgroup.c | 2 +- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 4 ++-- kernel/cgroup_freezer.c | 2 +- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- mm/memcontrol.c | 2 +- net/core/netclassid_cgroup.c | 2 +- net/core/netprio_cgroup.c | 2 +- security/device_cgroup.c | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 63c3cd454d1e..5cfbc723041c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -924,7 +924,7 @@ struct cgroup_subsys blkio_cgrp_subsys = { .css_offline = blkcg_css_offline, .css_free = blkcg_css_free, .can_attach = blkcg_can_attach, - .base_cftypes = blkcg_files, + .legacy_cftypes = blkcg_files, #ifdef CONFIG_MEMCG /* * This ensures that, if available, memcg is automatically enabled diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7bb274487c89..a6e9c2eeab89 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -672,7 +672,7 @@ struct cgroup_subsys { struct list_head cfts; /* base cftypes, automatically registered with subsys itself */ - struct cftype *base_cftypes; + struct cftype *legacy_cftypes; /* used on the legacy hierarchies */ /* * A subsystem may depend on other subsystems. When such subsystem diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7e5fee5d6422..6496a83b0314 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4890,7 +4890,7 @@ int __init cgroup_init(void) */ if (!ss->disabled) { cgrp_dfl_root.subsys_mask |= 1 << ss->id; - WARN_ON(cgroup_add_cftypes(ss, ss->base_cftypes)); + WARN_ON(cgroup_add_cftypes(ss, ss->legacy_cftypes)); } } @@ -5480,6 +5480,6 @@ static struct cftype debug_files[] = { struct cgroup_subsys debug_cgrp_subsys = { .css_alloc = debug_css_alloc, .css_free = debug_css_free, - .base_cftypes = debug_files, + .legacy_cftypes = debug_files, }; #endif /* CONFIG_CGROUP_DEBUG */ diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index a79e40f9d700..92b98cc0ee76 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -480,5 +480,5 @@ struct cgroup_subsys freezer_cgrp_subsys = { .css_free = freezer_css_free, .attach = freezer_attach, .fork = freezer_fork, - .base_cftypes = files, + .legacy_cftypes = files, }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 53a9bbf16391..f337f42a07ac 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2036,7 +2036,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, .bind = cpuset_bind, - .base_cftypes = files, + .legacy_cftypes = files, .early_init = 1, }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe..6628e8014824 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8088,7 +8088,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, .exit = cpu_cgroup_exit, - .base_cftypes = cpu_files, + .legacy_cftypes = cpu_files, .early_init = 1, }; diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 9cf350c94ec4..dd7cbb55bbf2 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -278,6 +278,6 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val) struct cgroup_subsys cpuacct_cgrp_subsys = { .css_alloc = cpuacct_css_alloc, .css_free = cpuacct_css_free, - .base_cftypes = files, + .legacy_cftypes = files, .early_init = 1, }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a2a4bd69a7ae..8331f2739e2c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7048,7 +7048,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, - .base_cftypes = mem_cgroup_files, + .legacy_cftypes = mem_cgroup_files, .early_init = 0, }; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 30d903b19c62..1f2a126f4ffa 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -107,5 +107,5 @@ struct cgroup_subsys net_cls_cgrp_subsys = { .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = cgrp_attach, - .base_cftypes = ss_files, + .legacy_cftypes = ss_files, }; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 2f385b9bccc0..cbd0a199bf52 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -249,7 +249,7 @@ struct cgroup_subsys net_prio_cgrp_subsys = { .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = net_prio_attach, - .base_cftypes = ss_files, + .legacy_cftypes = ss_files, }; static int netprio_device_event(struct notifier_block *unused, diff --git a/security/device_cgroup.c b/security/device_cgroup.c index d9d69e6930ed..188c1d26393b 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -796,7 +796,7 @@ struct cgroup_subsys devices_cgrp_subsys = { .css_free = devcgroup_css_free, .css_online = devcgroup_online, .css_offline = devcgroup_offline, - .base_cftypes = dev_cgroup_files, + .legacy_cftypes = dev_cgroup_files, }; /** -- cgit v1.2.3 From 2cf669a58dc08fa065a8bd0dca866c0e6cb358cc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: replace cgroup_add_cftypes() with cgroup_add_legacy_cftypes() Currently, cftypes added by cgroup_add_cftypes() are used for both the unified default hierarchy and legacy ones and subsystems can mark each file with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE if it has to appear only on one of them. This is quite hairy and error-prone. Also, we may end up exposing interface files to the default hierarchy without thinking it through. cgroup_subsys will grow two separate cftype addition functions and apply each only on the hierarchies of the matching type. This will allow organizing cftypes in a lot clearer way and encourage subsystems to scrutinize the interface which is being exposed in the new default hierarchy. In preparation, this patch adds cgroup_add_legacy_cftypes() which currently is a simple wrapper around cgroup_add_cftypes() and replaces all cgroup_add_cftypes() usages with it. While at it, this patch drops a completely spurious return from __hugetlb_cgroup_file_init(). This patch doesn't introduce any functional differences. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Aneesh Kumar K.V --- block/blk-cgroup.c | 3 ++- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 7 ++++++- mm/hugetlb_cgroup.c | 5 ++--- mm/memcontrol.c | 3 ++- net/ipv4/tcp_memcontrol.c | 2 +- 6 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5cfbc723041c..2541cf043ba8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1124,7 +1124,8 @@ int __init blkcg_policy_register(struct blkcg_policy *pol) /* everything is in place, add intf files for the new policy */ if (pol->cftypes) - WARN_ON(cgroup_add_cftypes(&blkio_cgrp_subsys, pol->cftypes)); + WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys, + pol->cftypes)); ret = 0; out_unlock: mutex_unlock(&blkcg_pol_mutex); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a6e9c2eeab89..f5f0feef2701 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -590,7 +590,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); -int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6496a83b0314..c275aa439a6f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3170,7 +3170,7 @@ int cgroup_rm_cftypes(struct cftype *cfts) * function currently returns 0 as long as @cfts registration is successful * even if some file creation attempts on existing cgroups fail. */ -int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { int ret; @@ -3195,6 +3195,11 @@ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) return ret; } +int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +{ + return cgroup_add_cftypes(ss, cfts); +} + /** * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 493f758445e7..9aae6f47433f 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -358,9 +358,8 @@ static void __init __hugetlb_cgroup_file_init(int idx) cft = &h->cgroup_files[4]; memset(cft, 0, sizeof(*cft)); - WARN_ON(cgroup_add_cftypes(&hugetlb_cgrp_subsys, h->cgroup_files)); - - return; + WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, + h->cgroup_files)); } void __init hugetlb_cgroup_file_init(void) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8331f2739e2c..b6b3c6fea509 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7065,7 +7065,8 @@ __setup("swapaccount=", enable_swap_account); static void __init memsw_file_init(void) { - WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, memsw_cgroup_files)); + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, + memsw_cgroup_files)); } static void __init enable_swap_cgroup(void) diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index f7a2ec3ac584..3af522622fad 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -222,7 +222,7 @@ static struct cftype tcp_files[] = { static int __init tcp_memcontrol_init(void) { - WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files)); + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files)); return 0; } __initcall(tcp_memcontrol_init); -- cgit v1.2.3 From a8ddc8215e1a4cd9dc5d6210811cfc381a489ec2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:10 -0400 Subject: cgroup: distinguish the default and legacy hierarchies when handling cftypes Until now, cftype arrays carried files for both the default and legacy hierarchies and the files which needed to be used on only one of them were flagged with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE. This gets confusing very quickly and we may end up exposing interface files to the default hierarchy without thinking it through. This patch makes cgroup core provide separate sets of interfaces for cftype handling so that the cftypes for the default and legacy hierarchies are clearly distinguished. The previous two patches renamed the existing ones so that they clearly indicate that they're for the legacy hierarchies. This patch adds the interface for the default hierarchy and apply them selectively depending on the hierarchy type. * cftypes added through cgroup_subsys->dfl_cftypes and cgroup_add_dfl_cftypes() only show up on the default hierarchy. * cftypes added through cgroup_subsys->legacy_cftypes and cgroup_add_legacy_cftypes() only show up on the legacy hierarchies. * cgroup_subsys->dfl_cftypes and ->legacy_cftypes can point to the same array for the cases where the interface files are identical on both types of hierarchies. * This makes all the existing subsystem interface files legacy-only by default and all subsystems will have no interface file created when enabled on the default hierarchy. Each subsystem should explicitly review and compose the interface for the default hierarchy. * A boot param "cgroup__DEVEL__legacy_files_on_dfl" is added which makes subsystems which haven't decided the interface files for the default hierarchy to present the legacy files on the default hierarchy so that its behavior on the default hierarchy can be tested. As the awkward name suggests, this is for development only. * memcg's CFTYPE_INSANE on "use_hierarchy" is noop now as the whole array isn't used on the default hierarchy. The flag is removed. v2: Updated documentation for cgroup__DEVEL__legacy_files_on_dfl. v3: Clear CFTYPE_ONLY_ON_DFL and CFTYPE_INSANE when cfts are removed as suggested by Li. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vivek Goyal Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Aneesh Kumar K.V --- Documentation/cgroups/unified-hierarchy.txt | 18 ++++++--- include/linux/cgroup.h | 9 ++++- kernel/cgroup.c | 62 +++++++++++++++++++++++++++-- mm/memcontrol.c | 1 - 4 files changed, 78 insertions(+), 12 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index a7a2205539a7..4f4563277864 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -94,12 +94,18 @@ change soon. mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT -All controllers which are not bound to other hierarchies are -automatically bound to unified hierarchy and show up at the root of -it. Controllers which are enabled only in the root of unified -hierarchy can be bound to other hierarchies. This allows mixing -unified hierarchy with the traditional multiple hierarchies in a fully -backward compatible way. +All controllers which support the unified hierarchy and are not bound +to other hierarchies are automatically bound to unified hierarchy and +show up at the root of it. Controllers which are enabled only in the +root of unified hierarchy can be bound to other hierarchies. This +allows mixing unified hierarchy with the traditional multiple +hierarchies in a fully backward compatible way. + +For development purposes, the following boot parameter makes all +controllers to appear on the unified hierarchy whether supported or +not. + + cgroup__DEVEL__legacy_files_on_dfl A controller can be moved across hierarchies only after the controller is no longer referenced in its current hierarchy. Because per-cgroup diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f5f0feef2701..9f76236ac158 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -590,6 +590,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); +int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); @@ -671,8 +672,12 @@ struct cgroup_subsys { */ struct list_head cfts; - /* base cftypes, automatically registered with subsys itself */ - struct cftype *legacy_cftypes; /* used on the legacy hierarchies */ + /* + * Base cftypes which are automatically registered. The two can + * point to the same array. + */ + struct cftype *dfl_cftypes; /* for the default hierarchy */ + struct cftype *legacy_cftypes; /* for the legacy hierarchies */ /* * A subsystem may depend on other subsystems. When such subsystem diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c275aa439a6f..374ebdf74f35 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -149,6 +149,12 @@ struct cgroup_root cgrp_dfl_root; */ static bool cgrp_dfl_root_visible; +/* + * Set by the boot param of the same name and makes subsystems with NULL + * ->dfl_files to use ->legacy_files on the default hierarchy. + */ +static bool cgroup_legacy_files_on_dfl; + /* some controllers are not supported in the default hierarchy */ static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0 #ifdef CONFIG_CGROUP_DEBUG @@ -3085,6 +3091,9 @@ static void cgroup_exit_cftypes(struct cftype *cfts) kfree(cft->kf_ops); cft->kf_ops = NULL; cft->ss = NULL; + + /* revert flags set by cgroup core while adding @cfts */ + cft->flags &= ~(CFTYPE_ONLY_ON_DFL | CFTYPE_INSANE); } } @@ -3195,8 +3204,37 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) return ret; } +/** + * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy + * @ss: target cgroup subsystem + * @cfts: zero-length name terminated array of cftypes + * + * Similar to cgroup_add_cftypes() but the added files are only used for + * the default hierarchy. + */ +int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +{ + struct cftype *cft; + + for (cft = cfts; cft && cft->name[0] != '\0'; cft++) + cft->flags |= CFTYPE_ONLY_ON_DFL; + return cgroup_add_cftypes(ss, cfts); +} + +/** + * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies + * @ss: target cgroup subsystem + * @cfts: zero-length name terminated array of cftypes + * + * Similar to cgroup_add_cftypes() but the added files are only used for + * the legacy hierarchies. + */ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { + struct cftype *cft; + + for (cft = cfts; cft && cft->name[0] != '\0'; cft++) + cft->flags |= CFTYPE_INSANE; return cgroup_add_cftypes(ss, cfts); } @@ -4893,9 +4931,19 @@ int __init cgroup_init(void) * disabled flag and cftype registration needs kmalloc, * both of which aren't available during early_init. */ - if (!ss->disabled) { - cgrp_dfl_root.subsys_mask |= 1 << ss->id; - WARN_ON(cgroup_add_cftypes(ss, ss->legacy_cftypes)); + if (ss->disabled) + continue; + + cgrp_dfl_root.subsys_mask |= 1 << ss->id; + + if (cgroup_legacy_files_on_dfl && !ss->dfl_cftypes) + ss->dfl_cftypes = ss->legacy_cftypes; + + if (ss->dfl_cftypes == ss->legacy_cftypes) { + WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); + } else { + WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes)); + WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes)); } } @@ -5291,6 +5339,14 @@ static int __init cgroup_disable(char *str) } __setup("cgroup_disable=", cgroup_disable); +static int __init cgroup_set_legacy_files_on_dfl(char *str) +{ + printk("cgroup: using legacy files on the default hierarchy\n"); + cgroup_legacy_files_on_dfl = true; + return 0; +} +__setup("cgroup__DEVEL__legacy_files_on_dfl", cgroup_set_legacy_files_on_dfl); + /** * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b6b3c6fea509..45c10c6fc3ce 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6003,7 +6003,6 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "use_hierarchy", - .flags = CFTYPE_INSANE, .write_u64 = mem_cgroup_hierarchy_write, .read_u64 = mem_cgroup_hierarchy_read, }, -- cgit v1.2.3 From 05ebb6e60f044a9cef2549b6204559276500f363 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:10 -0400 Subject: cgroup: make CFTYPE_ONLY_ON_DFL and CFTYPE_NO_ internal to cgroup core cgroup now distinguishes cftypes for the default and legacy hierarchies more explicitly by using separate arrays and CFTYPE_ONLY_ON_DFL and CFTYPE_INSANE should be and are used only inside cgroup core proper. Let's make it clear that the flags are internal by prefixing them with double underscores. CFTYPE_INSANE is renamed to __CFTYPE_NOT_ON_DFL for consistency. The two flags are also collected and assigned bits >= 16 so that they aren't mixed with the published flags. v2: Convert the extra ones in cgroup_exit_cftypes() which are added by revision to the previous patch. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 6 ++++-- kernel/cgroup.c | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9f76236ac158..b5223c570eba 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -384,9 +384,11 @@ struct css_set { enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ - CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ - CFTYPE_ONLY_ON_DFL = (1 << 4), /* only on default hierarchy */ + + /* internal flags, do not use outside cgroup core proper */ + __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ + __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ }; #define MAX_CFTYPE_NAME 64 diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 374ebdf74f35..f41d164a3d54 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3031,9 +3031,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], for (cft = cfts; cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ - if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) + if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; - if ((cft->flags & CFTYPE_INSANE) && cgroup_on_dfl(cgrp)) + if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; @@ -3093,7 +3093,7 @@ static void cgroup_exit_cftypes(struct cftype *cfts) cft->ss = NULL; /* revert flags set by cgroup core while adding @cfts */ - cft->flags &= ~(CFTYPE_ONLY_ON_DFL | CFTYPE_INSANE); + cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL); } } @@ -3217,7 +3217,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) - cft->flags |= CFTYPE_ONLY_ON_DFL; + cft->flags |= __CFTYPE_ONLY_ON_DFL; return cgroup_add_cftypes(ss, cfts); } @@ -3234,7 +3234,7 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) - cft->flags |= CFTYPE_INSANE; + cft->flags |= __CFTYPE_NOT_ON_DFL; return cgroup_add_cftypes(ss, cfts); } -- cgit v1.2.3