From 4b9502e63b5e2b1b5ef491919d3219b9440fe0b3 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Wed, 8 Mar 2017 10:00:40 +0200 Subject: kernel: convert css_set.refcount from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup/cgroup-v1.c') diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 56eba9caa632..c4a68c438fde 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -346,7 +346,7 @@ static int cgroup_task_count(const struct cgroup *cgrp) spin_lock_irq(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) - count += atomic_read(&link->cset->refcount); + count += refcount_read(&link->cset->refcount); spin_unlock_irq(&css_set_lock); return count; } @@ -1286,7 +1286,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, u64 count; rcu_read_lock(); - count = atomic_read(&task_css_set(current)->refcount); + count = refcount_read(&task_css_set(current)->refcount); rcu_read_unlock(); return count; } -- cgit v1.2.3 From 9732adc5d6520238223df16630f1f8cad2269317 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Wed, 19 Apr 2017 10:15:59 +0800 Subject: cgroup: avoid attaching a cgroup root to two different superblocks, take 2 Commit bfb0b80db5f9 ("cgroup: avoid attaching a cgroup root to two different superblocks") is broken. Now we try to fix the race by delaying the initialization of cgroup root refcnt until a superblock has been allocated. Reported-by: Dmitry Vyukov Reported-by: Andrei Vagin Tested-by: Andrei Vagin Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-internal.h | 2 +- kernel/cgroup/cgroup-v1.c | 16 +++++++++++++++- kernel/cgroup/cgroup.c | 8 ++++---- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'kernel/cgroup/cgroup-v1.c') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 4567f12b02e9..00f4d6bf048f 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -164,7 +164,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct cgroup_root *root, unsigned long magic, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index c4a68c438fde..a478995e7ed3 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1072,6 +1072,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, struct cgroup_subsys *ss; struct dentry *dentry; int i, ret; + bool new_root = false; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); @@ -1181,10 +1182,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ret = -ENOMEM; goto out_unlock; } + new_root = true; init_cgroup_root(root, &opts); - ret = cgroup_setup_root(root, opts.subsys_mask); + ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD); if (ret) cgroup_free_root(root); @@ -1200,6 +1202,18 @@ out_free: dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, CGROUP_SUPER_MAGIC, ns); + /* + * There's a race window after we release cgroup_mutex and before + * allocating a superblock. Make sure a concurrent process won't + * be able to re-use the root during this window by delaying the + * initialization of root refcnt. + */ + if (new_root) { + mutex_lock(&cgroup_mutex); + percpu_ref_reinit(&root->cgrp.self.refcnt); + mutex_unlock(&cgroup_mutex); + } + /* * If @pinned_sb, we're reusing an existing root and holding an * extra ref on its sb. Mount is complete. Put the extra ref. diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 10951d5e35d2..38d9386f46e7 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1645,7 +1645,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -1661,8 +1661,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) root_cgrp->id = ret; root_cgrp->ancestor_ids[0] = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, - GFP_KERNEL); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, + ref_flags, GFP_KERNEL); if (ret) goto out; @@ -4517,7 +4517,7 @@ int __init cgroup_init(void) hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys)); - BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); + BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0)); mutex_unlock(&cgroup_mutex); -- cgit v1.2.3