Diffstat (limited to 'kernel/cgroup')
-rw-r--r--	kernel/cgroup/cgroup-v1.c	|  16
-rw-r--r--	kernel/cgroup/cgroup.c		|  86
-rw-r--r--	kernel/cgroup/cpuset.c		| 208
-rw-r--r--	kernel/cgroup/legacy_freezer.c	|   7
-rw-r--r--	kernel/cgroup/rstat.c		|   8
5 files changed, 226 insertions(+), 99 deletions(-)
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 52bb5a74a23b..aeef06c465ef 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -58,7 +58,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
struct cgroup_root *root;
int retval = 0;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
cgroup_attach_lock(true);
for_each_root(root) {
struct cgroup *from_cgrp;
@@ -72,7 +72,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
break;
}
cgroup_attach_unlock(true);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return retval;
}
@@ -106,7 +106,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
if (ret)
return ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
percpu_down_write(&cgroup_threadgroup_rwsem);
@@ -145,7 +145,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
out_err:
cgroup_migrate_finish(&mgctx);
percpu_up_write(&cgroup_threadgroup_rwsem);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -847,13 +847,13 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
kernfs_break_active_protection(new_parent);
kernfs_break_active_protection(kn);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret)
TRACE_CGROUP_PATH(rename, cgrp);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
kernfs_unbreak_active_protection(kn);
kernfs_unbreak_active_protection(new_parent);
@@ -1119,7 +1119,7 @@ int cgroup1_reconfigure(struct fs_context *fc)
trace_cgroup_remount(root);
out_unlock:
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -1246,7 +1246,7 @@ int cgroup1_get_tree(struct fs_context *fc)
if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
ret = 1; /* restart */
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
if (!ret)
ret = cgroup_do_get_tree(fc);
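Every mutex_lock(&cgroup_mutex)/mutex_unlock(&cgroup_mutex) pair above becomes a call to the new helpers. A minimal sketch of what this conversion assumes the wrappers look like (thin aliases over the same mutex, declared in include/linux/cgroup.h):

static inline void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}

static inline void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}

The indirection presumably lets callers outside kernel/cgroup/ take the lock without referencing cgroup_mutex directly.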
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 935e8121b21e..625d7483951c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1391,7 +1391,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_favor_dynmods(root, false);
cgroup_exit_root_id(root);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
cgroup_rstat_exit(cgrp);
kernfs_destroy_root(root->kf_root);
@@ -1465,8 +1465,18 @@ static struct cgroup *current_cgns_cgroup_dfl(void)
{
struct css_set *cset;
- cset = current->nsproxy->cgroup_ns->root_cset;
- return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
+ if (current->nsproxy) {
+ cset = current->nsproxy->cgroup_ns->root_cset;
+ return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
+ } else {
+ /*
+ * NOTE: This function may be called from bpf_cgroup_from_id()
+ * on a task which has already passed exit_task_namespaces() and
+ * nsproxy == NULL. Fall back to cgrp_dfl_root which will make all
+ * cgroups visible for lookups.
+ */
+ return &cgrp_dfl_root.cgrp;
+ }
}
/* look up cgroup associated with given css_set on the specified hierarchy */
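The new nsproxy NULL check matters because, per the NOTE, the lookup may now run in the context of a task that is already exiting. A hedged sketch of the kind of caller involved — bpf_cgroup_from_id() and bpf_cgroup_release() are the kfuncs the NOTE references, while the program itself is hypothetical:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
extern void bpf_cgroup_release(struct cgroup *cgrp) __ksym;

SEC("iter/task")
int probe_cgroup(struct bpf_iter__task *ctx)
{
	struct cgroup *cgrp = bpf_cgroup_from_id(1);	/* 1 = root cgroup ID */

	if (!cgrp)
		return 0;
	/* ... inspect cgrp ... */
	bpf_cgroup_release(cgrp);	/* drop the lookup's reference */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";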
@@ -1625,7 +1635,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn)
else
cgrp = kn->parent->priv;
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
kernfs_unbreak_active_protection(kn);
cgroup_put(cgrp);
@@ -1670,7 +1680,7 @@ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
if (drain_offline)
cgroup_lock_and_drain_offline(cgrp);
else
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
if (!cgroup_is_dead(cgrp))
return cgrp;
@@ -2167,13 +2177,13 @@ int cgroup_do_get_tree(struct fs_context *fc)
struct super_block *sb = fc->root->d_sb;
struct cgroup *cgrp;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(fc->root);
@@ -2356,13 +2366,13 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
{
int ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -2388,7 +2398,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
int hierarchy_id = 1;
int ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
@@ -2402,7 +2412,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
}
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
@@ -3111,7 +3121,7 @@ void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
int ssid;
restart:
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
for_each_subsys(ss, ssid) {
@@ -3125,7 +3135,7 @@ restart:
prepare_to_wait(&dsct->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
schedule();
finish_wait(&dsct->offline_waitq, &wait);
@@ -3761,7 +3771,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
}
psi = cgroup_psi(cgrp);
- new = psi_trigger_create(psi, buf, res);
+ new = psi_trigger_create(psi, buf, res, of->file);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
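Passing of->file implies psi_trigger_create() grew a struct file * parameter; the prototype below is inferred from this call site rather than quoted from the psi header:

struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
				       enum psi_res res, struct file *file);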
@@ -4374,9 +4384,9 @@ int cgroup_rm_cftypes(struct cftype *cfts)
if (!(cfts[0].flags & __CFTYPE_ADDED))
return -ENOENT;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
ret = cgroup_rm_cftypes_locked(cfts);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -4408,14 +4418,14 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
if (ret)
return ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
list_add_tail(&cfts->node, &ss->cfts);
ret = cgroup_apply_cftypes(cfts, true);
if (ret)
cgroup_rm_cftypes_locked(cfts);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -5385,7 +5395,7 @@ static void css_release_work_fn(struct work_struct *work)
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
css->flags |= CSS_RELEASED;
list_del_rcu(&css->sibling);
@@ -5426,7 +5436,7 @@ static void css_release_work_fn(struct work_struct *work)
NULL);
}
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
@@ -5774,7 +5784,7 @@ static void css_killed_work_fn(struct work_struct *work)
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
do {
offline_css(css);
@@ -5783,7 +5793,7 @@ static void css_killed_work_fn(struct work_struct *work)
css = css->parent;
} while (css && atomic_dec_and_test(&css->online_cnt));
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
}
/* css kill confirmation processing requires process context, bounce */
@@ -5967,7 +5977,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
pr_debug("Initializing cgroup subsys %s\n", ss->name);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
idr_init(&ss->css_idr);
INIT_LIST_HEAD(&ss->cfts);
@@ -6011,7 +6021,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
BUG_ON(online_css(css));
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
}
/**
@@ -6071,7 +6081,7 @@ int __init cgroup_init(void)
get_user_ns(init_cgroup_ns.user_ns);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
/*
* Add init_css_set to the hash table so that dfl_root can link to
@@ -6082,7 +6092,7 @@ int __init cgroup_init(void)
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
for_each_subsys(ss, ssid) {
if (ss->early_init) {
@@ -6134,9 +6144,9 @@ int __init cgroup_init(void)
if (ss->bind)
ss->bind(init_css_set.subsys[ssid]);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
css_populate_dir(init_css_set.subsys[ssid]);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
}
/* init_css_set.subsys[] has been updated, re-hash */
@@ -6241,7 +6251,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf)
goto out;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
for_each_root(root) {
@@ -6296,7 +6306,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
retval = 0;
out_unlock:
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
kfree(buf);
out:
return retval;
@@ -6380,7 +6390,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
struct file *f;
if (kargs->flags & CLONE_INTO_CGROUP)
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
cgroup_threadgroup_change_begin(current);
@@ -6455,7 +6465,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
err:
cgroup_threadgroup_change_end(current);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
if (f)
fput(f);
if (dst_cgrp)
@@ -6482,7 +6492,7 @@ static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
struct cgroup *cgrp = kargs->cgrp;
struct css_set *cset = kargs->cset;
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
if (cset) {
put_css_set(cset);
@@ -6856,14 +6866,12 @@ EXPORT_SYMBOL_GPL(cgroup_get_from_path);
struct cgroup *cgroup_v1v2_get_from_fd(int fd)
{
struct cgroup *cgrp;
- struct file *f;
-
- f = fget_raw(fd);
- if (!f)
+ struct fd f = fdget_raw(fd);
+ if (!f.file)
return ERR_PTR(-EBADF);
- cgrp = cgroup_v1v2_get_from_file(f);
- fput(f);
+ cgrp = cgroup_v1v2_get_from_file(f.file);
+ fdput(f);
return cgrp;
}
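fdget_raw() returns a struct fd rather than a bare struct file *; the struct records whether a real reference was taken, so fdput() can skip the fput() when the file was merely borrowed from the fd table. A hypothetical consumer of the same pattern:

#include <linux/file.h>
#include <linux/fs.h>

/* Hypothetical helper; mirrors the fdget_raw()/fdput() usage above. */
static int fd_is_regular(int fd)
{
	struct fd f = fdget_raw(fd);	/* may borrow instead of taking a ref */
	int ret;

	if (!f.file)
		return -EBADF;
	ret = S_ISREG(file_inode(f.file)->i_mode);
	fdput(f);			/* fput() only if a ref was taken */
	return ret;
}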
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 636f1c682ac0..e4ca2dd2b764 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1209,7 +1209,9 @@ void rebuild_sched_domains(void)
*
* Iterate through each task of @cs updating its cpus_allowed to the
* effective cpuset's. As this function is called with cpuset_rwsem held,
- * cpuset membership stays stable.
+ * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask()
+ * is used instead of effective_cpus to make sure all offline CPUs are also
+ * included, as hotplug code won't update cpumasks for tasks in top_cpuset.
*/
static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
{
@@ -1219,15 +1221,18 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it))) {
- /*
- * Percpu kthreads in top_cpuset are ignored
- */
- if (top_cs && (task->flags & PF_KTHREAD) &&
- kthread_is_per_cpu(task))
- continue;
+ const struct cpumask *possible_mask = task_cpu_possible_mask(task);
- cpumask_and(new_cpus, cs->effective_cpus,
- task_cpu_possible_mask(task));
+ if (top_cs) {
+ /*
+ * Percpu kthreads in top_cpuset are ignored
+ */
+ if ((task->flags & PF_KTHREAD) && kthread_is_per_cpu(task))
+ continue;
+ cpumask_andnot(new_cpus, possible_mask, cs->subparts_cpus);
+ } else {
+ cpumask_and(new_cpus, possible_mask, cs->effective_cpus);
+ }
set_cpus_allowed_ptr(task, new_cpus);
}
css_task_iter_end(&it);
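A worked example of the two branches, using hypothetical masks on an 8-CPU system:

/*
 * CPUs 6-7 granted to a partition (subparts_cpus = 6-7), and a child
 * cpuset with effective_cpus = 2-3:
 *
 *   task in top_cpuset:  new_cpus = possible_mask & ~subparts_cpus = 0-5
 *                        (offline CPUs among 0-5 remain included)
 *   task elsewhere:      new_cpus = possible_mask & effective_cpus = 2-3
 */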
@@ -1513,7 +1518,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
spin_unlock_irq(&callback_lock);
if (adding || deleting)
- update_tasks_cpumask(parent, tmp->new_cpus);
+ update_tasks_cpumask(parent, tmp->addmask);
/*
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
@@ -1770,10 +1775,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
/*
* Use the cpumasks in trialcs for tmpmasks when they are pointers
* to allocated cpumasks.
+ *
+ * Note that update_parent_subparts_cpumask() uses only addmask &
+ * delmask, but not new_cpus.
*/
tmp.addmask = trialcs->subparts_cpus;
tmp.delmask = trialcs->effective_cpus;
- tmp.new_cpus = trialcs->cpus_allowed;
+ tmp.new_cpus = NULL;
#endif
retval = validate_change(cs, trialcs);
@@ -1838,6 +1846,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
}
spin_unlock_irq(&callback_lock);
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ /* Now trialcs->cpus_allowed is available */
+ tmp.new_cpus = trialcs->cpus_allowed;
+#endif
+
/* effective_cpus will be updated here */
update_cpumasks_hier(cs, &tmp, false);
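For reference, the scratch structure whose fields are aliased to the trialcs masks; reproduced as a sketch of cpuset.c's definition, not verbatim:

struct tmpmasks {
	cpumask_var_t addmask, delmask;	/* For partition root */
	cpumask_var_t new_cpus;		/* For update_cpumasks_hier() */
};

With CONFIG_CPUMASK_OFFSTACK, cpumask_var_t is a pointer, so trialcs->cpus_allowed can only be lent out as tmp.new_cpus once the callback_lock section above has finished consuming it.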
@@ -2445,6 +2458,20 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
+/*
+ * Check to see if a cpuset can accept a new task.
+ * For v1, cpus_allowed and mems_allowed can't be empty.
+ * For v2, effective_cpus can't be empty.
+ * Note that in v1, effective_cpus = cpus_allowed.
+ */
+static int cpuset_can_attach_check(struct cpuset *cs)
+{
+ if (cpumask_empty(cs->effective_cpus) ||
+ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
+ return -ENOSPC;
+ return 0;
+}
+
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
@@ -2459,16 +2486,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
percpu_down_write(&cpuset_rwsem);
- /* allow moving tasks into an empty cpuset if on default hierarchy */
- ret = -ENOSPC;
- if (!is_in_v2_mode() &&
- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
- goto out_unlock;
-
- /*
- * Task cannot be moved to a cpuset with empty effective cpus.
- */
- if (cpumask_empty(cs->effective_cpus))
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
goto out_unlock;
cgroup_taskset_for_each(task, css, tset) {
@@ -2485,7 +2505,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
- ret = 0;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
@@ -2494,25 +2513,47 @@ out_unlock:
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
+ struct cpuset *cs;
cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
percpu_down_write(&cpuset_rwsem);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}
/*
- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
 * but we can't allocate it dynamically there. Define it globally and
* allocate from cpuset_init().
*/
static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_to;
+
+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
+{
+ percpu_rwsem_assert_held(&cpuset_rwsem);
+
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(task, cpus_attach);
+ else
+ cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
+ cs->subparts_cpus);
+ /*
+ * can_attach beforehand should guarantee that this doesn't
+ * fail. TODO: have a better way to handle failure here
+ */
+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+ cpuset_update_task_spread_flags(cs, task);
+}
static void cpuset_attach(struct cgroup_taskset *tset)
{
- /* static buf protected by cpuset_rwsem */
- static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cgroup_subsys_state *css;
@@ -2543,20 +2584,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, css, tset) {
- if (cs != &top_cpuset)
- guarantee_online_cpus(task, cpus_attach);
- else
- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
- /*
- * can_attach beforehand should guarantee that this doesn't
- * fail. TODO: have a better way to handle failure here
- */
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
- cpuset_update_task_spread_flags(cs, task);
- }
+ cgroup_taskset_for_each(task, css, tset)
+ cpuset_attach_task(cs, task);
/*
* Change mm for all threadgroup leaders. This is expensive and may
@@ -3248,17 +3277,101 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
}
/*
+ * In case the child is cloned into a cpuset different from its parent,
+ * additional checks are done to see if the move is allowed.
+ */
+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+ int ret;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+ percpu_down_write(&cpuset_rwsem);
+
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
+ goto out_unlock;
+
+ ret = task_can_attach(task, cs->effective_cpus);
+ if (ret)
+ goto out_unlock;
+
+ ret = security_task_setscheduler(task);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * Mark attach is in progress. This makes validate_change() fail
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+}
+
+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return;
+
+ percpu_down_write(&cpuset_rwsem);
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+ percpu_up_write(&cpuset_rwsem);
+}
+
+/*
 * Make sure the new task conforms to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
* state from the parent and before it sits on the cgroup's task list.
*/
static void cpuset_fork(struct task_struct *task)
{
- if (task_css_is_root(task, cpuset_cgrp_id))
+ struct cpuset *cs;
+ bool same_cs;
+
+ rcu_read_lock();
+ cs = task_cs(task);
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs) {
+ if (cs == &top_cpuset)
+ return;
+
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
+ task->mems_allowed = current->mems_allowed;
return;
+ }
- set_cpus_allowed_ptr(task, current->cpus_ptr);
- task->mems_allowed = current->mems_allowed;
+ /* CLONE_INTO_CGROUP */
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
+
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+
+ percpu_up_write(&cpuset_rwsem);
}
struct cgroup_subsys cpuset_cgrp_subsys = {
@@ -3271,6 +3384,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
+ .can_fork = cpuset_can_fork,
+ .cancel_fork = cpuset_cancel_fork,
.fork = cpuset_fork,
.legacy_cftypes = legacy_files,
.dfl_cftypes = dfl_files,
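The new can_fork/cancel_fork callbacks fire on the CLONE_INTO_CGROUP path of clone3(), where a child is created directly inside a target cgroup instead of inheriting the parent's. A hedged userspace sketch that exercises cpuset_can_fork()/cpuset_cancel_fork()/cpuset_fork(); the cgroup path is hypothetical:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/sched.h>	/* struct clone_args, CLONE_INTO_CGROUP */
#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

static pid_t clone_into_cgroup(int cgroup_fd)
{
	struct clone_args args = {
		.flags       = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup      = (unsigned long long)cgroup_fd,
	};

	/* no glibc wrapper for clone3; invoke it directly */
	return syscall(__NR_clone3, &args, sizeof(args));
}

int main(void)
{
	/* hypothetical v2 cpuset cgroup */
	int fd = open("/sys/fs/cgroup/test", O_RDONLY | O_DIRECTORY);
	pid_t pid;

	if (fd < 0)
		return 1;
	pid = clone_into_cgroup(fd);	/* fails with ENOSPC if the cpuset is empty */
	if (pid == 0)
		_exit(0);		/* child begins life in the target cgroup */
	close(fd);
	return pid < 0;
}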
@@ -3508,6 +3623,8 @@ retry:
update_tasks:
cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
mems_updated = !nodes_equal(new_mems, cs->effective_mems);
+ if (!cpus_updated && !mems_updated)
+ goto unlock; /* Hotplug doesn't affect this cpuset */
if (mems_updated)
check_insane_mems_config(&new_mems);
@@ -3519,6 +3636,7 @@ update_tasks:
hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
cpus_updated, mems_updated);
+unlock:
percpu_up_write(&cpuset_rwsem);
}
@@ -3831,7 +3949,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
}
/*
- * __cpuset_node_allowed - Can we allocate on a memory node?
+ * cpuset_node_allowed - Can we allocate on a memory node?
* @node: is this an allowed node?
* @gfp_mask: memory allocation flags
*
@@ -3870,7 +3988,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*/
-bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
+bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
struct cpuset *cs; /* current cpuset ancestors */
bool allowed; /* is allocation in zone z allowed? */
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 1b6b21851e9d..936473203a6b 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -22,6 +22,7 @@
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
+#include <linux/cpu.h>
/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
@@ -350,7 +351,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
- static_branch_inc(&freezer_active);
+ static_branch_inc_cpuslocked(&freezer_active);
freezer->state |= state;
freeze_cgroup(freezer);
} else {
@@ -361,7 +362,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
if (!(freezer->state & CGROUP_FREEZING)) {
freezer->state &= ~CGROUP_FROZEN;
if (was_freezing)
- static_branch_dec(&freezer_active);
+ static_branch_dec_cpuslocked(&freezer_active);
unfreeze_cgroup(freezer);
}
}
@@ -379,6 +380,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
{
struct cgroup_subsys_state *pos;
+ cpus_read_lock();
/*
* Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as
@@ -407,6 +409,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
+ cpus_read_unlock();
}
static ssize_t freezer_write(struct kernfs_open_file *of,
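The _cpuslocked static-branch variants assert that the caller already holds the CPU hotplug read lock, which is why freezer_change_state() now brackets the whole traversal with cpus_read_lock()/cpus_read_unlock(). A condensed sketch of the resulting nesting (traversal body elided):

static void freezer_change_state(struct freezer *freezer, bool freeze)
{
	cpus_read_lock();		/* hotplug read lock first ... */
	mutex_lock(&freezer_mutex);	/* ... then the freezer mutex */

	/* pre-order descendant walk; freezer_apply_state() may flip
	 * freezer_active via static_branch_{inc,dec}_cpuslocked() */

	mutex_unlock(&freezer_mutex);
	cpus_read_unlock();
}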
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 831f1f472bb8..9c4c55228567 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -241,12 +241,12 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
}
/**
- * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
+ * cgroup_rstat_flush_atomic - atomic version of cgroup_rstat_flush()
* @cgrp: target cgroup
*
* This function can be called from any context.
*/
-void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
+void cgroup_rstat_flush_atomic(struct cgroup *cgrp)
{
unsigned long flags;
@@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
struct task_cputime *cputime = &bstat->cputime;
int i;
- cputime->stime = 0;
- cputime->utime = 0;
- cputime->sum_exec_runtime = 0;
+ memset(bstat, 0, sizeof(*bstat));
for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat;
u64 *cpustat = kcpustat.cpustat;
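The memset() zeroes the whole cgroup_base_stat rather than three fields picked by name, which also covers the core-scheduling forceidle counter carried in the same struct; assumed layout:

struct cgroup_base_stat {
	struct task_cputime cputime;
#ifdef CONFIG_SCHED_CORE
	u64 forceidle_sum;	/* cleared by the memset() as well */
#endif
};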