summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-01-10 15:03:02 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-01-10 15:03:02 -0800
commit 58624e4bc876198a5dc41be1d7dd39e7c944b9c6 (patch)
tree a6c7b7377869c24ff7741203b73ce209b7c43276
parent 257a8be4e9a6fc3e821c337275256416750afa5b (diff)
parent 3cb97a927fffe443e1e7e8eddbfebfdb062e86ed (diff)
Merge tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
 "Cpuset fixes:

   - Fix isolated CPUs leaking into sched domains

   - Remove now unnecessary kernfs active break which can trigger a
     warning

   - Comment updates"

* tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: remove kernfs active break
  cgroup/cpuset: Prevent leakage of isolated CPUs into sched domains
  cgroup/cpuset: Remove stale text
-rw-r--r-- kernel/cgroup/cpuset.c 44
-rwxr-xr-x tools/testing/selftests/cgroup/test_cpuset_prs.sh 33
2 files changed, 30 insertions, 47 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f321ed515f3a..0f910c828973 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -197,10 +197,8 @@ static struct cpuset top_cpuset = {
/*
* There are two global locks guarding cpuset structures - cpuset_mutex and
- * callback_lock. We also require taking task_lock() when dereferencing a
- * task's cpuset pointer. See "The task_lock() exception", at the end of this
- * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems
- * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
+ * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel
+ * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
* structures. Note that cpuset_mutex needs to be a mutex as it is used in
* paths that rely on priority inheritance (e.g. scheduler - on RT) for
* correctness.
@@ -229,9 +227,6 @@ static struct cpuset top_cpuset = {
* The cpuset_common_seq_show() handlers only hold callback_lock across
* small pieces of code, such as when reading out possibly multi-word
* cpumasks and nodemasks.
- *
- * Accessing a task's cpuset should be done in accordance with the
- * guidelines for accessing subsystem state in kernel/cgroup.c
*/
static DEFINE_MUTEX(cpuset_mutex);
@@ -890,7 +885,15 @@ v2:
*/
if (cgrpv2) {
for (i = 0; i < ndoms; i++) {
- cpumask_copy(doms[i], csa[i]->effective_cpus);
+ /*
+ * The top cpuset may contain some boot time isolated
+ * CPUs that need to be excluded from the sched domain.
+ */
+ if (csa[i] == &top_cpuset)
+ cpumask_and(doms[i], csa[i]->effective_cpus,
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
+ else
+ cpumask_copy(doms[i], csa[i]->effective_cpus);
if (dattr)
dattr[i] = SD_ATTR_INIT;
}
@@ -3121,29 +3124,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
int retval = -ENODEV;
buf = strstrip(buf);
-
- /*
- * CPU or memory hotunplug may leave @cs w/o any execution
- * resources, in which case the hotplug code asynchronously updates
- * configuration and transfers all tasks to the nearest ancestor
- * which can execute.
- *
- * As writes to "cpus" or "mems" may restore @cs's execution
- * resources, wait for the previously scheduled operations before
- * proceeding, so that we don't end up keep removing tasks added
- * after execution capability is restored.
- *
- * cpuset_handle_hotplug may call back into cgroup core asynchronously
- * via cgroup_transfer_tasks() and waiting for it from a cgroupfs
- * operation like this one can lead to a deadlock through kernfs
- * active_ref protection. Let's break the protection. Losing the
- * protection is okay as we check whether @cs is online after
- * grabbing cpuset_mutex anyway. This only happens on the legacy
- * hierarchies.
- */
- css_get(&cs->css);
- kernfs_break_active_protection(of->kn);
-
cpus_read_lock();
mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs))
@@ -3176,8 +3156,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
out_unlock:
mutex_unlock(&cpuset_mutex);
cpus_read_unlock();
- kernfs_unbreak_active_protection(of->kn);
- css_put(&cs->css);
flush_workqueue(cpuset_migrate_mm_wq);
return retval ?: nbytes;
}
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 03c1bdaed2c3..400a696a0d21 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus
#
# If isolated CPUs have been reserved at boot time (as shown in
-# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7
+# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
# that will be used by this script for testing purpose. If not, some of
-# the tests may fail incorrectly. These isolated CPUs will also be removed
-# before being compared with the expected results.
+# the tests may fail incorrectly. These pre-isolated CPUs should stay in
+# an isolated state throughout the testing process for now.
#
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
if [[ -n "$BOOT_ISOLCPUS" ]]
then
- [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] &&
+ [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
fi
@@ -684,14 +684,18 @@ check_isolcpus()
fi
#
+ # Appending pre-isolated CPUs
+ # Even though CPU #8 isn't used for testing, it can't be pre-isolated
+ # to make appending those CPUs easier.
+ #
+ [[ -n "$BOOT_ISOLCPUS" ]] && {
+ EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
+ EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
+ }
+
+ #
# Check cpuset.cpus.isolated cpumask
#
- if [[ -z "$BOOT_ISOLCPUS" ]]
- then
- ISOLCPUS=$(cat $ISCPUS)
- else
- ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
- fi
[[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
# Take a 50ms pause and try again
pause 0.05
@@ -731,8 +735,6 @@ check_isolcpus()
fi
done
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
- [[ -n "BOOT_ISOLCPUS" ]] &&
- ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
[[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
}
@@ -836,8 +838,11 @@ run_state_test()
# if available
[[ -n "$ICPUS" ]] && {
check_isolcpus $ICPUS
- [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
- "Expect $ICPUS, get $ISOLCPUS instead"
+ [[ $? -ne 0 ]] && {
+ [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
+ test_fail $I "isolated CPU" \
+ "Expect $ICPUS, get $ISOLCPUS instead"
+ }
}
reset_cgroup_states
#