]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
cgroup/cpuset: Prevent leakage of isolated CPUs into sched domains
author: Waiman Long <longman@redhat.com>
Thu, 5 Dec 2024 19:51:01 +0000 (14:51 -0500)
committer: Tejun Heo <tj@kernel.org>
Wed, 11 Dec 2024 15:45:52 +0000 (05:45 -1000)
Isolated CPUs are not allowed to be used in a non-isolated partition.
The only exception is the top cpuset which is allowed to contain boot
time isolated CPUs.

Commit ccac8e8de99c ("cgroup/cpuset: Fix remote root partition creation
problem") introduces a simplified scheme of including only partition
roots in sched domain generation. However, it does not properly account
for this exception case. This can result in leakage of isolated CPUs
into a sched domain.

Fix it by making sure that isolated CPUs are excluded from the top
cpuset before generating sched domains.

Also update the way the boot time isolated CPUs are handled in
test_cpuset_prs.sh to make sure that those isolated CPUs are really
isolated instead of just skipping them in the tests.

Fixes: ccac8e8de99c ("cgroup/cpuset: Fix remote root partition creation problem")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cpuset.c
tools/testing/selftests/cgroup/test_cpuset_prs.sh

index 9e2abd6a38a53fdca51da9fc3b86aee3442f879b..7ea559fb0cbf517a11d22ef8d44be81fab72a78e 100644 (file)
@@ -885,7 +885,15 @@ v2:
         */
        if (cgrpv2) {
                for (i = 0; i < ndoms; i++) {
-                       cpumask_copy(doms[i], csa[i]->effective_cpus);
+                       /*
+                        * The top cpuset may contain some boot time isolated
+                        * CPUs that need to be excluded from the sched domain.
+                        */
+                       if (csa[i] == &top_cpuset)
+                               cpumask_and(doms[i], csa[i]->effective_cpus,
+                                           housekeeping_cpumask(HK_TYPE_DOMAIN));
+                       else
+                               cpumask_copy(doms[i], csa[i]->effective_cpus);
                        if (dattr)
                                dattr[i] = SD_ATTR_INIT;
                }
index 03c1bdaed2c3c5a71d8584535a20d82d2b23b447..400a696a0d212ebe65c970668ee542b9ae9ebf5b 100755 (executable)
@@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus
 
 #
 # If isolated CPUs have been reserved at boot time (as shown in
-# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7
+# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
 # that will be used by this script for testing purpose. If not, some of
-# the tests may fail incorrectly. These isolated CPUs will also be removed
-# before being compared with the expected results.
+# the tests may fail incorrectly. These pre-isolated CPUs should stay in
+# an isolated state throughout the testing process for now.
 #
 BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
 if [[ -n "$BOOT_ISOLCPUS" ]]
 then
-       [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] &&
+       [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
                skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
        echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
 fi
@@ -683,15 +683,19 @@ check_isolcpus()
                EXPECT_VAL2=$EXPECT_VAL
        fi
 
+       #
+       # Appending pre-isolated CPUs
+       # Even though CPU #8 isn't used for testing, it can't be pre-isolated
+       # to make appending those CPUs easier.
+       #
+       [[ -n "$BOOT_ISOLCPUS" ]] && {
+               EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
+               EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
+       }
+
        #
        # Check cpuset.cpus.isolated cpumask
        #
-       if [[ -z "$BOOT_ISOLCPUS" ]]
-       then
-               ISOLCPUS=$(cat $ISCPUS)
-       else
-               ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
-       fi
        [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
                # Take a 50ms pause and try again
                pause 0.05
@@ -731,8 +735,6 @@ check_isolcpus()
                fi
        done
        [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
-       [[ -n "BOOT_ISOLCPUS" ]] &&
-               ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
 
        [[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
 }
@@ -836,8 +838,11 @@ run_state_test()
                # if available
                [[ -n "$ICPUS" ]] && {
                        check_isolcpus $ICPUS
-                       [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
-                               "Expect $ICPUS, get $ISOLCPUS instead"
+                       [[ $? -ne 0 ]] && {
+                               [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
+                               test_fail $I "isolated CPU" \
+                                       "Expect $ICPUS, get $ISOLCPUS instead"
+                       }
                }
                reset_cgroup_states
                #