lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly

author Ming Lei <ming.lei@redhat.com>

Mon, 20 Nov 2023 08:35:59 +0000 (16:35 +0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 10 Jan 2024 16:10:33 +0000 (17:10 +0100)
author Ming Lei <ming.lei@redhat.com>
Mon, 20 Nov 2023 08:35:59 +0000 (16:35 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 10 Jan 2024 16:10:33 +0000 (17:10 +0100)
diff --git a/lib/group_cpus.c b/lib/group_cpus.c

index 99f08c6cb9d971c9da98372cc8994d3015682c77..156b1446d2a202f4dbf1774b29ee126fc9d589d3 100644 (file)
--- a/lib/group_cpus.c
+++ b/lib/group_cpus.c
@@ -365,13 +365,25 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
         if (!masks)
                 goto fail_node_to_cpumask;
  
-       /* Stabilize the cpumasks */
-       cpus_read_lock();
         build_node_to_cpumask(node_to_cpumask);
  
+       /*
+        * Make a local copy of 'cpu_present_mask' so that both stages of
+        * the spread observe a consistent 'cpu_present_mask' without holding
+        * the CPU hotplug lock. This reduces the risk of deadlock with the
+        * CPU hotplug code.
+        *
+        * CPU hotplug may happen while 'cpu_present_mask' is being read. We
+        * can live with that because it only affects whether the hotplugged
+        * CPU is handled in the 1st or the 2nd stage, and either way is
+        * correct from the API user's viewpoint, since the 2-stage spread is
+        * only an optimization.
+        */
+       cpumask_copy(npresmsk, data_race(cpu_present_mask));
+
         /* grouping present CPUs first */
         ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
-                                 cpu_present_mask, nmsk, masks);
+                                 npresmsk, nmsk, masks);
         if (ret < 0)
                 goto fail_build_affinity;
         nr_present = ret;
@@ -386,15 +398,13 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
                 curgrp = 0;
         else
                 curgrp = nr_present;
-       cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+       cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk);
         ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
                                   npresmsk, nmsk, masks);
         if (ret >= 0)
                 nr_others = ret;
  
   fail_build_affinity:
-       cpus_read_unlock();
-
         if (ret >= 0)
                 WARN_ON(nr_present + nr_others < numgrps);
author	Ming Lei <ming.lei@redhat.com>
	Mon, 20 Nov 2023 08:35:59 +0000 (16:35 +0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 10 Jan 2024 16:10:33 +0000 (17:10 +0100)