]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
smp: Improve smp_call_function_single() CSD-lock diagnostics
authorPaul E. McKenney <paulmck@kernel.org>
Fri, 20 Mar 2026 10:45:36 +0000 (03:45 -0700)
committerThomas Gleixner <tglx@kernel.org>
Wed, 25 Mar 2026 19:11:30 +0000 (20:11 +0100)
Both smp_call_function() and smp_call_function_single() use per-CPU
call_single_data_t variable to hold the infamous CSD lock.  However,
while smp_call_function() acquires the destination CPU's CSD lock,
smp_call_function_single() instead uses the source CPU's CSD lock.
(These are two separate sets of CSD locks, cfd_data and csd_data,
respectively.)

This otherwise inexplicable pair of choices is explained by their
respective queueing properties.  If smp_call_function() were to
use the sending CPU's CSD lock, that would serialize the destination
CPUs' IPI handlers and result in long smp_call_function() latencies,
especially on systems with large numbers of CPUs.  For its part, if
smp_call_function_single() were to use the (single) destination CPU's
CSD lock, this would similarly serialize in the case where many CPUs
are sending IPIs to a single "victim" CPU.  Plus it would result in
higher levels of memory contention.

Except that if there is no NMI-based stack tracing on a weakly ordered
system where remote unsynchronized stack traces are especially unreliable,
the improved debugging beats the improved queueing.  This improved queueing
only matters if a bunch of CPUs are calling smp_call_function_single()
concurrently for a single "victim" CPU, which is not the common case.

Therefore, make smp_call_function_single() use the destination CPU's
csd_data instance in kernels built with CONFIG_CSD_LOCK_WAIT_DEBUG=y
where csdlock_debug_enabled is also set.  Otherwise, continue to use
the source CPU's csd_data.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://patch.msgid.link/25c2eb97-77c8-49a5-80ac-efe78dea272c@paulmck-laptop
kernel/smp.c

index bdbf145737baa0da97cb48bc69cf213c8673d087..0fc4ff28ba79eb1109236e2e4b29e0a8a651a5fc 100644 (file)
@@ -377,6 +377,20 @@ static __always_inline void csd_unlock(call_single_data_t *csd)
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+static call_single_data_t *get_single_csd_data(int cpu)
+{
+       if (static_branch_unlikely(&csdlock_debug_enabled))
+               return per_cpu_ptr(&csd_data, cpu);
+       return this_cpu_ptr(&csd_data);
+}
+#else
+static call_single_data_t *get_single_csd_data(int cpu)
+{
+       return this_cpu_ptr(&csd_data);
+}
+#endif
+
 void __smp_call_single_queue(int cpu, struct llist_node *node)
 {
        /*
@@ -671,7 +685,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 
        csd = &csd_stack;
        if (!wait) {
-               csd = this_cpu_ptr(&csd_data);
+               csd = get_single_csd_data(cpu);
                csd_lock(csd);
        }