git.ipfire.org Git - thirdparty/linux.git/commitdiff
rcu: Summarize RCU CPU stall warnings during CSD-lock stalls
author Paul E. McKenney <paulmck@kernel.org>
Tue, 2 Jul 2024 02:30:56 +0000 (19:30 -0700)
committer Neeraj Upadhyay <neeraj.upadhyay@kernel.org>
Wed, 14 Aug 2024 18:40:50 +0000 (00:10 +0530)
During CSD-lock stalls, the additional information output by RCU CPU
stall warnings is usually redundant, flooding the console for no good
reason.  However, this has been the way things work for a few years.
This commit therefore adds an rcutree.csd_lock_suppress_rcu_stall kernel
boot parameter that causes RCU CPU stall warnings to be abbreviated to
a single line when there is at least one CPU that has been stuck waiting
for a CSD lock for more than five seconds.

To make this abbreviated message happen with decent probability:

tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 8 \
--configs "2*TREE01" --kconfig "CONFIG_CSD_LOCK_WAIT_DEBUG=y" \
--bootargs "csdlock_debug=1 rcutorture.stall_cpu=200 \
rcutorture.stall_cpu_holdoff=120 rcutorture.stall_cpu_irqsoff=1 \
rcutree.csd_lock_suppress_rcu_stall=1 \
rcupdate.rcu_exp_cpu_stall_timeout=5000" --trust-make

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>
Documentation/admin-guide/kernel-parameters.txt
kernel/rcu/tree_stall.h

index f1384c7b59c9282fc6fb40569a92f6c13861bbb9..d56356c13184e397df48a979e69c90ce2aad1f7c 100644 (file)
                        Set maximum number of finished RCU callbacks to
                        process in one batch.
 
+       rcutree.csd_lock_suppress_rcu_stall=    [KNL]
+                       Do only a one-line RCU CPU stall warning when
+                       there is an ongoing too-long CSD-lock wait.
+
        rcutree.do_rcu_barrier= [KNL]
                        Request a call to rcu_barrier().  This is
                        throttled so that userspace tests can safely
index 4b0e9d7c4c68ee029c72d7094c4be7dc3b0d1582..b497d4c6dabdf77338ebbd0797b2ed8c9951fd39 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/kvm_para.h>
 #include <linux/rcu_notifier.h>
+#include <linux/smp.h>
 
 //////////////////////////////////////////////////////////////////////////////
 //
@@ -719,6 +720,9 @@ static void print_cpu_stall(unsigned long gps)
        set_preempt_need_resched();
 }
 
+static bool csd_lock_suppress_rcu_stall;
+module_param(csd_lock_suppress_rcu_stall, bool, 0644);
+
 static void check_cpu_stall(struct rcu_data *rdp)
 {
        bool self_detected;
@@ -791,7 +795,9 @@ static void check_cpu_stall(struct rcu_data *rdp)
                        return;
 
                rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
-               if (self_detected) {
+               if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
+                       pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
+               } else if (self_detected) {
                        /* We haven't checked in, so go dump stack. */
                        print_cpu_stall(gps);
                } else {