]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
watchdog: softlockup: panic when lockup duration exceeds N thresholds
authorLi RongQing <lirongqing@baidu.com>
Tue, 16 Dec 2025 07:45:21 +0000 (02:45 -0500)
committerAndrew Morton <akpm@linux-foundation.org>
Wed, 21 Jan 2026 03:44:20 +0000 (19:44 -0800)
The softlockup_panic sysctl is currently a binary option: panic
immediately or never panic on soft lockups.

Panicking on any soft lockup, regardless of duration, can be overly
aggressive for brief stalls that may be caused by legitimate operations.
Conversely, never panicking may allow severe system hangs to persist
undetected.

Extend softlockup_panic to accept an integer threshold, allowing the
kernel to panic only when the normalized lockup duration is at least N
watchdog threshold periods.  This provides finer-grained control to
distinguish between transient delays and persistent system failures.

The accepted values are:
- 0: Don't panic (unchanged)
- 1: Panic when duration >= 1 * threshold (20s default, original behavior)
- N > 1: Panic when duration >= N * threshold (e.g., with the 20s default, 2 = 40s and 3 = 60s)

The original behavior is preserved for values 0 and 1, maintaining full
backward compatibility, while larger values let systems tolerate brief
lockups and still catch severe, persistent hangs.

[lirongqing@baidu.com: v2]
Link: https://lkml.kernel.org/r/20251218074300.4080-1-lirongqing@baidu.com
Link: https://lkml.kernel.org/r/20251216074521.2796-1-lirongqing@baidu.com
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
13 files changed:
Documentation/admin-guide/kernel-parameters.txt
arch/arm/configs/aspeed_g5_defconfig
arch/arm/configs/pxa3xx_defconfig
arch/openrisc/configs/or1klitex_defconfig
arch/powerpc/configs/skiroot_defconfig
drivers/gpu/drm/ci/arm.config
drivers/gpu/drm/ci/arm64.config
drivers/gpu/drm/ci/x86_64.config
kernel/configs/debug.config
kernel/watchdog.c
lib/Kconfig.debug
tools/testing/selftests/bpf/config
tools/testing/selftests/wireguard/qemu/kernel.config

index 1058f2a6d6a8c2e6a7153917b3c4d4312d57a8a0..73d846211144403d5b0af0cac2d2c4ff5594eab7 100644 (file)
@@ -6969,12 +6969,12 @@ Kernel parameters
 
        softlockup_panic=
                        [KNL] Should the soft-lockup detector generate panics.
-                       Format: 0 | 1
+                       Format: <int>
 
-                       A value of 1 instructs the soft-lockup detector
-                       to panic the machine when a soft-lockup occurs. It is
-                       also controlled by the kernel.softlockup_panic sysctl
-                       and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
+                       A non-zero value N instructs the soft-lockup detector
+                       to panic the machine when a soft-lockup lasts at least
+                       N thresholds. It is also controlled by the kernel.softlockup_panic
+                       sysctl and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
                        respective build-time switch to that functionality.
 
        softlockup_all_cpu_backtrace=
index 2e6ea13c1e9be78ccadfe264f49aaa59dc7489fb..ec558e57d081b5b420c1baeacf89efe8bcd31acd 100644 (file)
@@ -306,7 +306,7 @@ CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_PANIC_TIMEOUT=-1
 CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1
 CONFIG_WQ_WATCHDOG=y
 # CONFIG_SCHED_DEBUG is not set
index 07d422f0ff348aa3363124fe07f326ac1275776a..fb272e3a23377a3a7328ef59ce3a43b44b1ad153 100644 (file)
@@ -100,7 +100,7 @@ CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_SHIRQ=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
index fb1eb9a68bd68354561ed2f1aaefefa830f14271..984b0e3b27680ee24352d2351841c3837db0227c 100644 (file)
@@ -52,5 +52,5 @@ CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,bpf"
 CONFIG_PRINTK_TIME=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 CONFIG_BUG_ON_DATA_CORRUPTION=y
index 2b71a6dc399e4df3e0e35ff93031d10d9f1ae0d1..a4114fca5a398a1347c916ab8ed9d9a0c05ca9f3 100644 (file)
@@ -289,7 +289,7 @@ CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
 CONFIG_WQ_WATCHDOG=y
index 411e814819a8e6e68b550ccb28a5aaea7b4586ad..d7c51670da2fd05a3df7bd6d5c52dcd9d88728c3 100644 (file)
@@ -52,7 +52,7 @@ CONFIG_TMPFS=y
 CONFIG_PROVE_LOCKING=n
 CONFIG_DEBUG_LOCKDEP=n
 CONFIG_SOFTLOCKUP_DETECTOR=n
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=n
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0
 
 CONFIG_FW_LOADER_COMPRESS=y
 
index fddfbd4d2493d54651f6f93264244f0fc214ef6b..ea0e30737c4dd4206ff4be91185715d5f1160374 100644 (file)
@@ -161,7 +161,7 @@ CONFIG_TMPFS=y
 CONFIG_PROVE_LOCKING=n
 CONFIG_DEBUG_LOCKDEP=n
 CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 
 CONFIG_DETECT_HUNG_TASK=y
 
index 8eaba388b14195a41ed31111c3db8f80fb6744c3..7ac98a78691e5483a7ab35ae54a4aa70929f0bf4 100644 (file)
@@ -47,7 +47,7 @@ CONFIG_TMPFS=y
 CONFIG_PROVE_LOCKING=n
 CONFIG_DEBUG_LOCKDEP=n
 CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 
 CONFIG_DETECT_HUNG_TASK=y
 
index 9f6ab7dabf672ee31048f486628a97e4168585fd..774702591d26c60514faeda3bfebe933a4b9da38 100644 (file)
@@ -84,7 +84,7 @@ CONFIG_SLUB_DEBUG_ON=y
 # Debug Oops, Lockups and Hangs
 #
 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=0
-# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0
 CONFIG_DEBUG_ATOMIC_SLEEP=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_PANIC_ON_OOPS=y
index 366122f4a0f87158dca9ea30157801396a93ec27..b4d5fbdb933a2f9b56de72e64799cf774e8f23c8 100644 (file)
@@ -363,7 +363,7 @@ static struct cpumask watchdog_allowed_mask __read_mostly;
 
 /* Global variables, exported for sysctl */
 unsigned int __read_mostly softlockup_panic =
-                       IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);
+                       CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC;
 
 static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
@@ -774,8 +774,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
        unsigned long touch_ts, period_ts, now;
        struct pt_regs *regs = get_irq_regs();
-       int duration;
        int softlockup_all_cpu_backtrace;
+       int duration, thresh_count;
        unsigned long flags;
 
        if (!watchdog_enabled)
@@ -879,7 +879,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 
                add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
                sys_info(softlockup_si_mask & ~SYS_INFO_ALL_BT);
-               if (softlockup_panic)
+               thresh_count = duration / get_softlockup_thresh();
+
+               if (softlockup_panic && thresh_count >= softlockup_panic)
                        panic("softlockup: hung tasks");
        }
 
@@ -1228,7 +1230,7 @@ static const struct ctl_table watchdog_sysctls[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
-               .extra2         = SYSCTL_ONE,
+               .extra2         = SYSCTL_INT_MAX,
        },
        {
                .procname       = "softlockup_sys_info",
index 4bfca37f313e907b5a50fb20d6700cee5c55b457..947e62e92da8c181dc821a5f46be39085c5d0021 100644 (file)
@@ -1110,13 +1110,14 @@ config SOFTLOCKUP_DETECTOR_INTR_STORM
          the CPU stats and the interrupt counts during the "soft lockups".
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
-       bool "Panic (Reboot) On Soft Lockups"
+       int "Panic (Reboot) On Soft Lockups"
        depends on SOFTLOCKUP_DETECTOR
+       default 0
        help
-         Say Y here to enable the kernel to panic on "soft lockups",
-         which are bugs that cause the kernel to loop in kernel
-         mode for more than 20 seconds (configurable using the watchdog_thresh
-         sysctl), without giving other tasks a chance to run.
+         Set to a non-zero value N to enable the kernel to panic on "soft
+         lockups", which are bugs that cause the kernel to loop in kernel mode
+         for at least N * 20 seconds (the 20 seconds is configurable using the
+         watchdog_thresh sysctl), without giving other tasks a chance to run.
 
          The panic can be used in combination with panic_timeout,
          to cause the system to reboot automatically after a
@@ -1124,7 +1125,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
          high-availability systems that have uptime guarantees and
          where a lockup must be resolved ASAP.
 
-         Say N if unsure.
+         Say 0 if unsure.
 
 config HAVE_HARDLOCKUP_DETECTOR_BUDDY
        bool
index 558839e3c185e52323a7bc755dd47565b9578b0b..24855381290d6a1012d85da033e163f28d56b536 100644 (file)
@@ -1,6 +1,6 @@
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 CONFIG_BPF=y
 CONFIG_BPF_EVENTS=y
 CONFIG_BPF_JIT=y
index 0504c11c2de628340056cd17bc6d01b8b578108c..bb89d2dfaa2a24a9281013a88115d1efc52575de 100644 (file)
@@ -80,7 +80,7 @@ CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_WQ_WATCHDOG=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1
 CONFIG_PANIC_TIMEOUT=-1
 CONFIG_STACKTRACE=y