workqueue: Fix false positive stall reports

author Song Liu <song@kernel.org>

Sun, 22 Mar 2026 03:30:45 +0000 (20:30 -0700)

committer Tejun Heo <tj@kernel.org>

Sun, 22 Mar 2026 04:34:59 +0000 (18:34 -1000)
author Song Liu <song@kernel.org>
Sun, 22 Mar 2026 03:30:45 +0000 (20:30 -0700)
committer Tejun Heo <tj@kernel.org>
Sun, 22 Mar 2026 04:34:59 +0000 (18:34 -1000)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index b77119d71641ae32c69047581d09410071eca706..ff97b705f25ed125326691a1be98b360c302f0b4 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -7699,8 +7699,28 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
                 else
                         ts = touched;
  
-               /* did we stall? */
+               /*
+                * Did we stall?
+                *
+                * Do a lockless check first. On weakly ordered
+                * architectures, the lockless check can observe a
+                * reordering between the worklist insert_work() and the
+                * last_progress_ts update from __queue_work(). Since
+                * __queue_work() is a much hotter path than the timer
+                * function, we handle false positives here by re-reading
+                * last_progress_ts with pool->lock held.
+                */
                 if (time_after(now, ts + thresh)) {
+                       scoped_guard(raw_spinlock_irqsave, &pool->lock) {
+                               pool_ts = pool->last_progress_ts;
+                               if (time_after(pool_ts, touched))
+                                       ts = pool_ts;
+                               else
+                                       ts = touched;
+                       }
+                       if (!time_after(now, ts + thresh))
+                               continue;
+
                         lockup_detected = true;
                         stall_time = jiffies_to_msecs(now - pool_ts) / 1000;
                         max_stall_time = max(max_stall_time, stall_time);
@@ -7712,8 +7732,6 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
                         pr_cont_pool_info(pool);
                         pr_cont(" stuck for %us!\n", stall_time);
                 }
-
-
         }
  
         if (lockup_detected)
author	Song Liu <song@kernel.org>
	Sun, 22 Mar 2026 03:30:45 +0000 (20:30 -0700)
committer	Tejun Heo <tj@kernel.org>
	Sun, 22 Mar 2026 04:34:59 +0000 (18:34 -1000)