]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
hung_task: show the blocker task if the task is hung on mutex
authorMasami Hiramatsu (Google) <mhiramat@kernel.org>
Tue, 25 Feb 2025 07:02:34 +0000 (16:02 +0900)
committerAndrew Morton <akpm@linux-foundation.org>
Sat, 22 Mar 2025 05:10:04 +0000 (22:10 -0700)
Patch series "hung_task: Dump the blocking task stacktrace", v4.

The hung_task detector is very useful for detecting the lockup.  However,
since it only dumps the blocked (uninterruptible sleep) processes, it is
not enough to identify the root cause of that lockup.

For example, if a process holds a mutex and sleep an event in
interruptible state long time, the other processes will wait on the mutex
in uninterruptible state.  In this case, the waiter processes are dumped,
but the blocker process is not shown because it is sleep in interruptible
state.

This adds a feature to dump the blocker task which holds a mutex
when detecting a hung task. e.g.

 INFO: task cat:115 blocked for more than 122 seconds.
       Not tainted 6.14.0-rc3-00003-ga8946be3de00 #156
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 task:cat             state:D stack:13432 pid:115   tgid:115   ppid:106    task_flags:0x400100 flags:0x00000002
 Call Trace:
  <TASK>
  __schedule+0x731/0x960
  ? schedule_preempt_disabled+0x54/0xa0
  schedule+0xb7/0x140
  ? __mutex_lock+0x51b/0xa60
  ? __mutex_lock+0x51b/0xa60
  schedule_preempt_disabled+0x54/0xa0
  __mutex_lock+0x51b/0xa60
  read_dummy+0x23/0x70
  full_proxy_read+0x6a/0xc0
  vfs_read+0xc2/0x340
  ? __pfx_direct_file_splice_eof+0x10/0x10
  ? do_sendfile+0x1bd/0x2e0
  ksys_read+0x76/0xe0
  do_syscall_64+0xe3/0x1c0
  ? exc_page_fault+0xa9/0x1d0
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
 RIP: 0033:0x4840cd
 RSP: 002b:00007ffe99071828 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd
 RDX: 0000000000001000 RSI: 00007ffe99071870 RDI: 0000000000000003
 RBP: 00007ffe99071870 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000
 R13: 00000000132fd3a0 R14: 0000000000000001 R15: ffffffffffffffff
  </TASK>
 INFO: task cat:115 is blocked on a mutex likely owned by task cat:114.
 task:cat             state:S stack:13432 pid:114   tgid:114   ppid:106    task_flags:0x400100 flags:0x00000002
 Call Trace:
  <TASK>
  __schedule+0x731/0x960
  ? schedule_timeout+0xa8/0x120
  schedule+0xb7/0x140
  schedule_timeout+0xa8/0x120
  ? __pfx_process_timeout+0x10/0x10
  msleep_interruptible+0x3e/0x60
  read_dummy+0x2d/0x70
  full_proxy_read+0x6a/0xc0
  vfs_read+0xc2/0x340
  ? __pfx_direct_file_splice_eof+0x10/0x10
  ? do_sendfile+0x1bd/0x2e0
  ksys_read+0x76/0xe0
  do_syscall_64+0xe3/0x1c0
  ? exc_page_fault+0xa9/0x1d0
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
 RIP: 0033:0x4840cd
 RSP: 002b:00007ffe3e0147b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd
 RDX: 0000000000001000 RSI: 00007ffe3e014800 RDI: 0000000000000003
 RBP: 00007ffe3e014800 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000
 R13: 000000001a0a93a0 R14: 0000000000000001 R15: ffffffffffffffff
  </TASK>

TBD: We can extend this feature to cover other locks like rwsem and
rt_mutex, but rwsem requires to dump all the tasks which acquire and wait
that rwsem.  We can follow the waiter link but the output will be a bit
different compared with mutex case.

This patch (of 2):

The "hung_task" shows a long-time uninterruptible slept task, but most
often, it's blocked on a mutex acquired by another task.  Without dumping
such a task, investigating the root cause of the hung task problem is very
difficult.

This introduce task_struct::blocker_mutex to point the mutex lock which
this task is waiting for.  Since the mutex has "owner" information, we can
find the owner task and dump it with hung tasks.

Note: the owner can be changed while dumping the owner task, so
this is "likely" the owner of the mutex.

With this change, the hung task shows blocker task's info like below;

 INFO: task cat:115 blocked for more than 122 seconds.
       Not tainted 6.14.0-rc3-00003-ga8946be3de00 #156
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 task:cat             state:D stack:13432 pid:115   tgid:115   ppid:106    task_flags:0x400100 flags:0x00000002
 Call Trace:
  <TASK>
  __schedule+0x731/0x960
  ? schedule_preempt_disabled+0x54/0xa0
  schedule+0xb7/0x140
  ? __mutex_lock+0x51b/0xa60
  ? __mutex_lock+0x51b/0xa60
  schedule_preempt_disabled+0x54/0xa0
  __mutex_lock+0x51b/0xa60
  read_dummy+0x23/0x70
  full_proxy_read+0x6a/0xc0
  vfs_read+0xc2/0x340
  ? __pfx_direct_file_splice_eof+0x10/0x10
  ? do_sendfile+0x1bd/0x2e0
  ksys_read+0x76/0xe0
  do_syscall_64+0xe3/0x1c0
  ? exc_page_fault+0xa9/0x1d0
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
 RIP: 0033:0x4840cd
 RSP: 002b:00007ffe99071828 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd
 RDX: 0000000000001000 RSI: 00007ffe99071870 RDI: 0000000000000003
 RBP: 00007ffe99071870 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000
 R13: 00000000132fd3a0 R14: 0000000000000001 R15: ffffffffffffffff
  </TASK>
 INFO: task cat:115 is blocked on a mutex likely owned by task cat:114.
 task:cat             state:S stack:13432 pid:114   tgid:114   ppid:106    task_flags:0x400100 flags:0x00000002
 Call Trace:
  <TASK>
  __schedule+0x731/0x960
  ? schedule_timeout+0xa8/0x120
  schedule+0xb7/0x140
  schedule_timeout+0xa8/0x120
  ? __pfx_process_timeout+0x10/0x10
  msleep_interruptible+0x3e/0x60
  read_dummy+0x2d/0x70
  full_proxy_read+0x6a/0xc0
  vfs_read+0xc2/0x340
  ? __pfx_direct_file_splice_eof+0x10/0x10
  ? do_sendfile+0x1bd/0x2e0
  ksys_read+0x76/0xe0
  do_syscall_64+0xe3/0x1c0
  ? exc_page_fault+0xa9/0x1d0
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
 RIP: 0033:0x4840cd
 RSP: 002b:00007ffe3e0147b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd
 RDX: 0000000000001000 RSI: 00007ffe3e014800 RDI: 0000000000000003
 RBP: 00007ffe3e014800 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000
 R13: 000000001a0a93a0 R14: 0000000000000001 R15: ffffffffffffffff
  </TASK>

[akpm@linux-foundation.org: implement debug_show_blocker() in C rather than in CPP]
Link: https://lkml.kernel.org/r/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com
Link: https://lkml.kernel.org/r/174046695384.2194069.16796289525958195643.stgit@mhiramat.tok.corp.google.com
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Lance Yang <ioworker0@gmail.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Will Deacon <will@kernel.org>
Cc: Yongliang Gao <leonylgao@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mutex.h
include/linux/sched.h
kernel/hung_task.c
kernel/locking/mutex.c
lib/Kconfig.debug

index 2bf91b57591b49e4668752e773419ae945f124da..2143d05116be1b0ac239951a9d5d0b90ad99062c 100644 (file)
@@ -202,4 +202,6 @@ DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T))
 DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T))
 DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0)
 
+extern unsigned long mutex_get_owner(struct mutex *lock);
+
 #endif /* __LINUX_MUTEX_H */
index 9c15365a30c08b64db65f0e88a88b761aecd807e..1419d94c8e87b4a09d6aaa7e3c7a5453cbfe88a1 100644 (file)
@@ -1217,6 +1217,10 @@ struct task_struct {
        struct mutex_waiter             *blocked_on;
 #endif
 
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+       struct mutex                    *blocker_mutex;
+#endif
+
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
        int                             non_block_count;
 #endif
index 04efa7a6e69bfa79f0333e8d36926b00c9fab4af..dc898ec93463f67e515725a0930ce6a66368d008 100644 (file)
@@ -93,6 +93,43 @@ static struct notifier_block panic_block = {
        .notifier_call = hung_task_panic,
 };
 
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static void debug_show_blocker(struct task_struct *task)
+{
+       struct task_struct *g, *t;
+       unsigned long owner;
+       struct mutex *lock;
+
+       RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");
+
+       lock = READ_ONCE(task->blocker_mutex);
+       if (!lock)
+               return;
+
+       owner = mutex_get_owner(lock);
+       if (unlikely(!owner)) {
+               pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
+                       task->comm, task->pid);
+               return;
+       }
+
+       /* Ensure the owner information is correct. */
+       for_each_process_thread(g, t) {
+               if ((unsigned long)t == owner) {
+                       pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
+                               task->comm, task->pid, t->comm, t->pid);
+                       sched_show_task(t);
+                       return;
+               }
+       }
+}
+#else
+static inline void debug_show_blocker(struct task_struct *task)
+{
+}
+#endif
+
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
        unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -152,6 +189,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
                pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
                        " disables this message.\n");
                sched_show_task(t);
+               debug_show_blocker(t);
                hung_task_show_lock = true;
 
                if (sysctl_hung_task_all_cpu_backtrace)
index b36f23de48f1b544e9950a3ed20e0ba6b89b9fc5..6a543c204a1492ffdd306cac1f85d08165251c6d 100644 (file)
@@ -72,6 +72,14 @@ static inline unsigned long __owner_flags(unsigned long owner)
        return owner & MUTEX_FLAGS;
 }
 
+/* Do not use the return value as a pointer directly. */
+unsigned long mutex_get_owner(struct mutex *lock)
+{
+       unsigned long owner = atomic_long_read(&lock->owner);
+
+       return (unsigned long)__owner_task(owner);
+}
+
 /*
  * Returns: __mutex_owner(lock) on failure or NULL on success.
  */
@@ -180,6 +188,9 @@ static void
 __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
                   struct list_head *list)
 {
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+       WRITE_ONCE(current->blocker_mutex, lock);
+#endif
        debug_mutex_add_waiter(lock, waiter, current);
 
        list_add_tail(&waiter->list, list);
@@ -195,6 +206,9 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
                __mutex_clear_flag(lock, MUTEX_FLAGS);
 
        debug_mutex_remove_waiter(lock, waiter, current);
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+       WRITE_ONCE(current->blocker_mutex, NULL);
+#endif
 }
 
 /*
index 35796c290ca351b9906dc37651197aefb9100990..1f5943f6be75eacd8bdb2b6fc0d237de5b818767 100644 (file)
@@ -1260,6 +1260,17 @@ config BOOTPARAM_HUNG_TASK_PANIC
 
          Say N if unsure.
 
+config DETECT_HUNG_TASK_BLOCKER
+       bool "Dump Hung Tasks Blocker"
+       depends on DETECT_HUNG_TASK
+       depends on !PREEMPT_RT
+       default y
+       help
+         Say Y here to show the blocker task's stacktrace who acquires
+         the mutex lock which "hung tasks" are waiting.
+         This will add overhead a bit but shows suspicious tasks and
+         call trace if it comes from waiting a mutex.
+
 config WQ_WATCHDOG
        bool "Detect Workqueue Stalls"
        depends on DEBUG_KERNEL