locking: Add task::blocked_lock to serialize blocked_on state

author John Stultz <jstultz@google.com>

Tue, 24 Mar 2026 19:13:19 +0000 (19:13 +0000)

committer Peter Zijlstra <peterz@infradead.org>

Fri, 3 Apr 2026 12:23:39 +0000 (14:23 +0200)
author John Stultz <jstultz@google.com>
Tue, 24 Mar 2026 19:13:19 +0000 (19:13 +0000)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 3 Apr 2026 12:23:39 +0000 (14:23 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 5a5d3dbc9cdf332c46a96012fceb611a9e944e53..2eef9bc6daaab614f31a7db808050b6cef184987 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1238,6 +1238,7 @@ struct task_struct {
  #endif
  
         struct mutex                    *blocked_on;    /* lock we're blocked on */
+       raw_spinlock_t                  blocked_lock;
  
  #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
         /*
@@ -2181,57 +2182,42 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock);
  #ifndef CONFIG_PREEMPT_RT
  static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
  {
-       struct mutex *m = p->blocked_on;
-
-       if (m)
-               lockdep_assert_held_once(&m->wait_lock);
-       return m;
+       lockdep_assert_held_once(&p->blocked_lock);
+       return p->blocked_on;
  }
  
  static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
  {
-       struct mutex *blocked_on = READ_ONCE(p->blocked_on);
-
         WARN_ON_ONCE(!m);
         /* The task should only be setting itself as blocked */
         WARN_ON_ONCE(p != current);
-       /* Currently we serialize blocked_on under the mutex::wait_lock */
-       lockdep_assert_held_once(&m->wait_lock);
+       /* Currently we serialize blocked_on under the task::blocked_lock */
+       lockdep_assert_held_once(&p->blocked_lock);
         /*
          * Check ensure we don't overwrite existing mutex value
          * with a different mutex. Note, setting it to the same
          * lock repeatedly is ok.
          */
-       WARN_ON_ONCE(blocked_on && blocked_on != m);
-       WRITE_ONCE(p->blocked_on, m);
-}
-
-static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
-{
-       guard(raw_spinlock_irqsave)(&m->wait_lock);
-       __set_task_blocked_on(p, m);
+       WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
+       p->blocked_on = m;
  }
  
  static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
  {
-       if (m) {
-               struct mutex *blocked_on = READ_ONCE(p->blocked_on);
-
-               /* Currently we serialize blocked_on under the mutex::wait_lock */
-               lockdep_assert_held_once(&m->wait_lock);
-               /*
-                * There may be cases where we re-clear already cleared
-                * blocked_on relationships, but make sure we are not
-                * clearing the relationship with a different lock.
-                */
-               WARN_ON_ONCE(blocked_on && blocked_on != m);
-       }
-       WRITE_ONCE(p->blocked_on, NULL);
+       /* Currently we serialize blocked_on under the task::blocked_lock */
+       lockdep_assert_held_once(&p->blocked_lock);
+       /*
+        * There may be cases where we re-clear already cleared
+        * blocked_on relationships, but make sure we are not
+        * clearing the relationship with a different lock.
+        */
+       WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
+       p->blocked_on = NULL;
  }
  
  static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
  {
-       guard(raw_spinlock_irqsave)(&m->wait_lock);
+       guard(raw_spinlock_irqsave)(&p->blocked_lock);
         __clear_task_blocked_on(p, m);
  }
  #else
diff --git a/init/init_task.c b/init/init_task.c

index 5c838757fc10eb769c263025da088e16af48e336..b5f48ebdc2b6eb21a61cc901f5b2b865865a4662 100644 (file)
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -169,6 +169,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
         .journal_info   = NULL,
         INIT_CPU_TIMERS(init_task)
         .pi_lock        = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
+       .blocked_lock   = __RAW_SPIN_LOCK_UNLOCKED(init_task.blocked_lock),
         .timer_slack_ns = 50000, /* 50 usec default slack */
         .thread_pid     = &init_struct_pid,
         .thread_node    = LIST_HEAD_INIT(init_signals.thread_head),
diff --git a/kernel/fork.c b/kernel/fork.c

index bc2bf58b93b6524ca8510474634c7e921ac25f9a..079802cb61002ab43d89e0efee663fc93f1387f9 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2076,6 +2076,7 @@ __latent_entropy struct task_struct *copy_process(
         ftrace_graph_init_task(p);
  
         rt_mutex_init_task(p);
+       raw_spin_lock_init(&p->blocked_lock);
  
         lockdep_assert_irqs_enabled();
  #ifdef CONFIG_PROVE_LOCKING
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c

index 2c6b02d4699be705ec85526c8b0826153342bac1..cc6aa9c6e9813b2c5d7ad893e1ae7b9321a96470 100644 (file)
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -54,13 +54,13 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
         lockdep_assert_held(&lock->wait_lock);
  
         /* Current thread can't be already blocked (since it's executing!) */
-       DEBUG_LOCKS_WARN_ON(__get_task_blocked_on(task));
+       DEBUG_LOCKS_WARN_ON(get_task_blocked_on(task));
  }
  
  void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
                          struct task_struct *task)
  {
-       struct mutex *blocked_on = __get_task_blocked_on(task);
+       struct mutex *blocked_on = get_task_blocked_on(task);
  
         DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
         DEBUG_LOCKS_WARN_ON(waiter->task != task);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c

index 2a1d165b3167e25e57ac08f988a2705141f1a2b4..4aa79bcab08c771324b99d23483199577c6e92dc 100644 (file)
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -656,6 +656,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                         goto err_early_kill;
         }
  
+       raw_spin_lock(&current->blocked_lock);
         __set_task_blocked_on(current, lock);
         set_current_state(state);
         trace_contention_begin(lock, LCB_F_MUTEX);
@@ -669,8 +670,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                  * the handoff.
                  */
                 if (__mutex_trylock(lock))
-                       goto acquired;
+                       break;
  
+               raw_spin_unlock(&current->blocked_lock);
                 /*
                  * Check for signals and kill conditions while holding
                  * wait_lock. This ensures the lock cancellation is ordered
@@ -693,12 +695,14 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
  
                 first = __mutex_waiter_is_first(lock, &waiter);
  
+               raw_spin_lock_irqsave(&lock->wait_lock, flags);
+               raw_spin_lock(&current->blocked_lock);
                 /*
                  * As we likely have been woken up by task
                  * that has cleared our blocked_on state, re-set
                  * it to the lock we are trying to acquire.
                  */
-               set_task_blocked_on(current, lock);
+               __set_task_blocked_on(current, lock);
                 set_current_state(state);
                 /*
                  * Here we order against unlock; we must either see it change
@@ -709,25 +713,33 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                         break;
  
                 if (first) {
-                       trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+                       bool opt_acquired;
+
                         /*
                          * mutex_optimistic_spin() can call schedule(), so
-                        * clear blocked on so we don't become unselectable
+                        * we need to release these locks before calling it,
+                        * and clear blocked_on so we don't become unselectable
                          * to run.
                          */
-                       clear_task_blocked_on(current, lock);
-                       if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
+                       __clear_task_blocked_on(current, lock);
+                       raw_spin_unlock(&current->blocked_lock);
+                       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+                       trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+                       opt_acquired = mutex_optimistic_spin(lock, ww_ctx, &waiter);
+
+                       raw_spin_lock_irqsave(&lock->wait_lock, flags);
+                       raw_spin_lock(&current->blocked_lock);
+                       __set_task_blocked_on(current, lock);
+
+                       if (opt_acquired)
                                 break;
-                       set_task_blocked_on(current, lock);
                         trace_contention_begin(lock, LCB_F_MUTEX);
                 }
-
-               raw_spin_lock_irqsave(&lock->wait_lock, flags);
         }
-       raw_spin_lock_irqsave(&lock->wait_lock, flags);
-acquired:
         __clear_task_blocked_on(current, lock);
         __set_current_state(TASK_RUNNING);
+       raw_spin_unlock(&current->blocked_lock);
  
         if (ww_ctx) {
                 /*
@@ -756,11 +768,11 @@ skip_wait:
         return 0;
  
  err:
-       __clear_task_blocked_on(current, lock);
+       clear_task_blocked_on(current, lock);
         __set_current_state(TASK_RUNNING);
         __mutex_remove_waiter(lock, &waiter);
  err_early_kill:
-       WARN_ON(__get_task_blocked_on(current));
+       WARN_ON(get_task_blocked_on(current));
         trace_contention_end(lock, ret);
         raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
         debug_mutex_free_waiter(&waiter);
@@ -971,7 +983,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
                 next = waiter->task;
  
                 debug_mutex_wake_waiter(lock, waiter);
-               __clear_task_blocked_on(next, lock);
+               clear_task_blocked_on(next, lock);
                 wake_q_add(&wake_q, next);
         }
  
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h

index 9ad4da8cea00458bafb741d30f777fe8ddcb5f5e..7a8ba13fee949ee22d4493882ece41984e2a9dd4 100644 (file)
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -47,6 +47,12 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
         return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
  }
  
+static inline struct mutex *get_task_blocked_on(struct task_struct *p)
+{
+       guard(raw_spinlock_irqsave)(&p->blocked_lock);
+       return __get_task_blocked_on(p);
+}
+
  #ifdef CONFIG_DEBUG_MUTEXES
  extern void debug_mutex_lock_common(struct mutex *lock,
                                     struct mutex_waiter *waiter);
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h

index 31a785afee6c0ebfa36a2ceecc76e8fd27c9a739..e4a81790ea7ddd3d11a96e7a207378b307a8f9dc 100644 (file)
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -289,7 +289,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
                  * blocked_on pointer. Otherwise we can see circular
                  * blocked_on relationships that can't resolve.
                  */
-               __clear_task_blocked_on(waiter->task, lock);
+               clear_task_blocked_on(waiter->task, lock);
                 wake_q_add(wake_q, waiter->task);
         }
  
@@ -347,7 +347,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
                          * are waking the mutex owner, who may be currently
                          * blocked on a different mutex.
                          */
-                       __clear_task_blocked_on(owner, NULL);
+                       clear_task_blocked_on(owner, NULL);
                         wake_q_add(wake_q, owner);
                 }
                 return true;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 5b7f378af0422a215c6207d298399dcd63d6b108..1913dbc68eb98360f8095c15188a678a79603d55 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6584,6 +6584,7 @@ static struct task_struct *proxy_deactivate(struct rq *rq, struct task_struct *d
   *   p->pi_lock
   *     rq->lock
   *       mutex->wait_lock
+ *         p->blocked_lock
   *
   * Returns the task that is going to be used as execution context (the one
   * that is actually going to be run on cpu_of(rq)).
@@ -6603,8 +6604,9 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
                  * and ensure @owner sticks around.
                  */
                 guard(raw_spinlock)(&mutex->wait_lock);
+               guard(raw_spinlock)(&p->blocked_lock);
  
-               /* Check again that p is blocked with wait_lock held */
+               /* Check again that p is blocked with blocked_lock held */
                 if (mutex != __get_task_blocked_on(p)) {
                         /*
                          * Something changed in the blocked_on chain and
author	John Stultz <jstultz@google.com>
	Tue, 24 Mar 2026 19:13:19 +0000 (19:13 +0000)
committer	Peter Zijlstra <peterz@infradead.org>
	Fri, 3 Apr 2026 12:23:39 +0000 (14:23 +0200)
include/linux/sched.h		patch | blob | blame | history
init/init_task.c		patch | blob | blame | history
kernel/fork.c		patch | blob | blame | history
kernel/locking/mutex-debug.c		patch | blob | blame | history
kernel/locking/mutex.c		patch | blob | blame | history
kernel/locking/mutex.h		patch | blob | blame | history
kernel/locking/ww_mutex.h		patch | blob | blame | history
kernel/sched/core.c		patch | blob | blame | history