rqspinlock: Protect pending bit owners from stalls
author Kumar Kartikeya Dwivedi <memxor@gmail.com>
Sun, 16 Mar 2025 04:05:25 +0000 (21:05 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Wed, 19 Mar 2025 15:03:05 +0000 (08:03 -0700)
The pending bit is used to avoid queueing when the lock is
uncontended, and has demonstrated benefits for the two-contender
scenario, especially on x86. If we acquire the pending bit and then
wait for the locked bit to disappear, we may get stuck because the
lock owner is not making progress. Hence, this waiting loop must be
protected with a timeout check.
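
As a rough userspace sketch of this pattern (not the kernel
implementation; the helper names wait_for_unlocked() and mono_ns() are
made up for this illustration), the deadline check is folded directly
into the spin condition:

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <time.h>

    /* Simplified stand-in for the locked byte of the lock word. */
    struct demo_lock {
            _Atomic uint8_t locked;
    };

    static uint64_t mono_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    /*
     * Spin until the locked byte drops to zero, but give up once the
     * deadline passes: returns 0 on success, -ETIMEDOUT if the owner
     * never releases the lock within timeout_ns.
     */
    static int wait_for_unlocked(struct demo_lock *lock, uint64_t timeout_ns)
    {
            uint64_t deadline = mono_ns() + timeout_ns;

            while (atomic_load_explicit(&lock->locked, memory_order_acquire)) {
                    if (mono_ns() > deadline)
                            return -ETIMEDOUT;
            }
            return 0;
    }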

To recover gracefully once we decide to abort our lock acquisition
attempt in this case, we must unset the pending bit, since we own it.
If all waiters undo their changes and exit gracefully, the lock word
can be restored to the unlocked state once every participant (owner,
waiters) has been recovered, and the lock remains usable. Hence, set
the pending bit back to zero before returning to the caller.
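
Continuing the sketch above (again with invented names and bit layout;
the kernel uses clear_pending() on a 32-bit lock word laid out as
tail/pending/locked), the recovery path simply undoes the bit this
waiter owns before reporting the failure:

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdint.h>

    /* Bit layout assumed for this sketch only. */
    #define DEMO_PENDING_BIT (1u << 8)

    struct demo_qlock {
            _Atomic uint32_t val;   /* tail | pending | locked */
    };

    /*
     * Pending-bit waiter that timed out: clear the pending bit we set
     * earlier so the lock word can settle back to the unlocked state
     * once the stuck owner is recovered, then tell the caller we failed.
     */
    static int demo_abort_pending(struct demo_qlock *lock)
    {
            atomic_fetch_and_explicit(&lock->val, ~DEMO_PENDING_BIT,
                                      memory_order_relaxed);
            return -ETIMEDOUT;
    }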

Introduce a lockevent (rqspinlock_lock_timeout) to capture timeout
event statistics.

Reviewed-by: Barret Rhoden <brho@google.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20250316040541.108729-10-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
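
For context, a hypothetical caller of the slowpath now has to honour
the new return value; the wrapper name demo_res_lock() and the exact
fast path below are illustrative assumptions, not part of this commit:

    /* Hypothetical caller: the slowpath may now fail with -ETIMEDOUT. */
    static int demo_res_lock(rqspinlock_t *lock)
    {
            u32 val;

            /* Fast path: 0 -> locked transition, as in the qspinlock headers. */
            val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
            if (likely(val == 0))
                    return 0;

            /*
             * Slow path: on -ETIMEDOUT the lock was NOT acquired, so the
             * error must be propagated and the critical section skipped.
             */
            return resilient_queued_spin_lock_slowpath(lock, val);
    }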
include/asm-generic/rqspinlock.h
kernel/bpf/rqspinlock.c
kernel/locking/lock_events_list.h

diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h
index 5dd4dd8aee69e8742f95a9115b6d42f7dd700ecb..9bd11cb7acd60c7831600b1a036f076d31f9b0db 100644
--- a/include/asm-generic/rqspinlock.h
+++ b/include/asm-generic/rqspinlock.h
@@ -15,7 +15,7 @@
 struct qspinlock;
 typedef struct qspinlock rqspinlock_t;
 
-extern void resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val);
+extern int resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val);
 
 /*
  * Default timeout for waiting loops is 0.25 seconds
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index d429b923b58f352e7d23d070629130e293b29527..262294cfd36f402fd9dd577a50e2d3e1e18d6877 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -138,6 +138,10 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);
  * @lock: Pointer to queued spinlock structure
  * @val: Current value of the queued spinlock 32-bit word
  *
+ * Return:
+ * * 0         - Lock was acquired successfully.
+ * * -ETIMEDOUT - Lock acquisition failed because of timeout.
+ *
  * (queue tail, pending bit, lock value)
  *
  *              fast     :    slow                                  :    unlock
@@ -154,12 +158,12 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);
  * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
  *   queue               :         ^--'                             :
  */
-void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
+int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 {
        struct mcs_spinlock *prev, *next, *node;
        struct rqspinlock_timeout ts;
+       int idx, ret = 0;
        u32 old, tail;
-       int idx;
 
        BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
@@ -217,8 +221,25 @@ void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
         * clear_pending_set_locked() implementations imply full
         * barriers.
         */
-       if (val & _Q_LOCKED_MASK)
-               smp_cond_load_acquire(&lock->locked, !VAL);
+       if (val & _Q_LOCKED_MASK) {
+               RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
+               res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret));
+       }
+
+       if (ret) {
+               /*
+                * We waited for the locked bit to go back to 0, as the pending
+                * waiter, but timed out. We need to clear the pending bit since
+                * we own it. Once a stuck owner has been recovered, the lock
+                * must be restored to a valid state, hence removing the pending
+                * bit is necessary.
+                *
+                * *,1,* -> *,0,*
+                */
+               clear_pending(lock);
+               lockevent_inc(rqspinlock_lock_timeout);
+               return ret;
+       }
 
        /*
         * take ownership and clear the pending bit.
@@ -227,7 +248,7 @@ void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
         */
        clear_pending_set_locked(lock);
        lockevent_inc(lock_pending);
-       return;
+       return 0;
 
        /*
         * End of pending bit optimistic spinning and beginning of MCS
@@ -378,5 +399,6 @@ release:
         * release the node
         */
        __this_cpu_dec(rqnodes[0].mcs.count);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath);
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 97fb6f3f840aa7e5d5a0baca616a913f397548ff..c5286249994d0c8a18533397ec24c8e26c110587 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -49,6 +49,11 @@ LOCK_EVENT(lock_use_node4)   /* # of locking ops that use 4th percpu node */
 LOCK_EVENT(lock_no_node)       /* # of locking ops w/o using percpu node    */
 #endif /* CONFIG_QUEUED_SPINLOCKS */
 
+/*
+ * Locking events for Resilient Queued Spin Lock
+ */
+LOCK_EVENT(rqspinlock_lock_timeout)    /* # of locking ops that timeout        */
+
 /*
  * Locking events for rwsem
  */