git.ipfire.org Git - thirdparty/linux.git/commitdiff
xfs: use a lockref for the buffer reference count
author Christoph Hellwig <hch@lst.de>
Mon, 23 Mar 2026 07:50:52 +0000 (08:50 +0100)
committer Carlos Maiolino <cem@kernel.org>
Mon, 30 Mar 2026 14:34:05 +0000 (16:34 +0200)
The lockref structure allows incrementing/decrementing counters like
an atomic_t for the fast path, while still allowing complex slow path
operations as if the counter was protected by a lock.  The only slow
path operations that actually need to take the lock are the final
put, LRU evictions and marking a buffer stale.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_trace.h

index 61e393ac495282839c6d9732863e49684648410e..d53a1bdbc789c3f836330e329acd631c0f4a3480 100644 (file)
@@ -31,20 +31,20 @@ struct kmem_cache *xfs_buf_cache;
  *
  * xfs_buf_stale:
  *     b_sema (caller holds)
- *       b_lock
+ *       b_lockref.lock
  *         lru_lock
  *
  * xfs_buf_rele:
- *     b_lock
+ *     b_lockref.lock
  *       lru_lock
  *
  * xfs_buftarg_drain_rele
  *     lru_lock
- *       b_lock (trylock due to inversion)
+ *       b_lockref.lock (trylock due to inversion)
  *
  * xfs_buftarg_isolate
  *     lru_lock
- *       b_lock (trylock due to inversion)
+ *       b_lockref.lock (trylock due to inversion)
  */
 
 static void xfs_buf_submit(struct xfs_buf *bp);
@@ -78,11 +78,11 @@ xfs_buf_stale(
         */
        bp->b_flags &= ~_XBF_DELWRI_Q;
 
-       spin_lock(&bp->b_lock);
+       spin_lock(&bp->b_lockref.lock);
        atomic_set(&bp->b_lru_ref, 0);
-       if (bp->b_hold >= 0)
+       if (!__lockref_is_dead(&bp->b_lockref))
                list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
-       spin_unlock(&bp->b_lock);
+       spin_unlock(&bp->b_lockref.lock);
 }
 
 static void
@@ -274,10 +274,8 @@ xfs_buf_alloc(
         * inserting into the hash table are safe (and will have to wait for
         * the unlock to do anything non-trivial).
         */
-       bp->b_hold = 1;
+       lockref_init(&bp->b_lockref);
        sema_init(&bp->b_sema, 0); /* held, no waiters */
-
-       spin_lock_init(&bp->b_lock);
        atomic_set(&bp->b_lru_ref, 1);
        init_completion(&bp->b_iowait);
        INIT_LIST_HEAD(&bp->b_lru);
@@ -434,20 +432,6 @@ xfs_buf_find_lock(
        return 0;
 }
 
-static bool
-xfs_buf_try_hold(
-       struct xfs_buf          *bp)
-{
-       spin_lock(&bp->b_lock);
-       if (bp->b_hold == -1) {
-               spin_unlock(&bp->b_lock);
-               return false;
-       }
-       bp->b_hold++;
-       spin_unlock(&bp->b_lock);
-       return true;
-}
-
 static inline int
 xfs_buf_lookup(
        struct xfs_buf_cache    *bch,
@@ -460,7 +444,7 @@ xfs_buf_lookup(
 
        rcu_read_lock();
        bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
-       if (!bp || !xfs_buf_try_hold(bp)) {
+       if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
                rcu_read_unlock();
                return -ENOENT;
        }
@@ -511,7 +495,7 @@ xfs_buf_find_insert(
                error = PTR_ERR(bp);
                goto out_free_buf;
        }
-       if (bp && xfs_buf_try_hold(bp)) {
+       if (bp && lockref_get_not_dead(&bp->b_lockref)) {
                /* found an existing buffer */
                rcu_read_unlock();
                error = xfs_buf_find_lock(bp, flags);
@@ -853,16 +837,14 @@ xfs_buf_hold(
 {
        trace_xfs_buf_hold(bp, _RET_IP_);
 
-       spin_lock(&bp->b_lock);
-       bp->b_hold++;
-       spin_unlock(&bp->b_lock);
+       lockref_get(&bp->b_lockref);
 }
 
 static void
 xfs_buf_destroy(
        struct xfs_buf          *bp)
 {
-       ASSERT(bp->b_hold < 0);
+       ASSERT(__lockref_is_dead(&bp->b_lockref));
        ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
        if (!xfs_buf_is_uncached(bp)) {
@@ -888,19 +870,20 @@ xfs_buf_rele(
 {
        trace_xfs_buf_rele(bp, _RET_IP_);
 
-       spin_lock(&bp->b_lock);
-       if (!--bp->b_hold) {
+       if (lockref_put_or_lock(&bp->b_lockref))
+               return;
+       if (!--bp->b_lockref.count) {
                if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
                        goto kill;
                list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
        }
-       spin_unlock(&bp->b_lock);
+       spin_unlock(&bp->b_lockref.lock);
        return;
 
 kill:
-       bp->b_hold = -1;
+       lockref_mark_dead(&bp->b_lockref);
        list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
-       spin_unlock(&bp->b_lock);
+       spin_unlock(&bp->b_lockref.lock);
 
        xfs_buf_destroy(bp);
 }
@@ -1471,18 +1454,18 @@ xfs_buftarg_drain_rele(
        struct xfs_buf          *bp = container_of(item, struct xfs_buf, b_lru);
        struct list_head        *dispose = arg;
 
-       if (!spin_trylock(&bp->b_lock))
+       if (!spin_trylock(&bp->b_lockref.lock))
                return LRU_SKIP;
-       if (bp->b_hold > 0) {
+       if (bp->b_lockref.count > 0) {
                /* need to wait, so skip it this pass */
-               spin_unlock(&bp->b_lock);
+               spin_unlock(&bp->b_lockref.lock);
                trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
                return LRU_SKIP;
        }
 
-       bp->b_hold = -1;
+       lockref_mark_dead(&bp->b_lockref);
        list_lru_isolate_move(lru, item, dispose);
-       spin_unlock(&bp->b_lock);
+       spin_unlock(&bp->b_lockref.lock);
        return LRU_REMOVED;
 }
 
@@ -1564,18 +1547,19 @@ xfs_buftarg_isolate(
        struct list_head        *dispose = arg;
 
        /*
-        * we are inverting the lru lock/bp->b_lock here, so use a trylock.
-        * If we fail to get the lock, just skip it.
+        * We are inverting the lru lock vs bp->b_lockref.lock order here, so
+        * use a trylock.  If we fail to get the lock, just skip the buffer.
         */
-       if (!spin_trylock(&bp->b_lock))
+       if (!spin_trylock(&bp->b_lockref.lock))
                return LRU_SKIP;
+
        /*
         * Decrement the b_lru_ref count unless the value is already
         * zero. If the value is already zero, we need to reclaim the
         * buffer, otherwise it gets another trip through the LRU.
         */
        if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-               spin_unlock(&bp->b_lock);
+               spin_unlock(&bp->b_lockref.lock);
                return LRU_ROTATE;
        }
 
@@ -1583,15 +1567,15 @@ xfs_buftarg_isolate(
         * If the buffer is in use, remove it from the LRU for now as we can't
         * free it.  It will be freed when the last reference drops.
         */
-       if (bp->b_hold > 0) {
+       if (bp->b_lockref.count > 0) {
                list_lru_isolate(lru, &bp->b_lru);
-               spin_unlock(&bp->b_lock);
+               spin_unlock(&bp->b_lockref.lock);
                return LRU_REMOVED;
        }
 
-       bp->b_hold = -1;
+       lockref_mark_dead(&bp->b_lockref);
        list_lru_isolate_move(lru, item, dispose);
-       spin_unlock(&bp->b_lock);
+       spin_unlock(&bp->b_lockref.lock);
        return LRU_REMOVED;
 }
 
index e7324d58bd96b26061bf2ae045c3e63cffec149f..3a1d066e1c13f3895a3e125c91be8fc97a00efcc 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/dax.h>
 #include <linux/uio.h>
 #include <linux/list_lru.h>
+#include <linux/lockref.h>
 
 extern struct kmem_cache *xfs_buf_cache;
 
@@ -154,7 +155,7 @@ struct xfs_buf {
 
        xfs_daddr_t             b_rhash_key;    /* buffer cache index */
        int                     b_length;       /* size of buffer in BBs */
-       int                     b_hold;         /* reference count */
+       struct lockref          b_lockref;      /* refcount + lock */
        atomic_t                b_lru_ref;      /* lru reclaim ref count */
        xfs_buf_flags_t         b_flags;        /* status flags */
        struct semaphore        b_sema;         /* semaphore for lockables */
@@ -164,7 +165,6 @@ struct xfs_buf {
         * bt_lru_lock and not by b_sema
         */
        struct list_head        b_lru;          /* lru list */
-       spinlock_t              b_lock;         /* internal state lock */
        wait_queue_head_t       b_waiters;      /* unpin waiters */
        struct list_head        b_list;
        struct xfs_perag        *b_pag;
index 5e8190fe2be9365ed18b656e645abf0bef6eb8ac..60d1e605dfa5b5dd3e4bbf46cd441f5416a05d78 100644 (file)
@@ -740,7 +740,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
                __entry->dev = bp->b_target->bt_dev;
                __entry->bno = xfs_buf_daddr(bp);
                __entry->nblks = bp->b_length;
-               __entry->hold = bp->b_hold;
+               __entry->hold = bp->b_lockref.count;
                __entry->pincount = atomic_read(&bp->b_pin_count);
                __entry->lockval = bp->b_sema.count;
                __entry->flags = bp->b_flags;
@@ -814,7 +814,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
                __entry->bno = xfs_buf_daddr(bp);
                __entry->length = bp->b_length;
                __entry->flags = flags;
-               __entry->hold = bp->b_hold;
+               __entry->hold = bp->b_lockref.count;
                __entry->pincount = atomic_read(&bp->b_pin_count);
                __entry->lockval = bp->b_sema.count;
                __entry->caller_ip = caller_ip;
@@ -858,7 +858,7 @@ TRACE_EVENT(xfs_buf_ioerror,
                __entry->dev = bp->b_target->bt_dev;
                __entry->bno = xfs_buf_daddr(bp);
                __entry->length = bp->b_length;
-               __entry->hold = bp->b_hold;
+               __entry->hold = bp->b_lockref.count;
                __entry->pincount = atomic_read(&bp->b_pin_count);
                __entry->lockval = bp->b_sema.count;
                __entry->error = error;
@@ -902,7 +902,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
                __entry->buf_bno = xfs_buf_daddr(bip->bli_buf);
                __entry->buf_len = bip->bli_buf->b_length;
                __entry->buf_flags = bip->bli_buf->b_flags;
-               __entry->buf_hold = bip->bli_buf->b_hold;
+               __entry->buf_hold = bip->bli_buf->b_lockref.count;
                __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
                __entry->buf_lockval = bip->bli_buf->b_sema.count;
                __entry->li_flags = bip->bli_item.li_flags;
@@ -5206,7 +5206,7 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
                __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
                __entry->bno = xfs_buf_daddr(bp);
                __entry->nblks = bp->b_length;
-               __entry->hold = bp->b_hold;
+               __entry->hold = bp->b_lockref.count;
                __entry->pincount = atomic_read(&bp->b_pin_count);
                __entry->lockval = bp->b_sema.count;
                __entry->flags = bp->b_flags;