]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfs: don't keep a reference for buffers on the LRU
authorChristoph Hellwig <hch@lst.de>
Mon, 23 Mar 2026 07:50:51 +0000 (08:50 +0100)
committerCarlos Maiolino <cem@kernel.org>
Mon, 30 Mar 2026 14:34:05 +0000 (16:34 +0200)
Currently the buffer cache adds a reference to b_hold for buffers that
are on the LRU.  This seems to go all the way back and allows releasing
buffers from the LRU using xfs_buf_rele.  But it makes xfs_buf_rele
really complicated and differs from how other LRUs are implemented in
Linux.

Switch to not having a reference for buffers in the LRU, and use a
separate negative hold value to mark buffers as dead.  This simplifies
xfs_buf_rele, which now just deals with the last "real" reference,
and prepares for using the lockref primitive.

This also removes the b_lock protection for removing buffers from the
buffer hash.  This is the desired outcome because the rhashtable is
fully internally synchronized, and previously the lock was mostly
held out of ordering constraints in xfs_buf_rele_cached.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h

index d2f3c50d80e70fd06c395116c4275cf1f57f4e04..61e393ac495282839c6d9732863e49684648410e 100644 (file)
@@ -80,11 +80,8 @@ xfs_buf_stale(
 
        spin_lock(&bp->b_lock);
        atomic_set(&bp->b_lru_ref, 0);
-       if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
-           (list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru)))
-               bp->b_hold--;
-
-       ASSERT(bp->b_hold >= 1);
+       if (bp->b_hold >= 0)
+               list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
        spin_unlock(&bp->b_lock);
 }
 
@@ -442,7 +439,7 @@ xfs_buf_try_hold(
        struct xfs_buf          *bp)
 {
        spin_lock(&bp->b_lock);
-       if (bp->b_hold == 0) {
+       if (bp->b_hold == -1) {
                spin_unlock(&bp->b_lock);
                return false;
        }
@@ -862,76 +859,24 @@ xfs_buf_hold(
 }
 
 static void
-xfs_buf_rele_uncached(
-       struct xfs_buf          *bp)
-{
-       ASSERT(list_empty(&bp->b_lru));
-
-       spin_lock(&bp->b_lock);
-       if (--bp->b_hold) {
-               spin_unlock(&bp->b_lock);
-               return;
-       }
-       spin_unlock(&bp->b_lock);
-       xfs_buf_free(bp);
-}
-
-static void
-xfs_buf_rele_cached(
+xfs_buf_destroy(
        struct xfs_buf          *bp)
 {
-       struct xfs_buftarg      *btp = bp->b_target;
-       struct xfs_perag        *pag = bp->b_pag;
-       struct xfs_buf_cache    *bch = xfs_buftarg_buf_cache(btp, pag);
-       bool                    freebuf = false;
-
-       trace_xfs_buf_rele(bp, _RET_IP_);
-
-       spin_lock(&bp->b_lock);
-       ASSERT(bp->b_hold >= 1);
-       if (bp->b_hold > 1) {
-               bp->b_hold--;
-               goto out_unlock;
-       }
+       ASSERT(bp->b_hold < 0);
+       ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
-       /* we are asked to drop the last reference */
-       if (atomic_read(&bp->b_lru_ref)) {
-               /*
-                * If the buffer is added to the LRU, keep the reference to the
-                * buffer for the LRU and clear the (now stale) dispose list
-                * state flag, else drop the reference.
-                */
-               if (list_lru_add_obj(&btp->bt_lru, &bp->b_lru))
-                       bp->b_state &= ~XFS_BSTATE_DISPOSE;
-               else
-                       bp->b_hold--;
-       } else {
-               bp->b_hold--;
-               /*
-                * most of the time buffers will already be removed from the
-                * LRU, so optimise that case by checking for the
-                * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
-                * was on was the disposal list
-                */
-               if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
-                       list_lru_del_obj(&btp->bt_lru, &bp->b_lru);
-               } else {
-                       ASSERT(list_empty(&bp->b_lru));
-               }
+       if (!xfs_buf_is_uncached(bp)) {
+               struct xfs_buf_cache    *bch =
+                       xfs_buftarg_buf_cache(bp->b_target, bp->b_pag);
 
-               ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
                rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
                                xfs_buf_hash_params);
-               if (pag)
-                       xfs_perag_put(pag);
-               freebuf = true;
-       }
 
-out_unlock:
-       spin_unlock(&bp->b_lock);
+               if (bp->b_pag)
+                       xfs_perag_put(bp->b_pag);
+       }
 
-       if (freebuf)
-               xfs_buf_free(bp);
+       xfs_buf_free(bp);
 }
 
 /*
@@ -942,10 +887,22 @@ xfs_buf_rele(
        struct xfs_buf          *bp)
 {
        trace_xfs_buf_rele(bp, _RET_IP_);
-       if (xfs_buf_is_uncached(bp))
-               xfs_buf_rele_uncached(bp);
-       else
-               xfs_buf_rele_cached(bp);
+
+       spin_lock(&bp->b_lock);
+       if (!--bp->b_hold) {
+               if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
+                       goto kill;
+               list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
+       }
+       spin_unlock(&bp->b_lock);
+       return;
+
+kill:
+       bp->b_hold = -1;
+       list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
+       spin_unlock(&bp->b_lock);
+
+       xfs_buf_destroy(bp);
 }
 
 /*
@@ -1254,9 +1211,11 @@ xfs_buf_ioerror_alert(
 
 /*
  * To simulate an I/O failure, the buffer must be locked and held with at least
- * three references. The LRU reference is dropped by the stale call. The buf
- * item reference is dropped via ioend processing. The third reference is owned
- * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
+ * two references.
+ *
+ * The buf item reference is dropped via ioend processing. The second reference
+ * is owned by the caller and is dropped on I/O completion if the buffer is
+ * XBF_ASYNC.
  */
 void
 xfs_buf_ioend_fail(
@@ -1514,19 +1473,14 @@ xfs_buftarg_drain_rele(
 
        if (!spin_trylock(&bp->b_lock))
                return LRU_SKIP;
-       if (bp->b_hold > 1) {
+       if (bp->b_hold > 0) {
                /* need to wait, so skip it this pass */
                spin_unlock(&bp->b_lock);
                trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
                return LRU_SKIP;
        }
 
-       /*
-        * clear the LRU reference count so the buffer doesn't get
-        * ignored in xfs_buf_rele().
-        */
-       atomic_set(&bp->b_lru_ref, 0);
-       bp->b_state |= XFS_BSTATE_DISPOSE;
+       bp->b_hold = -1;
        list_lru_isolate_move(lru, item, dispose);
        spin_unlock(&bp->b_lock);
        return LRU_REMOVED;
@@ -1581,7 +1535,7 @@ xfs_buftarg_drain(
 "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
                                        (long long)xfs_buf_daddr(bp));
                        }
-                       xfs_buf_rele(bp);
+                       xfs_buf_destroy(bp);
                }
                if (loop++ != 0)
                        delay(100);
@@ -1625,7 +1579,17 @@ xfs_buftarg_isolate(
                return LRU_ROTATE;
        }
 
-       bp->b_state |= XFS_BSTATE_DISPOSE;
+       /*
+        * If the buffer is in use, remove it from the LRU for now as we can't
+        * free it.  It will be freed when the last reference drops.
+        */
+       if (bp->b_hold > 0) {
+               list_lru_isolate(lru, &bp->b_lru);
+               spin_unlock(&bp->b_lock);
+               return LRU_REMOVED;
+       }
+
+       bp->b_hold = -1;
        list_lru_isolate_move(lru, item, dispose);
        spin_unlock(&bp->b_lock);
        return LRU_REMOVED;
@@ -1647,7 +1611,7 @@ xfs_buftarg_shrink_scan(
                struct xfs_buf *bp;
                bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
                list_del_init(&bp->b_lru);
-               xfs_buf_rele(bp);
+               xfs_buf_destroy(bp);
        }
 
        return freed;
index e25cd2a160f31c9887b4280f617dc5f5fc2391c7..e7324d58bd96b26061bf2ae045c3e63cffec149f 100644 (file)
@@ -68,11 +68,6 @@ typedef unsigned int xfs_buf_flags_t;
        { XBF_INCORE,           "INCORE" }, \
        { XBF_TRYLOCK,          "TRYLOCK" }
 
-/*
- * Internal state flags.
- */
-#define XFS_BSTATE_DISPOSE      (1 << 0)       /* buffer being discarded */
-
 struct xfs_buf_cache {
        struct rhashtable       bc_hash;
 };
@@ -159,7 +154,7 @@ struct xfs_buf {
 
        xfs_daddr_t             b_rhash_key;    /* buffer cache index */
        int                     b_length;       /* size of buffer in BBs */
-       unsigned int            b_hold;         /* reference count */
+       int                     b_hold;         /* reference count */
        atomic_t                b_lru_ref;      /* lru reclaim ref count */
        xfs_buf_flags_t         b_flags;        /* status flags */
        struct semaphore        b_sema;         /* semaphore for lockables */
@@ -170,7 +165,6 @@ struct xfs_buf {
         */
        struct list_head        b_lru;          /* lru list */
        spinlock_t              b_lock;         /* internal state lock */
-       unsigned int            b_state;        /* internal state flags */
        wait_queue_head_t       b_waiters;      /* unpin waiters */
        struct list_head        b_list;
        struct xfs_perag        *b_pag;