On shutdown when quotas are enabled, the shutdown can deadlock
trying to unpin the dquot buffer buf_log_item like so:
[ 3319.483590] task:kworker/20:0H state:D stack:14360 pid:1962230 tgid:1962230 ppid:2 task_flags:0x4208060 flags:0x00004000
[ 3319.493966] Workqueue: xfs-log/dm-6 xlog_ioend_work
[ 3319.498458] Call Trace:
[ 3319.500800] <TASK>
[ 3319.502809] __schedule+0x699/0xb70
[ 3319.512672] schedule+0x64/0xd0
[ 3319.515573] schedule_timeout+0x30/0xf0
[ 3319.528125] __down_common+0xc3/0x200
[ 3319.531488] __down+0x1d/0x30
[ 3319.534186] down+0x48/0x50
[ 3319.540501] xfs_buf_lock+0x3d/0xe0
[ 3319.543609] xfs_buf_item_unpin+0x85/0x1b0
[ 3319.547248] xlog_cil_committed+0x289/0x570
[ 3319.571411] xlog_cil_process_committed+0x6d/0x90
[ 3319.575590] xlog_state_shutdown_callbacks+0x52/0x110
[ 3319.580017] xlog_force_shutdown+0x169/0x1a0
[ 3319.583780] xlog_ioend_work+0x7c/0xb0
[ 3319.587049] process_scheduled_works+0x1d6/0x400
[ 3319.591127] worker_thread+0x202/0x2e0
[ 3319.594452] kthread+0x20c/0x240
The CIL push has seen the shutdown, so it has aborted the push and
is running CIL checkpoint completion to abort all the items in the
checkpoint. This calls ->iop_unpin(remove = true) to clean up the
log items in the checkpoint.
When a buffer log item is unpinned like this, it needs to lock the
buffer to run IO completion to correctly fail the buffer and run all
the required completions to fail attached log items as well. In this
case, the attempt to lock the buffer on unpin is hanging because the
buffer is already locked.
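In simplified form, the unpin side of that hang looks something like
this (a sketch of the abort path through xfs_buf_item_unpin(), not the
exact code):

  /* CIL checkpoint abort: ->iop_unpin(remove = true) on a buffer item */
  xfs_buf_lock(bp);        /* hangs here if anyone else holds the lock */
  bp->b_flags |= XBF_ASYNC;
  xfs_buf_ioend_fail(bp);  /* fail the buffer and attached log items */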
I suspected a leaked XFS_BLI_HOLD state because of XFS_BLI_STALE
handling changes I was testing, so I went looking for
pin events on HOLD buffers and unpin events on locked buffers. That
isolated this one buffer with these two events:
xfs_buf_item_pin: dev 251:6 daddr 0xa910 bbcount 0x2 hold 2 pincount 0 lock 0 flags DONE|KMEM recur 0 refcount 1 bliflags HOLD|DIRTY|LOGGED liflags DIRTY
....
xfs_buf_item_unpin: dev 251:6 daddr 0xa910 bbcount 0x2 hold 4 pincount 1 lock 0 flags DONE|KMEM recur 0 refcount 1 bliflags DIRTY liflags ABORTED
Firstly, bbcount = 0x2, which means it is not a single sector
structure. That rules out every xfs_trans_bhold() case except one:
dquot buffers.
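For reference, dquot buffers span a single filesystem block
(paraphrased from xfs_qm_init_quotainfo()), so with 1kB blocks and
512 byte sectors they are exactly two basic blocks long, matching the
bbcount 0x2 above:

  /* one filesystem block per dquot cluster buffer */
  qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
  /* 1kB FSB, 512 byte sectors: qi_dqchunklen == 2 basic blocks */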
Then hung task dumping gave this trace:
[ 3197.312078] task:fsync-tester state:D stack:12080 pid:2051125 tgid:2051125 ppid:1643233 task_flags:0x400000 flags:0x00004002
[ 3197.323007] Call Trace:
[ 3197.325581] <TASK>
[ 3197.327727] __schedule+0x699/0xb70
[ 3197.334582] schedule+0x64/0xd0
[ 3197.337672] schedule_timeout+0x30/0xf0
[ 3197.350139] wait_for_completion+0xbd/0x180
[ 3197.354235] __flush_workqueue+0xef/0x4e0
[ 3197.362229] xlog_cil_force_seq+0xa0/0x300
[ 3197.374447] xfs_log_force+0x77/0x230
[ 3197.378015] xfs_qm_dqunpin_wait+0x49/0xf0
[ 3197.382010] xfs_qm_dqflush+0x55/0x460
[ 3197.385663] xfs_qm_dquot_isolate+0x29e/0x4d0
[ 3197.389977] __list_lru_walk_one+0x141/0x220
[ 3197.398867] list_lru_walk_one+0x10/0x20
[ 3197.402713] xfs_qm_shrink_scan+0x6a/0x100
[ 3197.406699] do_shrink_slab+0x18a/0x350
[ 3197.410512] shrink_slab+0xf7/0x430
[ 3197.413967] drop_slab+0x97/0xf0
[ 3197.417121] drop_caches_sysctl_handler+0x59/0xc0
[ 3197.421654] proc_sys_call_handler+0x18b/0x280
[ 3197.426050] proc_sys_write+0x13/0x20
[ 3197.429750] vfs_write+0x2b8/0x3e0
[ 3197.438532] ksys_write+0x7e/0xf0
[ 3197.441742] __x64_sys_write+0x1b/0x30
[ 3197.445363] x64_sys_call+0x2c72/0x2f60
[ 3197.449044] do_syscall_64+0x6c/0x140
[ 3197.456341] entry_SYSCALL_64_after_hwframe+0x76/0x7e
Yup, another test run by check-parallel is running drop_caches
concurrently and the dquot shrinker for the hung filesystem is
running. That's trying to flush a dirty dquot from reclaim context,
and it is waiting on a log force to complete. xfs_qm_dqflush is called
with the dquot buffer held locked, and so we've called
xfs_log_force() with that buffer locked.
Now the log force is waiting for a workqueue flush to complete, and
that workqueue flush is waiting on CIL checkpoint processing to
finish.
The CIL checkpoint processing is aborting all the log items it has,
and that requires locking aborted buffers to cancel them.
Now, normally this isn't a problem if we are issuing a log force
to unpin an object, because the ->iop_unpin() method wakes pin
waiters first. That results in the pin waiter finishing off whatever
it was doing, dropping the lock and then xfs_buf_item_unpin() can
lock the buffer and fail it.
However, xfs_qm_dqflush() is waiting on the -dquot- unpin event, not
the dquot buffer unpin event, and so it never gets woken and so does
not drop the buffer lock.
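Putting the two sides together, the dependency cycle looks like this
(function names taken from the traces above, heavily simplified):

  reclaim:    xfs_dqflock(dqp)
              <dquot buffer locked>
              xfs_qm_dqflush()
                xfs_qm_dqunpin_wait()   <- waits on dqp->q_pincount
                  xfs_log_force()       <- waits on the CIL workqueue

  CIL abort:  xlog_cil_committed()
                xfs_buf_item_unpin(remove = true)
                  xfs_buf_lock()        <- waits on the buffer lock held
                                           by the reclaim side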
Inodes do not have this problem, as they can only be written from
one spot (->iop_push) whilst dquots can be written from multiple
places (memory reclaim, ->iop_push, xfs_qm_dqpurge, and quotacheck).
The reason that the dquot buffer has an attached buffer log item is
that it has been recently allocated. Initialisation of the dquot
buffer logs the buffer directly, thereby pinning it in memory. We
then modify the dquot in a separate operation; when memory reclaim
races with a shutdown, we trigger this deadlock.
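A rough sketch of that sequence (simplified; the allocation side lives
in xfs_dquot_disk_alloc(), helper names are from the current source
and the details are elided):

  /* transaction 1: allocate and initialise the dquot cluster buffer */
  xfs_trans_get_buf(...);
  xfs_qm_init_dquot_blk(tp, mp, id, type, bp);
          /* logs the whole buffer, pinning it until the checkpoint
             that contains it completes */

  /* transaction 2, later: modify the dquot itself */
  xfs_trans_log_dquot(tp, dqp);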
check-parallel reproduces this reliably on 1kB FSB filesystems with
quota enabled because it does all of these things concurrently
without having to explicitly write tests to exercise these corner
case conditions.
xfs_qm_dquot_logitem_push() doesn't have this deadlock because it
checks if the dquot is pinned before locking the dquot buffer and
skips it if it is pinned. This means the xfs_qm_dqunpin_wait()
log force in xfs_qm_dqflush() never triggers, and we unlock the
buffer safely, allowing a concurrent shutdown to fail the buffer
appropriately.
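That check is a simple pin count test made before any locks are taken
(paraphrased from xfs_qm_dquot_logitem_push()):

  if (atomic_read(&dqp->q_pincount) > 0)
          return XFS_ITEM_PINNED;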
xfs_qm_dqpurge() could have this problem as it is called from
quotacheck and we might have allocated dquot buffers when recording
the quota updates. This can be fixed by calling
xfs_qm_dqunpin_wait() before we lock the dquot buffer. Because we
hold the dquot locked, nothing will be able to add to the pin count
between the unpin_wait and the dqflush callout, so this now makes
xfs_qm_dqpurge() safe against this race.
xfs_qm_dquot_isolate() can also be fixed this same way but, quite
frankly, we shouldn't be doing IO in memory reclaim context. If the
dquot is pinned or dirty, simply rotate it and let memory reclaim
come back to it later, same as we do for inodes.
This then gets rid of the nasty issue in xfs_qm_flush_one() where
quotacheck writeback races with memory reclaim flushing the dquots.
We can lift xfs_qm_dqunpin_wait() up into this code, then get rid of
the "can't get the dqflush lock" buffer write to cycle the dqlfush
lock and enable it to be flushed again. checking if the dquot is
pinned and returning -EAGAIN so that the dquot walk will revisit the
dquot again later.
Finally, with xfs_qm_dqunpin_wait() lifted into all the callers,
we can remove it from the xfs_qm_dqflush() code.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
return error;
}
-/*
- * Push a single buffer on a delwri queue.
- *
- * The purpose of this function is to submit a single buffer of a delwri queue
- * and return with the buffer still on the original queue.
- *
- * The buffer locking and queue management logic between _delwri_pushbuf() and
- * _delwri_queue() guarantee that the buffer cannot be queued to another list
- * before returning.
- */
-int
-xfs_buf_delwri_pushbuf(
- struct xfs_buf *bp,
- struct list_head *buffer_list)
-{
- int error;
-
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
- trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
-
- xfs_buf_lock(bp);
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
- bp->b_flags |= XBF_WRITE;
- xfs_buf_submit(bp);
-
- /*
- * The buffer is now locked, under I/O but still on the original delwri
- * queue. Wait for I/O completion, restore the DELWRI_Q flag and
- * return with the buffer unlocked and still on the original queue.
- */
- error = xfs_buf_iowait(bp);
- bp->b_flags |= _XBF_DELWRI_Q;
- xfs_buf_unlock(bp);
-
- return error;
-}
-
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
/*
void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
-extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(!completion_done(&dqp->q_flush));
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
trace_xfs_dqflush(dqp);
-
- xfs_qm_dqunpin_wait(dqp);
-
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
dqp->q_flags |= XFS_DQFLAG_FREEING;
+ xfs_qm_dqunpin_wait(dqp);
xfs_dqflock(dqp);
/*
struct xfs_dquot *dqp = container_of(item,
struct xfs_dquot, q_lru);
struct xfs_qm_isolate *isol = arg;
+ enum lru_status ret = LRU_SKIP;
if (!xfs_dqlock_nowait(dqp))
goto out_miss_busy;
if (dqp->q_flags & XFS_DQFLAG_FREEING)
goto out_miss_unlock;
+ /*
+ * If the dquot is pinned or dirty, rotate it to the end of the LRU to
+ * give some time for it to be cleaned before we try to isolate it
+ * again.
+ */
+ ret = LRU_ROTATE;
+ if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) {
+ goto out_miss_unlock;
+ }
+
/*
* This dquot has acquired a reference in the meantime remove it from
* the freelist and try again.
}
/*
- * If the dquot is dirty, flush it. If it's already being flushed, just
- * skip it so there is time for the IO to complete before we try to
- * reclaim it again on the next LRU pass.
+ * The dquot may still be under IO, in which case the flush lock will be
+ * held. If we can't get the flush lock now, just skip over the dquot as
+ * if it was dirty.
*/
if (!xfs_dqflock_nowait(dqp))
goto out_miss_unlock;
- if (XFS_DQ_IS_DIRTY(dqp)) {
- struct xfs_buf *bp = NULL;
- int error;
-
- trace_xfs_dqreclaim_dirty(dqp);
-
- /* we have to drop the LRU lock to flush the dquot */
- spin_unlock(&lru->lock);
-
- error = xfs_dquot_use_attached_buf(dqp, &bp);
- if (!bp || error == -EAGAIN) {
- xfs_dqfunlock(dqp);
- goto out_unlock_dirty;
- }
-
- /*
- * dqflush completes dqflock on error, and the delwri ioend
- * does it on success.
- */
- error = xfs_qm_dqflush(dqp, bp);
- if (error)
- goto out_unlock_dirty;
-
- xfs_buf_delwri_queue(bp, &isol->buffers);
- xfs_buf_relse(bp);
- goto out_unlock_dirty;
- }
-
+ ASSERT(!XFS_DQ_IS_DIRTY(dqp));
xfs_dquot_detach_buf(dqp);
xfs_dqfunlock(dqp);
out_miss_busy:
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- return LRU_SKIP;
-
-out_unlock_dirty:
- trace_xfs_dqreclaim_busy(dqp);
- XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- xfs_dqunlock(dqp);
- return LRU_RETRY;
+ return ret;
}
static unsigned long
struct xfs_dquot *dqp,
void *data)
{
- struct xfs_mount *mp = dqp->q_mount;
struct list_head *buffer_list = data;
struct xfs_buf *bp = NULL;
int error = 0;
if (!XFS_DQ_IS_DIRTY(dqp))
goto out_unlock;
- /*
- * The only way the dquot is already flush locked by the time quotacheck
- * gets here is if reclaim flushed it before the dqadjust walk dirtied
- * it for the final time. Quotacheck collects all dquot bufs in the
- * local delwri queue before dquots are dirtied, so reclaim can't have
- * possibly queued it for I/O. The only way out is to push the buffer to
- * cycle the flush lock.
- */
- if (!xfs_dqflock_nowait(dqp)) {
- /* buf is pinned in-core by delwri list */
- error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp);
- if (error)
- goto out_unlock;
-
- if (!(bp->b_flags & _XBF_DELWRI_Q)) {
- error = -EAGAIN;
- xfs_buf_relse(bp);
- goto out_unlock;
- }
- xfs_buf_unlock(bp);
-
- xfs_buf_delwri_pushbuf(bp, buffer_list);
- xfs_buf_rele(bp);
-
- error = -EAGAIN;
- goto out_unlock;
- }
+ xfs_qm_dqunpin_wait(dqp);
+ xfs_dqflock(dqp);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error)
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_iodone_async);