xfs: fix unmount hang with unflushable inodes stuck in the AIL

author Dave Chinner <dchinner@redhat.com>

Wed, 25 Jun 2025 22:49:00 +0000 (08:49 +1000)

committer Carlos Maiolino <cem@kernel.org>

Fri, 27 Jun 2025 12:14:37 +0000 (14:14 +0200)
author Dave Chinner <dchinner@redhat.com>
Wed, 25 Jun 2025 22:49:00 +0000 (08:49 +1000)
committer Carlos Maiolino <cem@kernel.org>
Fri, 27 Jun 2025 12:14:37 +0000 (14:14 +0200)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index c95826863c82b3f431fa300c3cd88277f72304df..7fc54725c5f6cc76188e1ac0329dd374229450c8 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -612,43 +612,42 @@ xfs_buf_item_push(
   * Drop the buffer log item refcount and take appropriate action. This helper
   * determines whether the bli must be freed or not, since a decrement to zero
   * does not necessarily mean the bli is unused.
- *
- * Return true if the bli is freed, false otherwise.
   */
-bool
+void
  xfs_buf_item_put(
         struct xfs_buf_log_item *bip)
  {
-       struct xfs_log_item     *lip = &bip->bli_item;
-       bool                    aborted;
-       bool                    dirty;
+
+       ASSERT(xfs_buf_islocked(bip->bli_buf));
  
         /* drop the bli ref and return if it wasn't the last one */
         if (!atomic_dec_and_test(&bip->bli_refcount))
-               return false;
+               return;
  
-       /*
-        * We dropped the last ref and must free the item if clean or aborted.
-        * If the bli is dirty and non-aborted, the buffer was clean in the
-        * transaction but still awaiting writeback from previous changes. In
-        * that case, the bli is freed on buffer writeback completion.
-        */
-       aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
-                       xlog_is_shutdown(lip->li_log);
-       dirty = bip->bli_flags & XFS_BLI_DIRTY;
-       if (dirty && !aborted)
-               return false;
+       /* If the BLI is in the AIL, then it is still dirty and in use */
+       if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) {
+               ASSERT(bip->bli_flags & XFS_BLI_DIRTY);
+               return;
+       }
  
         /*
-        * The bli is aborted or clean. An aborted item may be in the AIL
-        * regardless of dirty state.  For example, consider an aborted
-        * transaction that invalidated a dirty bli and cleared the dirty
-        * state.
+        * In shutdown conditions, we can be asked to free a dirty BLI that
+        * isn't in the AIL. This can occur due to a checkpoint aborting a BLI
+        * instead of inserting it into the AIL at checkpoint IO completion. If
+        * there's another bli reference (e.g. a btree cursor holds a clean
+        * reference) and it is released via xfs_trans_brelse(), we can get here
+        * with that aborted, dirty BLI. In this case, it is safe to free the
+        * dirty BLI immediately, as it is not in the AIL and there are no
+        * other references to it.
+        *
+        * We should never get here with a stale BLI via that path as
+        * xfs_trans_brelse() specifically holds onto stale buffers rather than
+        * releasing them.
          */
-       if (aborted)
-               xfs_trans_ail_delete(lip, 0);
+       ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) ||
+                       test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags));
+       ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
         xfs_buf_item_relse(bip);
-       return true;
  }
  
  /*
@@ -669,6 +668,15 @@ xfs_buf_item_put(
   * if necessary but do not unlock the buffer.  This is for support of
   * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
   * free the item.
+ *
+ * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must*
+ * perform a completion abort of any objects attached to the buffer for IO
+ * tracking purposes. This generally only happens in shutdown situations,
+ * normally xfs_buf_item_unpin() will drop the last BLI reference and perform
+ * completion processing. However, because transaction completion can race with
+ * checkpoint completion during a shutdown, this release context may end up
+ * being the last active reference to the BLI and so needs to perform this
+ * cleanup.
   */
  STATIC void
  xfs_buf_item_release(
@@ -676,18 +684,19 @@ xfs_buf_item_release(
  {
         struct xfs_buf_log_item *bip = BUF_ITEM(lip);
         struct xfs_buf          *bp = bip->bli_buf;
-       bool                    released;
         bool                    hold = bip->bli_flags & XFS_BLI_HOLD;
         bool                    stale = bip->bli_flags & XFS_BLI_STALE;
-#if defined(DEBUG) || defined(XFS_WARN)
-       bool                    ordered = bip->bli_flags & XFS_BLI_ORDERED;
-       bool                    dirty = bip->bli_flags & XFS_BLI_DIRTY;
         bool                    aborted = test_bit(XFS_LI_ABORTED,
                                                    &lip->li_flags);
+       bool                    dirty = bip->bli_flags & XFS_BLI_DIRTY;
+#if defined(DEBUG) || defined(XFS_WARN)
+       bool                    ordered = bip->bli_flags & XFS_BLI_ORDERED;
  #endif
  
         trace_xfs_buf_item_release(bip);
  
+       ASSERT(xfs_buf_islocked(bp));
+
         /*
          * The bli dirty state should match whether the blf has logged segments
          * except for ordered buffers, where only the bli should be dirty.
@@ -703,16 +712,56 @@ xfs_buf_item_release(
         bp->b_transp = NULL;
         bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
  
+       /* If there are other references, then we have nothing to do. */
+       if (!atomic_dec_and_test(&bip->bli_refcount))
+               goto out_release;
+
+       /*
+        * Stale buffer completion frees the BLI, unlocks and releases the
+        * buffer. Neither the BLI or buffer are safe to reference after this
+        * call, so there's nothing more we need to do here.
+        *
+        * If we get here with a stale buffer and references to the BLI remain,
+        * we must not unlock the buffer as the last BLI reference owns lock
+        * context, not us.
+        */
+       if (stale) {
+               xfs_buf_item_finish_stale(bip);
+               xfs_buf_relse(bp);
+               ASSERT(!hold);
+               return;
+       }
+
+       /*
+        * Dirty or clean, aborted items are done and need to be removed from
+        * the AIL and released. This frees the BLI, but leaves the buffer
+        * locked and referenced.
+        */
+       if (aborted || xlog_is_shutdown(lip->li_log)) {
+               ASSERT(list_empty(&bip->bli_buf->b_li_list));
+               xfs_buf_item_done(bp);
+               goto out_release;
+       }
+
+       /*
+        * Clean, unreferenced BLIs can be immediately freed, leaving the buffer
+        * locked and referenced.
+        *
+        * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback.
+        */
+       if (!dirty)
+               xfs_buf_item_relse(bip);
+       else
+               ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
+
+       /* Not safe to reference the BLI from here */
+out_release:
         /*
-        * Unref the item and unlock the buffer unless held or stale. Stale
-        * buffers remain locked until final unpin unless the bli is freed by
-        * the unref call. The latter implies shutdown because buffer
-        * invalidation dirties the bli and transaction.
+        * If we get here with a stale buffer, we must not unlock the
+        * buffer as the last BLI reference owns lock context, not us.
          */
-       released = xfs_buf_item_put(bip);
-       if (hold || (stale && !released))
+       if (stale || hold)
                 return;
-       ASSERT(!stale || aborted);
         xfs_buf_relse(bp);
  }
  
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h

index 50dd79b59cf5a3d2f4984c15df73d64542dd0bc4..416890b84f8cd215976424f93556169f9dba90cb 100644 (file)
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -49,7 +49,7 @@ struct xfs_buf_log_item {
  
  int    xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
  void   xfs_buf_item_done(struct xfs_buf *bp);
-bool   xfs_buf_item_put(struct xfs_buf_log_item *);
+void   xfs_buf_item_put(struct xfs_buf_log_item *bip);
  void   xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
  bool   xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
  void   xfs_buf_inode_iodone(struct xfs_buf *);
author	Dave Chinner <dchinner@redhat.com>
	Wed, 25 Jun 2025 22:49:00 +0000 (08:49 +1000)
committer	Carlos Maiolino <cem@kernel.org>
	Fri, 27 Jun 2025 12:14:37 +0000 (14:14 +0200)
fs/xfs/xfs_buf_item.c		patch \| blob \| blame \| history
fs/xfs/xfs_buf_item.h		patch \| blob \| blame \| history