xfs: catch stale AGF/AGI metadata

author Dave Chinner <dchinner@redhat.com>

Wed, 25 Jun 2025 22:48:55 +0000 (08:48 +1000)

committer Carlos Maiolino <cem@kernel.org>

Fri, 27 Jun 2025 12:13:34 +0000 (14:13 +0200)
author Dave Chinner <dchinner@redhat.com>
Wed, 25 Jun 2025 22:48:55 +0000 (08:48 +1000)
committer Carlos Maiolino <cem@kernel.org>
Fri, 27 Jun 2025 12:13:34 +0000 (14:13 +0200)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

index 7839efe050bfa056d35933ec5c5f8c871bdc7b21..000cc7f4a3ce5085ab73fef86fb202ebafe59174 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3444,16 +3444,41 @@ xfs_alloc_read_agf(
  
                 set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
         }
+
  #ifdef DEBUG
-       else if (!xfs_is_shutdown(mp)) {
-               ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
-               ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
-               ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
-               ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
-               ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level));
-               ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level));
+       /*
+        * It's possible for the AGF to be out of sync if the block device is
+        * silently dropping writes. This can happen in fstests with dmflakey
+        * enabled, which allows the buffer to be cleaned and reclaimed by
+        * memory pressure and then re-read from disk here. We will get a
+        * stale version of the AGF from disk, and nothing good can happen from
+        * here. Hence if we detect this situation, immediately shut down the
+        * filesystem.
+        *
+        * This can also happen if we are already in the middle of a forced
+        * shutdown, so don't bother checking if we are already shut down.
+        */
+       if (!xfs_is_shutdown(pag_mount(pag))) {
+               bool    ok = true;
+
+               ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks);
+               ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks);
+               ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks);
+               ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount);
+               ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest);
+               ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level);
+               ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level);
+
+               if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+                       xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
+                       xfs_trans_brelse(tp, agfbp);
+                       xfs_force_shutdown(pag_mount(pag),
+                                       SHUTDOWN_CORRUPT_ONDISK);
+                       return -EFSCORRUPTED;
+               }
         }
-#endif
+#endif /* DEBUG */
+
         if (agfbpp)
                 *agfbpp = agfbp;
         else
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c

index 0c47b5c6ca7d99c4067ceb8ba22f2b4ddc89ad0f..750111634d9f7b82b953f606e13aa7169b0ce7f6 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi(
                 set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
         }
  
+#ifdef DEBUG
         /*
-        * It's possible for these to be out of sync if
-        * we are in the middle of a forced shutdown.
+        * It's possible for the AGI to be out of sync if the block device is
+        * silently dropping writes. This can happen in fstests with dmflakey
+        * enabled, which allows the buffer to be cleaned and reclaimed by
+        * memory pressure and then re-read from disk here. We will get a
+        * stale version of the AGI from disk, and nothing good can happen from
+        * here. Hence if we detect this situation, immediately shut down the
+        * filesystem.
+        *
+        * This can also happen if we are already in the middle of a forced
+        * shutdown, so don't bother checking if we are already shut down.
          */
-       ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
-               xfs_is_shutdown(pag_mount(pag)));
+       if (!xfs_is_shutdown(pag_mount(pag))) {
+               bool    ok = true;
+
+               ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount);
+               ok &= pag->pagi_count == be32_to_cpu(agi->agi_count);
+
+               if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+                       xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+                       xfs_trans_brelse(tp, agibp);
+                       xfs_force_shutdown(pag_mount(pag),
+                                       SHUTDOWN_CORRUPT_ONDISK);
+                       return -EFSCORRUPTED;
+               }
+       }
+#endif /* DEBUG */
+
         if (agibpp)
                 *agibpp = agibp;
         else
author	Dave Chinner <dchinner@redhat.com>
	Wed, 25 Jun 2025 22:48:55 +0000 (08:48 +1000)
committer	Carlos Maiolino <cem@kernel.org>
	Fri, 27 Jun 2025 12:13:34 +0000 (14:13 +0200)
fs/xfs/libxfs/xfs_alloc.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_ialloc.c		patch \| blob \| blame \| history