git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_repair: check for global free space concerns with default btree slack levels
author Darrick J. Wong <djwong@kernel.org>
Mon, 24 Feb 2025 18:22:01 +0000 (10:22 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Tue, 25 Feb 2025 17:16:01 +0000 (09:16 -0800)
It's possible that before repair was started, the filesystem might have
been nearly full, and its metadata btree blocks could all have been
nearly full.  If we then rebuild the btrees with blocks that are only
75% full, that expansion might be enough to run out of free space.  The
solution to this is to pack the new blocks completely full if we fear
running out of space.

Previously, we only had to check and decide that on a per-AG basis.
However, now that XFS can have filesystems with metadata btrees rooted
in inodes, we have a global free space concern because there might be
enough space in each AG to regenerate the AG btrees at 75%, but that
might not leave enough space to regenerate the inode btrees, even if we
fill those blocks to 100%.

Hence we need to precompute the worst case space usage for all btrees in
the filesystem and compare /that/ against the global free space to
decide if we're going to pack the btrees maximally to conserve space.
That decision can override the per-AG determination.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
repair/globals.c
repair/globals.h
repair/phase5.c
repair/phase6.c

index 99291d6afd61b95fd57774e20aa4a883aec28b1f..143b4a8beb53f4e541711e6d1147652bcdc691b7 100644 (file)
@@ -114,6 +114,12 @@ int                thread_count;
 /* If nonzero, simulate failure after this phase. */
 int            fail_after_phase;
 
+/*
+ * Do we think we're going to be so low on disk space that we need to pack
+ * all rebuilt btree blocks completely full to avoid running out of space?
+ */
+bool           need_packed_btrees;
+
 /* quota inode numbers */
 enum quotino_state {
        QI_STATE_UNKNOWN,
index b23a06af6cc81bc715281b87eb25d2b405973c2f..8bb9bbaeca4fb01192e678dcbe238d5f755cecbf 100644 (file)
@@ -159,6 +159,8 @@ extern int          fail_after_phase;
 
 extern struct libxfs_init x;
 
+extern bool            need_packed_btrees;
+
 void set_quota_inode(xfs_dqtype_t type, xfs_ino_t);
 void lose_quota_inode(xfs_dqtype_t type);
 void clear_quota_inode(xfs_dqtype_t type);
index ac5f04697b7110041a536ec459c3244e37db80ca..cacaf74dda3a601000c2b68eee9f8e6494fd5bb7 100644 (file)
@@ -481,11 +481,14 @@ _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
 
        /*
         * Estimate the number of free blocks in this AG after rebuilding
-        * all btrees.
+        * all btrees, unless we already decided that we need to pack all
+        * btree blocks maximally.
         */
-       total_btblocks = estimate_agbtree_blocks(pag, num_extents);
-       if (num_freeblocks > total_btblocks)
-               est_agfreeblocks = num_freeblocks - total_btblocks;
+       if (!need_packed_btrees) {
+               total_btblocks = estimate_agbtree_blocks(pag, num_extents);
+               if (num_freeblocks > total_btblocks)
+                       est_agfreeblocks = num_freeblocks - total_btblocks;
+       }
 
        init_ino_cursors(&sc, pag, est_agfreeblocks, &sb_icount_ag[agno],
                        &sb_ifree_ag[agno], &btr_ino, &btr_fino);
@@ -632,6 +635,107 @@ check_rtmetadata(
        check_rtsummary(mp);
 }
 
+/*
+ * Estimate the amount of free space used by the perag metadata without
+ * building the incore tree; only necessary if realtime btrees are enabled.
+ * Adds the AG's free block count to *num_freeblocks (caller initializes it).
+ */
+static xfs_extlen_t
+estimate_agbtree_blocks_early(
+       struct xfs_perag        *pag,
+       unsigned int            *num_freeblocks)
+{
+       struct xfs_mount        *mp = pag_mount(pag);
+       xfs_agblock_t           agbno;
+       xfs_agblock_t           ag_end;
+       xfs_extlen_t            extent_len;
+       xfs_extlen_t            blen;
+       unsigned int            num_extents = 0;
+       int                     bstate;
+       bool                    in_extent = false;
+
+       /* Count the free extents in this AG and sum their lengths. */
+       ag_end = libxfs_ag_block_count(mp, pag_agno(pag));
+       for (agbno = 0; agbno < ag_end; agbno += blen) {
+               bstate = get_bmap_ext(pag_agno(pag), agbno, ag_end, &blen,
+                               false);
+               if (bstate < XR_E_INUSE)  {
+                       if (!in_extent) {
+                               /*
+                                * start of a new run of free blocks
+                                */
+                               in_extent = true;
+                               num_extents++;
+                               extent_len = blen;
+                       } else {
+                               extent_len += blen;
+                       }
+               } else {
+                       if (in_extent)  {
+                               /*
+                                * run ended; add its length to the total
+                                */
+                               in_extent = false;
+                               *num_freeblocks += extent_len;
+                       }
+               }
+       }
+       if (in_extent)
+               *num_freeblocks += extent_len;
+
+       return estimate_agbtree_blocks(pag, num_extents);
+}
+
+/*
+ * Decide if we need to pack every new btree block completely full to conserve
+ * disk space.  Normally we rebuild btree blocks to be 75% full, but we don't
+ * want to start rebuilding AG btrees that way only to discover that there
+ * isn't enough space left in the data volume to rebuild inode-based btrees.
+ */
+static bool
+are_packed_btrees_needed(
+       struct xfs_mount        *mp)
+{
+       struct xfs_perag        *pag = NULL;
+       struct xfs_rtgroup      *rtg = NULL;
+       unsigned long long      metadata_blocks = 0;
+       unsigned long long      fdblocks = 0;
+
+       /*
+        * No rtrmapbt means no inode-based metadata btrees to budget for, so
+        * let the AG btrees pack as needed; no global space concerns here.
+        */
+       if (!xfs_has_rtrmapbt(mp))
+               return false;
+
+       while ((pag = xfs_perag_next(mp, pag))) {
+               unsigned int    ag_fdblocks = 0;
+
+               metadata_blocks += estimate_agbtree_blocks_early(pag,
+                                                                &ag_fdblocks);
+               fdblocks += ag_fdblocks;
+       }
+
+       while ((rtg = xfs_rtgroup_next(mp, rtg)))
+               metadata_blocks += estimate_rtrmapbt_blocks(rtg);
+
+       /*
+        * If we expect to need more metadata blocks than we have free space,
+        * pack the btree blocks.  (Also stops the subtraction below wrapping.)
+        */
+       if (metadata_blocks > fdblocks)
+               return true;
+
+       /*
+        * If the free space left after building btrees is less than 3/32
+        * (about 9%) of the data volume, pack the btree blocks.
+        */
+       fdblocks -= metadata_blocks;
+       if (fdblocks < ((mp->m_sb.sb_dblocks * 3) >> 5))
+               return true;
+       return false;
+}
+
 void
 phase5(xfs_mount_t *mp)
 {
@@ -683,6 +787,8 @@ phase5(xfs_mount_t *mp)
        if (error)
                do_error(_("cannot alloc lost block bitmap\n"));
 
+       need_packed_btrees = are_packed_btrees_needed(mp);
+
        while ((pag = xfs_perag_next(mp, pag)))
                phase5_func(mp, pag, lost_blocks);
 
index cae9d9704818403f060f0458f514cb75046c9536..2ddfd0526767e0fe830640b2ff5e8996233d7ed9 100644 (file)
@@ -3399,12 +3399,18 @@ reset_rt_metadir_inodes(
                mark_ino_metadata(mp, mp->m_rtdirip->i_ino);
        }
 
-       /* Estimate how much free space will be left after building btrees */
-       while ((rtg = xfs_rtgroup_next(mp, rtg)))
-               metadata_blocks += estimate_rtrmapbt_blocks(rtg);
+       /*
+        * Estimate how much free space will be left after building btrees
+        * unless we already decided that we needed to pack all new blocks
+        * maximally.
+        */
+       if (!need_packed_btrees) {
+               while ((rtg = xfs_rtgroup_next(mp, rtg)))
+                       metadata_blocks += estimate_rtrmapbt_blocks(rtg);
 
-       if (mp->m_sb.sb_fdblocks > metadata_blocks)
-               est_fdblocks = mp->m_sb.sb_fdblocks - metadata_blocks;
+               if (mp->m_sb.sb_fdblocks > metadata_blocks)
+                       est_fdblocks = mp->m_sb.sb_fdblocks - metadata_blocks;
+       }
 
        /*
         * This isn't the whole story, but it keeps the message that we've had