git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_repair: rebuild the realtime rmap btree
author Darrick J. Wong <djwong@kernel.org>
Mon, 24 Feb 2025 18:22:01 +0000 (10:22 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Tue, 25 Feb 2025 17:16:01 +0000 (09:16 -0800)
Rebuild the realtime rmap btree file from the reverse mapping records we
gathered from walking the inodes.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
libxfs/libxfs_api_defs.h
repair/Makefile
repair/bulkload.c
repair/bulkload.h
repair/phase6.c
repair/rmap.c
repair/rmap.h
repair/rtrmap_repair.c [new file with mode: 0644]
repair/xfs_repair.c

index b62efad757470bbe45c8ef63dbb92c1765a25450..193b1eeaa7537e8f211869c9c8a63c951b544578 100644 (file)
 #define xfs_rmap_irec_offset_unpack    libxfs_rmap_irec_offset_unpack
 #define xfs_rmap_lookup_le             libxfs_rmap_lookup_le
 #define xfs_rmap_lookup_le_range       libxfs_rmap_lookup_le_range
+#define xfs_rmap_map_extent            libxfs_rmap_map_extent
 #define xfs_rmap_map_raw               libxfs_rmap_map_raw
 #define xfs_rmap_query_all             libxfs_rmap_query_all
 #define xfs_rmap_query_range           libxfs_rmap_query_range
 #define xfs_rtginode_name              libxfs_rtginode_name
 #define xfs_rtsummary_create           libxfs_rtsummary_create
 
+#define xfs_rtginode_create            libxfs_rtginode_create
 #define xfs_rtginode_irele             libxfs_rtginode_irele
 #define xfs_rtginode_load              libxfs_rtginode_load
 #define xfs_rtginode_load_parent       libxfs_rtginode_load_parent
+#define xfs_rtginode_mkdir_parent      libxfs_rtginode_mkdir_parent
+#define xfs_rtginode_name              libxfs_rtginode_name
 #define xfs_rtgroup_alloc              libxfs_rtgroup_alloc
 #define xfs_rtgroup_extents            libxfs_rtgroup_extents
 #define xfs_rtgroup_grab               libxfs_rtgroup_grab
 #define xfs_rtgroup_get                        libxfs_rtgroup_get
 #define xfs_rtgroup_put                        libxfs_rtgroup_put
 #define xfs_rtrmapbt_calc_reserves     libxfs_rtrmapbt_calc_reserves
+#define xfs_rtrmapbt_calc_size         libxfs_rtrmapbt_calc_size
+#define xfs_rtrmapbt_commit_staged_btree       libxfs_rtrmapbt_commit_staged_btree
+#define xfs_rtrmapbt_create            libxfs_rtrmapbt_create
 #define xfs_rtrmapbt_droot_maxrecs     libxfs_rtrmapbt_droot_maxrecs
 #define xfs_rtrmapbt_maxlevels_ondisk  libxfs_rtrmapbt_maxlevels_ondisk
 #define xfs_rtrmapbt_init_cursor       libxfs_rtrmapbt_init_cursor
 #define xfs_rtrmapbt_maxrecs           libxfs_rtrmapbt_maxrecs
 #define xfs_rtrmapbt_mem_init          libxfs_rtrmapbt_mem_init
 #define xfs_rtrmapbt_mem_cursor                libxfs_rtrmapbt_mem_cursor
+#define xfs_rtrmapbt_stage_cursor      libxfs_rtrmapbt_stage_cursor
 
 #define xfs_sb_from_disk               libxfs_sb_from_disk
 #define xfs_sb_mount_rextsize          libxfs_sb_mount_rextsize
index a36a95e353a50497c91985821db3d36381d8c2a2..6f4ec3b3a9c4dc2000dab368647b702d3d05f252 100644 (file)
@@ -73,6 +73,7 @@ CFILES = \
        rcbag.c \
        rmap.c \
        rt.c \
+       rtrmap_repair.c \
        sb.c \
        scan.c \
        slab.c \
index aada5bbae579f8ce8c280ec0ed3c68e7b732bf17..a9e51de0a24c17bb1dc56bbe9efb92f80c179f59 100644 (file)
@@ -361,3 +361,44 @@ bulkload_estimate_ag_slack(
        if (bload->node_slack < 0)
                bload->node_slack = 2;
 }
+
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us.  However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) The FS has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+void
+bulkload_estimate_inode_slack(
+       struct xfs_mount        *mp,
+       struct xfs_btree_bload  *bload,
+       unsigned long long      free)
+{
+       /*
+        * The global values are set to -1 (i.e. take the bload defaults)
+        * unless someone has set them otherwise, so we just pull the values
+        * here.
+        */
+       bload->leaf_slack = bload_leaf_slack;
+       bload->node_slack = bload_node_slack;
+
+       /* No further changes if there's more than 3/32ths space left. */
+       if (free >= ((mp->m_sb.sb_dblocks * 3) >> 5))
+               return;
+
+       /*
+        * We're low on space; load the btrees as tightly as possible.  Leave
+        * a couple of open slots in each btree block so that we don't end up
+        * splitting the btrees like crazy right after mount.
+        */
+       if (bload->leaf_slack < 0)
+               bload->leaf_slack = 2;
+       if (bload->node_slack < 0)
+               bload->node_slack = 2;
+}
index a88aafaa678a3a629f9f5322b0669790ea3d2430..842121b15190e7eeff1ea4752f2f87f81c52af2b 100644 (file)
@@ -78,5 +78,7 @@ void bulkload_cancel(struct bulkload *bkl);
 int bulkload_commit(struct bulkload *bkl);
 void bulkload_estimate_ag_slack(struct repair_ctx *sc,
                struct xfs_btree_bload *bload, unsigned int free);
+void bulkload_estimate_inode_slack(struct xfs_mount *mp,
+               struct xfs_btree_bload *bload, unsigned long long free);
 
 #endif /* __XFS_REPAIR_BULKLOAD_H__ */
index 7d2e0554594265e5dabfbd35426d71bea86fcf59..cae9d9704818403f060f0458f514cb75046c9536 100644 (file)
@@ -21,6 +21,8 @@
 #include "repair/pptr.h"
 #include "repair/rt.h"
 #include "repair/quotacheck.h"
+#include "repair/slab.h"
+#include "repair/rmap.h"
 
 static xfs_ino_t               orphanage_ino;
 
@@ -685,6 +687,15 @@ ensure_rtgroup_summary(
        fill_rtsummary(rtg);
 }
 
+static void
+ensure_rtgroup_rmapbt(
+       struct xfs_rtgroup      *rtg,
+       xfs_filblks_t           est_fdblocks)
+{
+       if (ensure_rtgroup_file(rtg, XFS_RTGI_RMAP))
+               populate_rtgroup_rmapbt(rtg, est_fdblocks);
+}
+
 /* Initialize a root directory. */
 static int
 init_fs_root_dir(
@@ -3365,6 +3376,8 @@ reset_rt_metadir_inodes(
        struct xfs_mount        *mp)
 {
        struct xfs_rtgroup      *rtg = NULL;
+       xfs_filblks_t           metadata_blocks = 0;
+       xfs_filblks_t           est_fdblocks = 0;
        int                     error;
 
        /*
@@ -3386,6 +3399,13 @@ reset_rt_metadir_inodes(
                mark_ino_metadata(mp, mp->m_rtdirip->i_ino);
        }
 
+       /* Estimate how much free space will be left after building btrees */
+       while ((rtg = xfs_rtgroup_next(mp, rtg)))
+               metadata_blocks += estimate_rtrmapbt_blocks(rtg);
+
+       if (mp->m_sb.sb_fdblocks > metadata_blocks)
+               est_fdblocks = mp->m_sb.sb_fdblocks - metadata_blocks;
+
        /*
         * This isn't the whole story, but it keeps the message that we've had
         * for years and which is expected in xfstests and more.
@@ -3400,6 +3420,7 @@ _("        - resetting contents of realtime bitmap and summary inodes\n"));
        while ((rtg = xfs_rtgroup_next(mp, rtg))) {
                ensure_rtgroup_bitmap(rtg);
                ensure_rtgroup_summary(rtg);
+               ensure_rtgroup_rmapbt(rtg, est_fdblocks);
        }
 }
 
index a40851b4d0dc696cf06912d471e71e245423f1c4..85a65048db9afca9e48f4819495c947baecc79e0 100644 (file)
@@ -1940,3 +1940,29 @@ estimate_refcountbt_blocks(
        return libxfs_refcountbt_calc_size(mp,
                        slab_count(x->ar_refcount_items));
 }
+
+/* Estimate the size of the ondisk rtrmapbt from the incore tree. */
+xfs_filblks_t
+estimate_rtrmapbt_blocks(
+       struct xfs_rtgroup      *rtg)
+{
+       struct xfs_mount        *mp = rtg_mount(rtg);
+       struct xfs_ag_rmap      *x;
+       unsigned long long      nr_recs;
+
+       if (!rmap_needs_work(mp) || !xfs_has_rtrmapbt(mp))
+               return 0;
+
+       /*
+        * Overestimate the amount of space needed by pretending that every
+        * byte in the incore tree is used to store rtrmapbt records.  This
+        * means we can use SEEK_DATA/HOLE on the xfile, which is faster than
+        * walking the entire btree.
+        */
+       x = &rg_rmaps[rtg_rgno(rtg)];
+       if (!rmaps_has_observations(x))
+               return 0;
+
+       nr_recs = xmbuf_bytes(x->ar_xmbtp) / sizeof(struct xfs_rmap_rec);
+       return libxfs_rtrmapbt_calc_size(mp, nr_recs);
+}
index ebda561e59bc8f9ac61325faed16ce0f443f7865..23859bf6c2ad42a6e89ba1092993900dd2c2c03b 100644 (file)
@@ -60,5 +60,6 @@ int rmap_get_mem_rec(struct xfs_btree_cur *rmcur, struct xfs_rmap_irec *irec);
 
 void populate_rtgroup_rmapbt(struct xfs_rtgroup *rtg,
                xfs_filblks_t est_fdblocks);
+xfs_filblks_t estimate_rtrmapbt_blocks(struct xfs_rtgroup *rtg);
 
 #endif /* RMAP_H_ */
diff --git a/repair/rtrmap_repair.c b/repair/rtrmap_repair.c
new file mode 100644 (file)
index 0000000..2b07e89
--- /dev/null
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2019-2025 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include <libxfs.h>
+#include "btree.h"
+#include "err_protos.h"
+#include "libxlog.h"
+#include "incore.h"
+#include "globals.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
+#include "bulkload.h"
+
+/* Ported routines from fs/xfs/scrub/rtrmap_repair.c */
+
+/*
+ * Realtime Reverse Mapping (RTRMAPBT) Repair
+ * ==========================================
+ *
+ * Gather all the rmap records for the inode and fork we're fixing, reset the
+ * incore fork, then recreate the btree.
+ */
+struct xrep_rtrmap {
+       struct xfs_btree_cur    *btree_cursor;
+
+       /* New fork. */
+       struct bulkload         new_fork_info;
+       struct xfs_btree_bload  rtrmap_bload;
+
+       struct repair_ctx       *sc;
+       struct xfs_rtgroup      *rtg;
+
+       /* Estimated free space after building all rt btrees */
+       xfs_filblks_t           est_fdblocks;
+};
+
+/* Retrieve rtrmapbt data for bulk load. */
+STATIC int
+xrep_rtrmap_get_records(
+       struct xfs_btree_cur    *cur,
+       unsigned int            idx,
+       struct xfs_btree_block  *block,
+       unsigned int            nr_wanted,
+       void                    *priv)
+{
+       struct xrep_rtrmap      *rr = priv;
+       union xfs_btree_rec     *block_rec;
+       unsigned int            loaded;
+       int                     ret;
+
+       for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
+               ret = rmap_get_mem_rec(rr->btree_cursor, &cur->bc_rec.r);
+               if (ret < 0)
+                       return ret;
+               if (ret == 0)
+                       do_error(
+ _("ran out of records while rebuilding rt rmap btree\n"));
+
+               block_rec = libxfs_btree_rec_addr(cur, idx, block);
+               cur->bc_ops->init_rec_from_cur(cur, block_rec);
+       }
+
+       return loaded;
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_rtrmap_claim_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       void                    *priv)
+{
+       struct xrep_rtrmap      *rr = priv;
+
+       return bulkload_claim_block(cur, &rr->new_fork_info, ptr);
+}
+
+/* Figure out how much space we need to create the incore btree root block. */
+STATIC size_t
+xrep_rtrmap_iroot_size(
+       struct xfs_btree_cur    *cur,
+       unsigned int            level,
+       unsigned int            nr_this_level,
+       void                    *priv)
+{
+       return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
+}
+
+/* Reserve new btree blocks and bulk load all the rtrmap records. */
+STATIC int
+xrep_rtrmap_btree_load(
+       struct xrep_rtrmap      *rr,
+       struct xfs_btree_cur    *rtrmap_cur)
+{
+       struct repair_ctx       *sc = rr->sc;
+       int                     error;
+
+       rr->rtrmap_bload.get_records = xrep_rtrmap_get_records;
+       rr->rtrmap_bload.claim_block = xrep_rtrmap_claim_block;
+       rr->rtrmap_bload.iroot_size = xrep_rtrmap_iroot_size;
+       bulkload_estimate_inode_slack(sc->mp, &rr->rtrmap_bload,
+                       rr->est_fdblocks);
+
+       /* Compute how many blocks we'll need. */
+       error = -libxfs_btree_bload_compute_geometry(rtrmap_cur,
+                       &rr->rtrmap_bload,
+                       rmap_record_count(sc->mp, true, rtg_rgno(rr->rtg)));
+       if (error)
+               return error;
+
+       /*
+        * Guess how many blocks we're going to need to rebuild an entire rtrmap
+        * from the number of extents we found, and pump up our transaction to
+        * have sufficient block reservation.
+        */
+       error = -libxfs_trans_reserve_more(sc->tp, rr->rtrmap_bload.nr_blocks,
+                       0);
+       if (error)
+               return error;
+
+       /*
+        * Reserve the space we'll need for the new btree.  Do this before
+        * creating the in-memory rmap cursor because allocation can roll the
+        * transaction and cursors can't handle that.
+        */
+       error = bulkload_alloc_file_blocks(&rr->new_fork_info,
+                       rr->rtrmap_bload.nr_blocks);
+       if (error)
+               return error;
+
+       /* Add all observed rtrmap records. */
+       error = rmap_init_mem_cursor(rr->sc->mp, sc->tp, true,
+                       rtg_rgno(rr->rtg), &rr->btree_cursor);
+       if (error)
+               return error;
+       error = -libxfs_btree_bload(rtrmap_cur, &rr->rtrmap_bload, rr);
+       libxfs_btree_del_cursor(rr->btree_cursor, error);
+       return error;
+}
+
+/* Update the inode counters. */
+STATIC int
+xrep_rtrmap_reset_counters(
+       struct xrep_rtrmap      *rr)
+{
+       struct repair_ctx       *sc = rr->sc;
+
+       /*
+        * Update the inode block counts to reflect the btree we just
+        * generated.
+        */
+       sc->ip->i_nblocks = rr->new_fork_info.ifake.if_blocks;
+       libxfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+
+       /* Quotas don't exist so we're done. */
+       return 0;
+}
+
+/*
+ * Use the collected rmap information to stage a new rt rmap btree.  If this is
+ * successful we'll return with the new btree root information logged to the
+ * repair transaction but not yet committed.
+ */
+static int
+xrep_rtrmap_build_new_tree(
+       struct xrep_rtrmap      *rr)
+{
+       struct xfs_owner_info   oinfo;
+       struct xfs_btree_cur    *cur;
+       struct repair_ctx       *sc = rr->sc;
+       struct xbtree_ifakeroot *ifake = &rr->new_fork_info.ifake;
+       int                     error;
+
+       /*
+        * Prepare to construct the new fork by initializing the new btree
+        * structure and creating a fake ifork in the ifakeroot structure.
+        */
+       libxfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
+       bulkload_init_inode(&rr->new_fork_info, sc, XFS_DATA_FORK, &oinfo);
+       cur = libxfs_rtrmapbt_init_cursor(NULL, rr->rtg);
+       libxfs_btree_stage_ifakeroot(cur, ifake);
+
+       /*
+        * Figure out the size and format of the new fork, then fill it with
+        * all the rtrmap records we've found.  Join the inode to the
+        * transaction so that we can roll the transaction while holding the
+        * inode locked.
+        */
+       libxfs_trans_ijoin(sc->tp, sc->ip, 0);
+       ifake->if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
+       error = xrep_rtrmap_btree_load(rr, cur);
+       if (error)
+               goto err_cur;
+
+       /*
+        * Install the new fork in the inode.  After this point the old mapping
+        * data are no longer accessible and the new tree is live.  We delete
+        * the cursor immediately after committing the staged root because the
+        * staged fork might be in extents format.
+        */
+       libxfs_rtrmapbt_commit_staged_btree(cur, sc->tp);
+       libxfs_btree_del_cursor(cur, 0);
+
+       /* Reset the inode counters now that we've changed the fork. */
+       error = xrep_rtrmap_reset_counters(rr);
+       if (error)
+               goto err_newbt;
+
+       /* Dispose of any unused blocks and the accounting information. */
+       error = bulkload_commit(&rr->new_fork_info);
+       if (error)
+               return error;
+
+       return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+err_cur:
+       if (cur)
+               libxfs_btree_del_cursor(cur, error);
+err_newbt:
+       bulkload_cancel(&rr->new_fork_info);
+       return error;
+}
+
+/* Store the realtime reverse-mappings in the rtrmapbt. */
+void
+populate_rtgroup_rmapbt(
+       struct xfs_rtgroup      *rtg,
+       xfs_filblks_t           est_fdblocks)
+{
+       struct xfs_mount        *mp = rtg_mount(rtg);
+       struct xfs_inode        *ip = rtg_rmap(rtg);
+       struct repair_ctx       sc = {
+               .mp             = mp,
+               .ip             = ip,
+       };
+       struct xrep_rtrmap      rr = {
+               .sc             = &sc,
+               .rtg            = rtg,
+               .est_fdblocks   = est_fdblocks,
+       };
+       int                     error;
+
+       if (!xfs_has_rtrmapbt(mp))
+               return;
+
+       error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
+                       &sc.tp);
+       if (error)
+               goto out;
+
+       error = xrep_rtrmap_build_new_tree(&rr);
+       if (error) {
+               libxfs_trans_cancel(sc.tp);
+               goto out;
+       }
+
+       error = -libxfs_trans_commit(sc.tp);
+out:
+       if (error)
+               do_error(
+ _("rtgroup %u rmap btree could not be rebuilt, error %d\n"),
+                       rtg_rgno(rtg), error);
+}
index 9509f04685c870d0ee3662c8a2fe217f9fe6dfe8..eeaaf643468941ae677d5001d131b89f1ab26bac 100644 (file)
@@ -1385,15 +1385,19 @@ main(int argc, char **argv)
        rcbagbt_destroy_cur_cache();
 
        /*
-        * Done with the block usage maps, toss them...
+        * Done with the block usage maps, toss them.  Realtime metadata aren't
+        * rebuilt until phase 6, so we have to keep them around.
         */
-       rmaps_free(mp);
+       if (mp->m_sb.sb_rblocks == 0)
+               rmaps_free(mp);
        free_bmaps(mp);
 
        if (!bad_ino_btree)  {
                phase6(mp);
                phase_end(mp, 6);
 
+               if (mp->m_sb.sb_rblocks != 0)
+                       rmaps_free(mp);
                free_rtgroup_inodes();
 
                phase7(mp, phase2_threads);