xfs_repair: convert regular rmap repair to use in-memory btrees
Author:     Darrick J. Wong <djwong@kernel.org>
AuthorDate: Mon, 22 Apr 2024 17:01:17 +0000 (10:01 -0700)
Commit:     Darrick J. Wong <djwong@kernel.org>
CommitDate: Mon, 3 Jun 2024 18:37:42 +0000 (11:37 -0700)
Convert the rmap btree repair code to use in-memory rmap btrees to store
the observed reverse mapping records.  This will eliminate the need for
a separate record sorting step, as well as for all the code that turns
multiple consecutive bmap records into a single rmap record.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
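
For illustration only (not part of this commit): a minimal sketch of the new
collection path, mirroring the rmap_add_mem_rec() helper added to
repair/rmap.c below.  Each observed reverse mapping goes straight into the
per-AG xfile-backed rmap btree through a short-lived empty transaction, so the
records come back out of the tree already sorted.  The function name
collect_one_rmap is hypothetical; the libxfs calls are the ones this patch
exposes via libxfs_api_defs.h.

static void
collect_one_rmap(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_perag	*pag,
	struct xfs_rmap_irec	*rmap)
{
	struct xfs_btree_cur	*rmcur;
	struct xfs_trans	*tp;
	int			error;

	/* In-memory btrees still use a transaction to track dirty buffers. */
	error = -libxfs_trans_alloc_empty(mp, &tp);
	if (error)
		do_error(_("allocating tx for in-memory rmap update\n"));

	rmcur = libxfs_rmapbt_mem_cursor(pag, tp, xfbt);

	/* Insert the record as-is; the btree keeps everything in key order. */
	error = -libxfs_rmap_map_raw(rmcur, rmap);
	if (error)
		do_error(_("adding rmap to in-memory btree, err %d\n"), error);
	libxfs_btree_del_cursor(rmcur, 0);

	/* Flush the dirty in-memory btree buffers back to the xfile. */
	error = xfbtree_trans_commit(xfbt, tp);
	if (error)
		do_error(_("committing in-memory rmap record\n"));

	/* Nothing was logged to the real filesystem, so just cancel. */
	libxfs_trans_cancel(tp);
}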
include/libxfs.h
libxfs/buf_mem.h
libxfs/libxfs_api_defs.h
repair/agbtree.c
repair/agbtree.h
repair/phase5.c
repair/rmap.c
repair/rmap.h
repair/xfs_repair.c

diff --git a/include/libxfs.h b/include/libxfs.h
index 79df8bc7c138769cbe7d89186f94a0ef110ddbca..fb8efb696220a8431e24281fb02e23c1b4f2fcfe 100644
@@ -87,6 +87,9 @@ struct iomap;
 #include "xfs_btree_staging.h"
 #include "xfs_rtbitmap.h"
 #include "xfs_symlink_remote.h"
+#include "libxfs/xfile.h"
+#include "libxfs/buf_mem.h"
+#include "xfs_btree_mem.h"
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/libxfs/buf_mem.h b/libxfs/buf_mem.h
index 3829dd00d7e9859ac249bad848818beb9dbf2759..f19bc6fd700b9ad4c96d4e4e8d5f43e0e149fda3 100644
@@ -27,4 +27,9 @@ bool xmbuf_verify_daddr(struct xfs_buftarg *btp, xfs_daddr_t daddr);
 void xmbuf_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp);
 int xmbuf_finalize(struct xfs_buf *bp);
 
+static inline unsigned long long xmbuf_bytes(struct xfs_buftarg *btp)
+{
+       return xfile_bytes(btp->bt_xfile);
+}
+
 #endif /* __XFS_BUF_MEM_H__ */
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index de37d3050c7e389c2b3d03806ccc8777e96ce203..74bf15172a6132624f2c0975b2f2b35064f9a79d 100644
 #define xfs_btree_bload                        libxfs_btree_bload
 #define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry
 #define xfs_btree_del_cursor           libxfs_btree_del_cursor
+#define xfs_btree_get_block            libxfs_btree_get_block
+#define xfs_btree_goto_left_edge       libxfs_btree_goto_left_edge
+#define xfs_btree_increment            libxfs_btree_increment
 #define xfs_btree_init_block           libxfs_btree_init_block
+#define xfs_btree_mem_head_read_buf    libxfs_btree_mem_head_read_buf
 #define xfs_btree_rec_addr             libxfs_btree_rec_addr
 #define xfs_btree_stage_afakeroot      libxfs_btree_stage_afakeroot
 #define xfs_btree_stage_ifakeroot      libxfs_btree_stage_ifakeroot
+#define xfs_btree_visit_blocks         libxfs_btree_visit_blocks
 #define xfs_buf_delwri_submit          libxfs_buf_delwri_submit
 #define xfs_buf_get                    libxfs_buf_get
 #define xfs_buf_get_uncached           libxfs_buf_get_uncached
 #define xfs_rmapbt_init_cursor         libxfs_rmapbt_init_cursor
 #define xfs_rmapbt_maxlevels_ondisk    libxfs_rmapbt_maxlevels_ondisk
 #define xfs_rmapbt_maxrecs             libxfs_rmapbt_maxrecs
+#define xfs_rmapbt_mem_init            libxfs_rmapbt_mem_init
+#define xfs_rmapbt_mem_cursor          libxfs_rmapbt_mem_cursor
 #define xfs_rmapbt_stage_cursor                libxfs_rmapbt_stage_cursor
 #define xfs_rmap_compare               libxfs_rmap_compare
 #define xfs_rmap_get_rec               libxfs_rmap_get_rec
 #define xfs_rmap_irec_offset_unpack    libxfs_rmap_irec_offset_unpack
 #define xfs_rmap_lookup_le             libxfs_rmap_lookup_le
 #define xfs_rmap_lookup_le_range       libxfs_rmap_lookup_le_range
+#define xfs_rmap_map_raw               libxfs_rmap_map_raw
 #define xfs_rmap_query_all             libxfs_rmap_query_all
 #define xfs_rmap_query_range           libxfs_rmap_query_range
 
 
 #define xfs_validate_stripe_geometry   libxfs_validate_stripe_geometry
 #define xfs_verify_agbno               libxfs_verify_agbno
+#define xfs_verify_agbext              libxfs_verify_agbext
 #define xfs_verify_agino               libxfs_verify_agino
 #define xfs_verify_cksum               libxfs_verify_cksum
 #define xfs_verify_dir_ino             libxfs_verify_dir_ino
diff --git a/repair/agbtree.c b/repair/agbtree.c
index 1a3e40cca68cc0e4e72184988f270e40b737af7b..c8f75f49e6b36359efbf53710ac9b45cbe695a07 100644
@@ -104,7 +104,8 @@ reserve_agblocks(
                        do_error(_("could not set up btree reservation: %s\n"),
                                strerror(-error));
 
-               error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
+               error = rmap_add_agbtree_mapping(mp, agno,
+                               ext_ptr->ex_startblock, len,
                                btr->newbt.oinfo.oi_owner);
                if (error)
                        do_error(_("could not set up btree rmaps: %s\n"),
@@ -602,14 +603,19 @@ get_rmapbt_records(
        unsigned int                    nr_wanted,
        void                            *priv)
 {
-       struct xfs_rmap_irec            *rec;
        struct bt_rebuild               *btr = priv;
        union xfs_btree_rec             *block_rec;
        unsigned int                    loaded;
+       int                             ret;
 
        for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
-               rec = pop_slab_cursor(btr->slab_cursor);
-               memcpy(&cur->bc_rec.r, rec, sizeof(struct xfs_rmap_irec));
+               ret = rmap_get_mem_rec(btr->rmapbt_cursor, &cur->bc_rec.r);
+               if (ret < 0)
+                       return ret;
+               if (ret == 0)
+                       do_error(
+ _("ran out of records while rebuilding AG %u rmap btree\n"),
+                                       cur->bc_ag.pag->pag_agno);
 
                block_rec = libxfs_btree_rec_addr(cur, idx, block);
                cur->bc_ops->init_rec_from_cur(cur, block_rec);
@@ -658,7 +664,7 @@ build_rmap_tree(
 {
        int                     error;
 
-       error = rmap_init_cursor(agno, &btr->slab_cursor);
+       error = rmap_init_mem_cursor(sc->mp, NULL, agno, &btr->rmapbt_cursor);
        if (error)
                do_error(
 _("Insufficient memory to construct rmap cursor.\n"));
@@ -671,7 +677,7 @@ _("Error %d while creating rmap btree for AG %u.\n"), error, agno);
 
        /* Since we're not writing the AGF yet, no need to commit the cursor */
        libxfs_btree_del_cursor(btr->cur, 0);
-       free_slab_cursor(&btr->slab_cursor);
+       libxfs_btree_del_cursor(btr->rmapbt_cursor, 0);
 }
 
 /* rebuild the refcount tree */
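
For illustration only (not part of the patch): the read-back loop in
get_rmapbt_records() above reduces to the shape sketched here, using the
cursor helpers declared in repair/rmap.h further down (rmap_init_mem_cursor
positions the cursor just before the first record; rmap_get_mem_rec returns 1
for a record, 0 at the end of the tree, or a negative errno).
walk_collected_rmaps and its visit_rmap callback are hypothetical names for
the consumer.

static void
walk_collected_rmaps(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	void			(*visit_rmap)(const struct xfs_rmap_irec *rec))
{
	struct xfs_btree_cur	*rmcur;
	struct xfs_rmap_irec	irec;
	int			ret;

	/* Cursor starts just before the first record of this AG's btree. */
	ret = rmap_init_mem_cursor(mp, NULL, agno, &rmcur);
	if (ret)
		do_error(_("%s while reading in-memory rmap btree\n"),
				strerror(ret));

	/* Pull records in key order; no separate sorting pass is needed. */
	while ((ret = rmap_get_mem_rec(rmcur, &irec)) == 1)
		visit_rmap(&irec);
	if (ret < 0)
		do_error(_("%s while walking in-memory rmap btree\n"),
				strerror(-ret));

	libxfs_btree_del_cursor(rmcur, 0);
}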
diff --git a/repair/agbtree.h b/repair/agbtree.h
index 714d8e68716398d43c317523bec8d4d38d833c57..6d2c401a657d7825c3ba307b8a41d50bc3b1cbcf 100644
@@ -20,6 +20,7 @@ struct bt_rebuild {
        /* Tree-specific data. */
        union {
                struct xfs_slab_cursor  *slab_cursor;
+               struct xfs_btree_cur    *rmapbt_cursor;
                struct {
                        struct extent_tree_node *bno_rec;
                        unsigned int            freeblks;
diff --git a/repair/phase5.c b/repair/phase5.c
index b689a4234143d461a23bc3b33df8c32942d0706a..52666ad882331237a64c5f148711e2c576614474 100644
@@ -712,7 +712,7 @@ phase5(xfs_mount_t *mp)
         * the superblock counters.
         */
        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-               error = rmap_store_ag_btree_rec(mp, agno);
+               error = rmap_commit_agbtree_mappings(mp, agno);
                if (error)
                        do_error(
 _("unable to add AG %u reverse-mapping data to btree.\n"), agno);
diff --git a/repair/rmap.c b/repair/rmap.c
index 032bf494250ad8fae479e655e356c87264a198ee..c1ae7da1e7265fca4596ec34685671a70ab02b02 100644
@@ -13,6 +13,7 @@
 #include "slab.h"
 #include "rmap.h"
 #include "libfrog/bitmap.h"
+#include "libfrog/platform.h"
 
 #undef RMAP_DEBUG
 
 
 /* per-AG rmap object anchor */
 struct xfs_ag_rmap {
-       struct xfs_slab *ar_rmaps;              /* rmap observations, p4 */
-       struct xfs_slab *ar_raw_rmaps;          /* unmerged rmaps */
-       int             ar_flcount;             /* agfl entries from leftover */
-                                               /* agbt allocations */
-       struct xfs_rmap_irec    ar_last_rmap;   /* last rmap seen */
-       struct xfs_slab *ar_refcount_items;     /* refcount items, p4-5 */
+       /* root of rmap observations btree */
+       struct xfbtree          ar_xfbtree;
+       /* rmap buffer target for btree */
+       struct xfs_buftarg      *ar_xmbtp;
+
+       /* rmap observations, p4 */
+       struct xfs_slab         *ar_rmaps;
+
+       /* unmerged rmaps */
+       struct xfs_slab         *ar_raw_rmaps;
+
+       /* agfl entries from leftover agbt allocations */
+       int                     ar_flcount;
+
+       /* last rmap seen */
+       struct xfs_rmap_irec    ar_last_rmap;
+
+       /* refcount items, p4-5 */
+       struct xfs_slab         *ar_refcount_items;
 };
 
 static struct xfs_ag_rmap *ag_rmaps;
@@ -53,6 +67,61 @@ rmap_needs_work(
               xfs_has_rmapbt(mp);
 }
 
+static inline bool rmaps_has_observations(const struct xfs_ag_rmap *ag_rmap)
+{
+       return ag_rmap->ar_xfbtree.target;
+}
+
+/* Destroy an in-memory rmap btree. */
+STATIC void
+rmaps_destroy(
+       struct xfs_mount        *mp,
+       struct xfs_ag_rmap      *ag_rmap)
+{
+       free_slab(&ag_rmap->ar_refcount_items);
+
+       if (!rmaps_has_observations(ag_rmap))
+               return;
+
+       xfbtree_destroy(&ag_rmap->ar_xfbtree);
+       xmbuf_free(ag_rmap->ar_xmbtp);
+}
+
+/* Initialize the in-memory rmap btree for collecting per-AG rmap records. */
+STATIC void
+rmaps_init_ag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       struct xfs_ag_rmap      *ag_rmap)
+{
+       char                    *descr;
+       unsigned long long      maxbytes;
+       int                     error;
+
+       maxbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_agblocks);
+       descr = kasprintf(GFP_KERNEL, "xfs_repair (%s): AG %u rmap records",
+                       mp->m_fsname, agno);
+       error = -xmbuf_alloc(mp, descr, maxbytes, &ag_rmap->ar_xmbtp);
+       kfree(descr);
+       if (error)
+               goto nomem;
+
+       error = -libxfs_rmapbt_mem_init(mp, &ag_rmap->ar_xfbtree,
+                       ag_rmap->ar_xmbtp, agno);
+       if (error)
+               goto nomem;
+
+       error = init_slab(&ag_rmap->ar_refcount_items,
+                         sizeof(struct xfs_refcount_irec));
+       if (error)
+               goto nomem;
+
+       return;
+nomem:
+       do_error(
+_("Insufficient memory while allocating realtime reverse mapping btree."));
+}
+
 /*
  * Initialize per-AG reverse map data.
  */
@@ -71,6 +140,8 @@ rmaps_init(
                do_error(_("couldn't allocate per-AG reverse map roots\n"));
 
        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+               rmaps_init_ag(mp, i, &ag_rmaps[i]);
+
                error = init_slab(&ag_rmaps[i].ar_rmaps,
                                sizeof(struct xfs_rmap_irec));
                if (error)
@@ -82,11 +153,6 @@ _("Insufficient memory while allocating reverse mapping slabs."));
                        do_error(
 _("Insufficient memory while allocating raw metadata reverse mapping slabs."));
                ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
-               error = init_slab(&ag_rmaps[i].ar_refcount_items,
-                                 sizeof(struct xfs_refcount_irec));
-               if (error)
-                       do_error(
-_("Insufficient memory while allocating refcount item slabs."));
        }
 }
 
@@ -105,7 +171,7 @@ rmaps_free(
        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
                free_slab(&ag_rmaps[i].ar_rmaps);
                free_slab(&ag_rmaps[i].ar_raw_rmaps);
-               free_slab(&ag_rmaps[i].ar_refcount_items);
+               rmaps_destroy(mp, &ag_rmaps[i]);
        }
        free(ag_rmaps);
        ag_rmaps = NULL;
@@ -136,6 +202,87 @@ rmaps_are_mergeable(
        return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
 }
 
+int
+rmap_init_mem_cursor(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_agnumber_t          agno,
+       struct xfs_btree_cur    **rmcurp)
+{
+       struct xfbtree          *xfbt;
+       struct xfs_perag        *pag;
+       int                     error;
+
+       xfbt = &ag_rmaps[agno].ar_xfbtree;
+       pag = libxfs_perag_get(mp, agno);
+       *rmcurp = libxfs_rmapbt_mem_cursor(pag, tp, xfbt);
+
+       error = -libxfs_btree_goto_left_edge(*rmcurp);
+       if (error)
+               libxfs_btree_del_cursor(*rmcurp, error);
+
+       libxfs_perag_put(pag);
+       return error;
+}
+
+/*
+ * Retrieve the next record from the in-memory rmap btree.  Returns 1 if irec
+ * has been filled out, 0 if there aren't any more records, or a negative errno
+ * value if an error happened.
+ */
+int
+rmap_get_mem_rec(
+       struct xfs_btree_cur    *rmcur,
+       struct xfs_rmap_irec    *irec)
+{
+       int                     stat = 0;
+       int                     error;
+
+       error = -libxfs_btree_increment(rmcur, 0, &stat);
+       if (error)
+               return -error;
+       if (!stat)
+               return 0;
+
+       error = -libxfs_rmap_get_rec(rmcur, irec, &stat);
+       if (error)
+               return -error;
+
+       return stat;
+}
+
+static void
+rmap_add_mem_rec(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       struct xfs_rmap_irec    *rmap)
+{
+       struct xfs_btree_cur    *rmcur;
+       struct xfbtree          *xfbt;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       xfbt = &ag_rmaps[agno].ar_xfbtree;
+       error = -libxfs_trans_alloc_empty(mp, &tp);
+       if (error)
+               do_error(_("allocating tx for in-memory rmap update\n"));
+
+       error = rmap_init_mem_cursor(mp, tp, agno, &rmcur);
+       if (error)
+               do_error(_("reading in-memory rmap btree head\n"));
+
+       error = -libxfs_rmap_map_raw(rmcur, rmap);
+       if (error)
+               do_error(_("adding rmap to in-memory btree, err %d\n"), error);
+       libxfs_btree_del_cursor(rmcur, 0);
+
+       error = xfbtree_trans_commit(xfbt, tp);
+       if (error)
+               do_error(_("committing in-memory rmap record\n"));
+
+       libxfs_trans_cancel(tp);
+}
+
 /*
  * Add an observation about a block mapping in an inode's data or attribute
  * fork for later btree reconstruction.
@@ -173,6 +320,9 @@ rmap_add_rec(
        rmap.rm_blockcount = irec->br_blockcount;
        if (irec->br_state == XFS_EXT_UNWRITTEN)
                rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
+
+       rmap_add_mem_rec(mp, agno, &rmap);
+
        last_rmap = &ag_rmaps[agno].ar_last_rmap;
        if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
                *last_rmap = rmap;
@@ -223,6 +373,8 @@ __rmap_add_raw_rec(
                rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
        rmap.rm_startblock = agbno;
        rmap.rm_blockcount = len;
+
+       rmap_add_mem_rec(mp, agno, &rmap);
        return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
 }
 
@@ -273,6 +425,36 @@ rmap_add_ag_rec(
        return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
 }
 
+/*
+ * Add a reverse mapping for a per-AG btree extent.  These are /not/ tracked
+ * in the in-memory rmap btree because they can only be added to the rmap
+ * data after the in-memory btrees have been written to disk.
+ */
+int
+rmap_add_agbtree_mapping(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           agbno,
+       xfs_extlen_t            len,
+       uint64_t                owner)
+{
+       struct xfs_rmap_irec    rmap = {
+               .rm_owner       = owner,
+               .rm_startblock  = agbno,
+               .rm_blockcount  = len,
+       };
+       struct xfs_perag        *pag;
+
+       if (!rmap_needs_work(mp))
+               return 0;
+
+       pag = libxfs_perag_get(mp, agno);
+       assert(libxfs_verify_agbext(pag, agbno, len));
+       libxfs_perag_put(pag);
+
+       return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
+}
+
 /*
  * Merge adjacent raw rmaps and add them to the main rmap list.
  */
@@ -441,7 +623,7 @@ out:
  * the rmapbt, after which it is fully regenerated.
  */
 int
-rmap_store_ag_btree_rec(
+rmap_commit_agbtree_mappings(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno)
 {
@@ -536,7 +718,7 @@ rmap_store_ag_btree_rec(
        if (error)
                goto err;
 
-       /* Create cursors to refcount structures */
+       /* Create cursors to rmap structures */
        error = init_slab_cursor(ag_rmap->ar_rmaps, rmap_compare, &rm_cur);
        if (error)
                goto err;
@@ -869,6 +1051,21 @@ err:
 }
 #undef RMAP_END
 
+static int
+count_btree_records(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       void                    *data)
+{
+       uint64_t                *nr = data;
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+
+       block = libxfs_btree_get_block(cur, level, &bp);
+       *nr += be16_to_cpu(block->bb_numrecs);
+       return 0;
+}
+
 /*
  * Return the number of rmap objects for an AG.
  */
@@ -877,7 +1074,26 @@ rmap_record_count(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno)
 {
-       return slab_count(ag_rmaps[agno].ar_rmaps);
+       struct xfs_btree_cur    *rmcur;
+       uint64_t                nr = 0;
+       int                     error;
+
+       if (!rmaps_has_observations(&ag_rmaps[agno]))
+               return 0;
+
+       error = rmap_init_mem_cursor(mp, NULL, agno, &rmcur);
+       if (error)
+               do_error(_("%s while reading in-memory rmap btree\n"),
+                               strerror(error));
+
+       error = -libxfs_btree_visit_blocks(rmcur, count_btree_records,
+                       XFS_BTREE_VISIT_RECORDS, &nr);
+       if (error)
+               do_error(_("%s while counting in-memory rmap records\n"),
+                               strerror(error));
+
+       libxfs_btree_del_cursor(rmcur, 0);
+       return nr;
 }
 
 /*
@@ -1545,15 +1761,16 @@ estimate_rmapbt_blocks(
 
        /*
         * Overestimate the amount of space needed by pretending that every
-        * record in the incore slab will become rmapbt records.
+        * byte in the incore tree is used to store rmapbt records.  This
+        * means we can use SEEK_DATA/HOLE on the xfile, which is faster than
+        * walking the entire btree to count records.
         */
        x = &ag_rmaps[pag->pag_agno];
-       if (x->ar_rmaps)
-               nr_recs += slab_count(x->ar_rmaps);
-       if (x->ar_raw_rmaps)
-               nr_recs += slab_count(x->ar_raw_rmaps);
+       if (!rmaps_has_observations(x))
+               return 0;
 
-       return libxfs_rmapbt_calc_size(mp, nr_recs);
+       nr_recs = xmbuf_bytes(x->ar_xmbtp) / sizeof(struct xfs_rmap_rec);
+       return libxfs_rmapbt_calc_size(pag->pag_mount, nr_recs);
 }
 
 /* Estimate the size of the ondisk refcountbt from the incore data. */
diff --git a/repair/rmap.h b/repair/rmap.h
index 1bc8c127d0e5a6e60a5904fcd729504a2e686bc2..2de3ec56f23480032bcd200190d82e2de75efe21 100644
@@ -24,7 +24,10 @@ extern int rmap_fold_raw_recs(struct xfs_mount *mp, xfs_agnumber_t agno);
 extern bool rmaps_are_mergeable(struct xfs_rmap_irec *r1, struct xfs_rmap_irec *r2);
 
 extern int rmap_add_fixed_ag_rec(struct xfs_mount *, xfs_agnumber_t);
-extern int rmap_store_ag_btree_rec(struct xfs_mount *, xfs_agnumber_t);
+
+int rmap_add_agbtree_mapping(struct xfs_mount *mp, xfs_agnumber_t agno,
+               xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner);
+int rmap_commit_agbtree_mappings(struct xfs_mount *mp, xfs_agnumber_t agno);
 
 uint64_t rmap_record_count(struct xfs_mount *mp, xfs_agnumber_t agno);
 extern int rmap_init_cursor(xfs_agnumber_t, struct xfs_slab_cursor **);
@@ -52,4 +55,8 @@ extern void rmap_store_agflcount(struct xfs_mount *, xfs_agnumber_t, int);
 xfs_extlen_t estimate_rmapbt_blocks(struct xfs_perag *pag);
 xfs_extlen_t estimate_refcountbt_blocks(struct xfs_perag *pag);
 
+int rmap_init_mem_cursor(struct xfs_mount *mp, struct xfs_trans *tp,
+               xfs_agnumber_t agno, struct xfs_btree_cur **rmcurp);
+int rmap_get_mem_rec(struct xfs_btree_cur *rmcur, struct xfs_rmap_irec *irec);
+
 #endif /* RMAP_H_ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index bf56daa93b82c7c9c9167cfdf0c4e9bab64cb2b5..f8c37c6322323ac50d0e9874bdbebe234a1ea0ab 100644
@@ -949,6 +949,12 @@ repair_capture_writeback(
        struct xfs_mount        *mp = bp->b_mount;
        static pthread_mutex_t  wb_mutex = PTHREAD_MUTEX_INITIALIZER;
 
+       /* We only care about ondisk metadata. */
+       if (bp->b_target != mp->m_ddev_targp &&
+           bp->b_target != mp->m_logdev_targp &&
+           bp->b_target != mp->m_rtdev_targp)
+               return;
+
        /*
         * This write hook ignores any buffer that looks like a superblock to
         * avoid hook recursion when setting NEEDSREPAIR.  Higher level code