xfs_db: add a bmbt inflation command

author Darrick J. Wong <djwong@kernel.org>

Mon, 15 Apr 2024 23:07:49 +0000 (16:07 -0700)

committer Darrick J. Wong <djwong@kernel.org>

Wed, 17 Apr 2024 21:06:28 +0000 (14:06 -0700)
author Darrick J. Wong <djwong@kernel.org>
Mon, 15 Apr 2024 23:07:49 +0000 (16:07 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 17 Apr 2024 21:06:28 +0000 (14:06 -0700)
diff --git a/db/Makefile b/db/Makefile

index d00801ab4739c694430b0950c784cee17c4f2475..83389376c36c4678ace19456a852a7404a6afa38 100644 (file)
--- a/db/Makefile
+++ b/db/Makefile
@@ -7,14 +7,63 @@ include $(TOPDIR)/include/builddefs
  
  LTCOMMAND = xfs_db
  
-HFILES = addr.h agf.h agfl.h agi.h attr.h attrshort.h bit.h block.h bmap.h \
-       btblock.h bmroot.h check.h command.h crc.h debug.h \
-       dir2.h dir2sf.h dquot.h echo.h faddr.h field.h \
-       flist.h fprint.h frag.h freesp.h hash.h help.h init.h inode.h input.h \
-       io.h logformat.h malloc.h metadump.h output.h print.h quit.h sb.h \
-       sig.h strvec.h text.h type.h write.h attrset.h symlink.h fsmap.h \
-       fuzz.h obfuscate.h
-CFILES = $(HFILES:.h=.c) btdump.c btheight.c convert.c info.c iunlink.c namei.c \
+HFILES = \
+       addr.h \
+       agf.h \
+       agfl.h \
+       agi.h \
+       attr.h \
+       attrset.h \
+       attrshort.h \
+       bit.h \
+       block.h \
+       bmap.h \
+       bmroot.h \
+       btblock.h \
+       check.h \
+       command.h \
+       crc.h \
+       debug.h \
+       dir2.h \
+       dir2sf.h \
+       dquot.h \
+       echo.h \
+       faddr.h \
+       field.h \
+       flist.h \
+       fprint.h \
+       frag.h \
+       freesp.h \
+       fsmap.h \
+       fuzz.h \
+       hash.h \
+       help.h \
+       init.h \
+       inode.h \
+       input.h \
+       io.h \
+       logformat.h \
+       malloc.h \
+       metadump.h \
+       obfuscate.h \
+       output.h \
+       print.h \
+       quit.h \
+       sb.h \
+       sig.h \
+       strvec.h \
+       symlink.h \
+       text.h \
+       type.h \
+       write.h
+CFILES = $(HFILES:.h=.c) \
+       bmap_inflate.c \
+       btdump.c \
+       btheight.c \
+       convert.c \
+       info.c \
+       iunlink.c \
+       namei.c \
         timelimit.c
  LSRCFILES = xfs_admin.sh xfs_ncheck.sh xfs_metadump.sh
  
diff --git a/db/bmap_inflate.c b/db/bmap_inflate.c

new file mode 100644 (file)

index 0000000..33b0c95
--- /dev/null
+++ b/db/bmap_inflate.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "libxfs.h"
+#include "command.h"
+#include "init.h"
+#include "output.h"
+#include "io.h"
+#include "libfrog/convert.h"
+
+static void
+bmapinflate_help(void)
+{
+       dbprintf(_(
+"\n"
+" Make the bmbt really big by cloning the first data fork mapping over and over.\n"
+" -d     Constrain dirty buffers to this many bytes.\n"
+" -e     Print the size and height of the btree and exit.\n"
+" -n nr  Create this many copies of the mapping.\n"
+"\n"
+));
+
+}
+
+static int
+find_mapping(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       struct xfs_bmbt_irec    *irec)
+{
+       struct xfs_iext_cursor  icur;
+       int                     error;
+
+       if (!xfs_has_reflink(ip->i_mount)) {
+               dbprintf(_("filesystem does not support reflink\n"));
+               return 1;
+       }
+
+       if (ip->i_df.if_nextents != 1) {
+               dbprintf(_("inode must have only one data fork mapping\n"));
+               return 1;
+       }
+
+       error = -libxfs_iread_extents(tp, ip, XFS_DATA_FORK);
+       if (error) {
+               dbprintf(_("could not read data fork, err %d\n"), error);
+               return 1;
+       }
+
+       libxfs_iext_first(&ip->i_df, &icur);
+       if (!xfs_iext_get_extent(&ip->i_df, &icur, irec)) {
+               dbprintf(_("could not read data fork mapping\n"));
+               return 1;
+       }
+
+       if (irec->br_state != XFS_EXT_NORM) {
+               dbprintf(_("cannot duplicate unwritten extent\n"));
+               return 1;
+       }
+
+       return 0;
+}
+
+static int
+set_nrext64(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       xfs_extnum_t            nextents)
+{
+       xfs_extnum_t            max_extents;
+       bool                    large_extcount;
+
+       large_extcount = xfs_inode_has_large_extent_counts(ip);
+       max_extents = xfs_iext_max_nextents(large_extcount, XFS_DATA_FORK);
+       if (nextents <= max_extents)
+               return 0;
+       if (large_extcount)
+               return EFSCORRUPTED;
+       if (!xfs_has_large_extent_counts(ip->i_mount))
+               return EFSCORRUPTED;
+
+       max_extents = xfs_iext_max_nextents(true, XFS_DATA_FORK);
+       if (nextents > max_extents)
+               return EFSCORRUPTED;
+
+       ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       return 0;
+}
+
+static int
+populate_extents(
+       struct xfs_trans                *tp,
+       struct xfs_inode                *ip,
+       struct xbtree_ifakeroot         *ifake,
+       const struct xfs_bmbt_irec      *template,
+       xfs_extnum_t                    nextents)
+{
+       struct xfs_bmbt_irec            irec = {
+               .br_startoff            = 0,
+               .br_startblock          = template->br_startblock,
+               .br_blockcount          = template->br_blockcount,
+               .br_state               = XFS_EXT_NORM,
+       };
+       struct xfs_iext_cursor          icur;
+       struct xfs_ifork                *ifp = ifake->if_fork;
+       unsigned long long              i;
+
+       /* Add all the mappings to the incore extent tree. */
+       libxfs_iext_first(ifp, &icur);
+       for (i = 0; i < nextents; i++) {
+               libxfs_iext_insert_raw(ifp, &icur, &irec);
+               ifp->if_nextents++;
+               libxfs_iext_next(ifp, &icur);
+
+               irec.br_startoff += irec.br_blockcount;
+       }
+
+       ip->i_nblocks = template->br_blockcount * nextents;
+       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       return 0;
+}
+
+struct bmbt_resv {
+       struct list_head        list;
+       xfs_fsblock_t           fsbno;
+       xfs_extlen_t            len;
+       xfs_extlen_t            used;
+};
+
+struct bmbt_data {
+       struct xfs_bmbt_irec    irec;
+       struct list_head        resv_list;
+       unsigned long long      iblocks;
+       unsigned long long      nr;
+};
+
+static int
+alloc_bmbt_blocks(
+       struct xfs_trans        **tpp,
+       struct xfs_inode        *ip,
+       struct bmbt_data        *bd,
+       uint64_t                nr_blocks)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct list_head        *resv_list = &bd->resv_list;
+       int                     error = 0;
+
+       while (nr_blocks > 0) {
+               struct xfs_alloc_arg    args = {
+                       .tp             = *tpp,
+                       .mp             = mp,
+                       .minlen         = 1,
+                       .maxlen         = nr_blocks,
+                       .prod           = 1,
+                       .resv           = XFS_AG_RESV_NONE,
+               };
+               struct bmbt_resv        *resv;
+               xfs_fsblock_t           target = 0;
+
+               if (xfs_has_rmapbt(mp)) {
+                       xfs_agnumber_t          tgt_agno;
+
+                       /*
+                        * Try to allocate bmbt blocks in a different AG so
+                        * that we don't blow up the rmapbt with the bmbt
+                        * records.
+                        */
+                       tgt_agno = 1 + XFS_FSB_TO_AGNO(mp,
+                                                       bd->irec.br_startblock);
+                       if (tgt_agno >= mp->m_sb.sb_agcount)
+                               tgt_agno = 0;
+                       target = XFS_AGB_TO_FSB(mp, tgt_agno, 0);
+               }
+
+               libxfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino,
+                               XFS_DATA_FORK);
+
+               error = -libxfs_alloc_vextent_start_ag(&args, target);
+               if (error)
+                       return error;
+               if (args.fsbno == NULLFSBLOCK)
+                       return ENOSPC;
+
+               resv = kmalloc(sizeof(struct bmbt_resv), 0);
+               if (!resv)
+                       return ENOMEM;
+
+               INIT_LIST_HEAD(&resv->list);
+               resv->fsbno = args.fsbno;
+               resv->len = args.len;
+               resv->used = 0;
+               list_add_tail(&resv->list, resv_list);
+
+               nr_blocks -= args.len;
+
+               error = -libxfs_trans_roll_inode(tpp, ip);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+static int
+get_bmbt_records(
+       struct xfs_btree_cur    *cur,
+       unsigned int            idx,
+       struct xfs_btree_block  *block,
+       unsigned int            nr_wanted,
+       void                    *priv)
+{
+       struct xfs_bmbt_irec    *irec = &cur->bc_rec.b;
+       struct bmbt_data        *bd = priv;
+       union xfs_btree_rec     *block_rec;
+       struct xfs_ifork        *ifp = cur->bc_ino.ifake->if_fork;
+       unsigned int            loaded;
+
+       for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
+               memcpy(irec, &bd->irec, sizeof(struct xfs_bmbt_irec));
+
+               block_rec = libxfs_btree_rec_addr(cur, idx, block);
+               cur->bc_ops->init_rec_from_cur(cur, block_rec);
+               ifp->if_nextents++;
+
+               bd->irec.br_startoff += bd->irec.br_blockcount;
+       }
+
+       return loaded;
+}
+
+static int
+claim_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       void                    *priv)
+{
+       struct bmbt_data        *bd = priv;
+       struct bmbt_resv        *resv;
+       xfs_fsblock_t           fsb;
+
+       /*
+        * The first item in the list should always have a free block unless
+        * we're completely out.
+        */
+       resv = list_first_entry(&bd->resv_list, struct bmbt_resv, list);
+       if (resv->used == resv->len)
+               return ENOSPC;
+
+       fsb = resv->fsbno + resv->used;
+       resv->used++;
+
+       /* If we used all the blocks in this reservation, move it to the end. */
+       if (resv->used == resv->len)
+               list_move_tail(&resv->list, &bd->resv_list);
+
+       ptr->l = cpu_to_be64(fsb);
+       bd->iblocks++;
+       return 0;
+}
+
+static size_t
+iroot_size(
+       struct xfs_btree_cur    *cur,
+       unsigned int            level,
+       unsigned int            nr_this_level,
+       void                    *priv)
+{
+       return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level);
+}
+
+static int
+populate_btree(
+       struct xfs_trans                **tpp,
+       struct xfs_inode                *ip,
+       uint16_t                        dirty_blocks,
+       struct xbtree_ifakeroot         *ifake,
+       struct xfs_btree_cur            *bmap_cur,
+       const struct xfs_bmbt_irec      *template,
+       xfs_extnum_t                    nextents)
+{
+       struct xfs_btree_bload          bmap_bload = {
+               .get_records            = get_bmbt_records,
+               .claim_block            = claim_block,
+               .iroot_size             = iroot_size,
+               .max_dirty              = dirty_blocks,
+               .leaf_slack             = 1,
+               .node_slack             = 1,
+       };
+       struct bmbt_data                bd = {
+               .irec                   = {
+                       .br_startoff    = 0,
+                       .br_startblock  = template->br_startblock,
+                       .br_blockcount  = template->br_blockcount,
+                       .br_state       = XFS_EXT_NORM,
+               },
+               .iblocks                = 0,
+       };
+       struct bmbt_resv                *resv, *n;
+       int                             error;
+
+       error = -libxfs_btree_bload_compute_geometry(bmap_cur, &bmap_bload,
+                       nextents);
+       if (error)
+               return error;
+
+       error = -libxfs_trans_reserve_more(*tpp, bmap_bload.nr_blocks, 0);
+       if (error)
+               return error;
+
+       INIT_LIST_HEAD(&bd.resv_list);
+       error = alloc_bmbt_blocks(tpp, ip, &bd, bmap_bload.nr_blocks);
+       if (error)
+               return error;
+
+       error = -libxfs_btree_bload(bmap_cur, &bmap_bload, &bd);
+       if (error)
+              goto out_resv_list;
+
+       ip->i_nblocks = bd.iblocks + (template->br_blockcount * nextents);
+       libxfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+
+out_resv_list:
+       /* Leak any unused blocks */
+       list_for_each_entry_safe(resv, n, &bd.resv_list, list) {
+               list_del(&resv->list);
+               kmem_free(resv);
+       }
+       return error;
+}
+
+static int
+build_new_datafork(
+       struct xfs_trans                **tpp,
+       struct xfs_inode                *ip,
+       uint16_t                        dirty_blocks,
+       const struct xfs_bmbt_irec      *irec,
+       xfs_extnum_t                    nextents)
+{
+       struct xbtree_ifakeroot         ifake;
+       struct xfs_btree_cur            *bmap_cur;
+       int                             error;
+
+       error = set_nrext64(*tpp, ip, nextents);
+       if (error)
+               return error;
+
+       /* Set up staging for the new bmbt */
+       ifake.if_fork = kmem_cache_zalloc(xfs_ifork_cache, 0);
+       ifake.if_fork_size = xfs_inode_fork_size(ip, XFS_DATA_FORK);
+       bmap_cur = libxfs_bmbt_stage_cursor(ip->i_mount, ip, &ifake);
+
+       /*
+        * Figure out the size and format of the new fork, then fill it with
+        * the bmap record we want.
+        */
+       if (nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) {
+               ifake.if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
+               error = populate_extents(*tpp, ip, &ifake, irec, nextents);
+       } else {
+               ifake.if_fork->if_format = XFS_DINODE_FMT_BTREE;
+               error = populate_btree(tpp, ip, dirty_blocks, &ifake, bmap_cur,
+                               irec, nextents);
+       }
+       if (error) {
+               libxfs_btree_del_cursor(bmap_cur, 0);
+               goto err_ifork;
+       }
+
+       /* Install the new fork in the inode. */
+       libxfs_bmbt_commit_staged_btree(bmap_cur, *tpp, XFS_DATA_FORK);
+       libxfs_btree_del_cursor(bmap_cur, 0);
+
+       /* Mark filesystem as needsrepair */
+       dbprintf(_("filesystem is now inconsistent, xfs_repair required!\n"));
+       mp->m_sb.sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+       libxfs_log_sb(*tpp);
+
+err_ifork:
+       kmem_cache_free(xfs_ifork_cache, ifake.if_fork);
+       return error;
+}
+
+static int
+estimate_size(
+       struct xfs_inode                *ip,
+       unsigned long long              dirty_blocks,
+       xfs_extnum_t                    nextents)
+{
+       struct xfs_btree_bload          bmap_bload = {
+               .leaf_slack             = 1,
+               .node_slack             = 1,
+       };
+       struct xbtree_ifakeroot         ifake;
+       struct xfs_btree_cur            *bmap_cur;
+       int                             error;
+
+       /* FMT_EXTENTS means we report zero btblocks and zero height */
+       if (nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
+               goto report;
+
+       ifake.if_fork = kmem_cache_zalloc(xfs_ifork_cache, 0);
+       ifake.if_fork_size = xfs_inode_fork_size(ip, XFS_DATA_FORK);
+
+       bmap_cur = libxfs_bmbt_stage_cursor(ip->i_mount, ip, &ifake);
+       error = -libxfs_btree_bload_compute_geometry(bmap_cur, &bmap_bload,
+                       nextents);
+       libxfs_btree_del_cursor(bmap_cur, error);
+
+       kmem_cache_free(xfs_ifork_cache, ifake.if_fork);
+
+       if (error)
+               return error;
+
+report:
+       dbprintf(_("ino 0x%llx nextents %llu btblocks %llu btheight %u dirty %u\n"),
+                       ip->i_ino, nextents, bmap_bload.nr_blocks,
+                       bmap_bload.btree_height, dirty_blocks);
+
+       return 0;
+}
+
+static int
+bmapinflate_f(
+       int                     argc,
+       char                    **argv)
+{
+       struct xfs_bmbt_irec    irec;
+       struct xfs_inode        *ip;
+       struct xfs_trans        *tp;
+       char                    *p;
+       unsigned long long      nextents = 0;
+       unsigned long long      dirty_bytes = 60U << 20; /* 60MiB */
+       unsigned long long      dirty_blocks;
+       unsigned int            resblks;
+       bool                    estimate = false;
+       int                     c, error;
+
+       if (iocur_top->ino == NULLFSINO) {
+               dbprintf(_("no current inode\n"));
+               return 0;
+       }
+
+       optind = 0;
+       while ((c = getopt(argc, argv, "d:en:")) != EOF) {
+               switch (c) {
+               case 'e':
+                       estimate = true;
+                       break;
+               case 'n':
+                       errno = 0;
+                       nextents = strtoull(optarg, &p, 0);
+                       if (errno) {
+                               perror(optarg);
+                               return 1;
+                       }
+                       break;
+               case 'd':
+                       errno = 0;
+                       dirty_bytes = cvtnum(mp->m_sb.sb_blocksize,
+                                            mp->m_sb.sb_sectsize, optarg);
+                       if (errno) {
+                               perror(optarg);
+                               return 1;
+                       }
+                       break;
+               default:
+                       dbprintf(_("bad option for bmap command\n"));
+                       return 0;
+               }
+       }
+
+       dirty_blocks = XFS_B_TO_FSBT(mp, dirty_bytes);
+       if (dirty_blocks >= UINT16_MAX)
+               dirty_blocks = UINT16_MAX - 1;
+
+       error = -libxfs_iget(mp, NULL, iocur_top->ino, 0, &ip);
+       if (error) {
+               dbprintf(_("could not grab inode 0x%llx, err %d\n"),
+                               iocur_top->ino, error);
+               return 1;
+       }
+
+       error = estimate_size(ip, dirty_blocks, nextents);
+       if (error)
+               goto out_irele;
+       if (estimate)
+               goto done;
+
+       resblks = libxfs_bmbt_calc_size(mp, nextents);
+       error = -libxfs_trans_alloc_inode(ip, &M_RES(mp)->tr_itruncate,
+                       resblks, 0, false, &tp);
+       if (error) {
+               dbprintf(_("could not allocate transaction, err %d\n"),
+                               error);
+               return 1;
+       }
+
+       error = find_mapping(tp, ip, &irec);
+       if (error)
+               goto out_cancel;
+
+       error = build_new_datafork(&tp, ip, dirty_blocks, &irec, nextents);
+       if (error) {
+               dbprintf(_("could not build new data fork, err %d\n"),
+                               error);
+               exitcode = 1;
+               goto out_cancel;
+       }
+
+       error = -libxfs_trans_commit(tp);
+       if (error) {
+               dbprintf(_("could not commit transaction, err %d\n"),
+                               error);
+               exitcode = 1;
+               return 1;
+       }
+
+done:
+       libxfs_irele(ip);
+       return 0;
+
+out_cancel:
+       libxfs_trans_cancel(tp);
+out_irele:
+       libxfs_irele(ip);
+       return 1;
+}
+
+static const struct cmdinfo bmapinflate_cmd = {
+       .name           = "bmapinflate",
+       .cfunc          = bmapinflate_f,
+       .argmin         = 0,
+       .argmax         = -1,
+       .canpush        = 0,
+       .args           = N_("[-n copies] [-e] [-d maxdirty]"),
+       .oneline        = N_("inflate bmbt by copying mappings"),
+       .help           = bmapinflate_help,
+};
+
+void
+bmapinflate_init(void)
+{
+       if (!expert_mode)
+               return;
+
+       add_command(&bmapinflate_cmd);
+}
diff --git a/db/command.c b/db/command.c

index 2bbd7b0b24f9568dac1ca0f2925f7b700019ea34..6cda03e9856d84bb4a3fa6c50757779aeb49877b 100644 (file)
--- a/db/command.c
+++ b/db/command.c
@@ -142,4 +142,5 @@ init_commands(void)
         fuzz_init();
         timelimit_init();
         iunlink_init();
+       bmapinflate_init();
  }
diff --git a/db/command.h b/db/command.h

index a89e71504f9c7c1a7043befec8be1fc4aa7b3669..2c2926afd7b516c9da46698784dece890aeb2628 100644 (file)
--- a/db/command.h
+++ b/db/command.h
@@ -35,3 +35,4 @@ extern void           btheight_init(void);
  extern void            timelimit_init(void);
  extern void            namei_init(void);
  extern void            iunlink_init(void);
+extern void            bmapinflate_init(void);
diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8

index f53ddd67d87c0cfb15b711cf35bc0f7c73cdc062..a7f6d55ed8bed6ce3caa7e0c4b5875cb2e948299 100644 (file)
--- a/man/man8/xfs_db.8
+++ b/man/man8/xfs_db.8
@@ -388,6 +388,29 @@ and
  options are used to select the attribute or data
  area of the inode, if neither option is given then both areas are shown.
  .TP
+.BI "bmapinflate [\-d " dirty_bytes "] [-e] [\-n " nr "]
+Duplicates the first data fork mapping this many times, as if the mapping had
+been repeatedly reflinked.
+This is an expert-mode command for exercising high-refcount filesystems only.
+Existing data fork mappings will be forgotten and the refcount btree will not
+be updated.
+This command leaves at least the refcount btree and the inode inconsistent;
+.B xfs_repair
+must be run afterwards.
+.RS 1.0i
+.TP 0.4i
+.B \-d
+Constrain the memory consumption of new dirty btree blocks to this quantity.
+Defaults to 60MiB.
+.TP 0.4i
+.B \-e
+Estimate the number of blocks and height of the new data fork mapping
+structure and exit without changing anything.
+.TP 0.4i
+.B \-n
+Create this many copies of the first mapping.
+.RE
+.TP
  .B btdump [-a] [-i]
  If the cursor points to a btree node, dump the btree from that block downward.
  If instead the cursor points to an inode, dump the data fork block mapping btree if there is one.
author	Darrick J. Wong <djwong@kernel.org>
	Mon, 15 Apr 2024 23:07:49 +0000 (16:07 -0700)
committer	Darrick J. Wong <djwong@kernel.org>
	Wed, 17 Apr 2024 21:06:28 +0000 (14:06 -0700)
db/Makefile		patch \| blob \| blame \| history
db/bmap_inflate.c	[new file with mode: 0644]	patch \| blob
db/command.c		patch \| blob \| blame \| history
db/command.h		patch \| blob \| blame \| history
man/man8/xfs_db.8		patch \| blob \| blame \| history