git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_repair: rebuild block mappings from rmapbt data
author: Darrick J. Wong <djwong@kernel.org>
        Mon, 15 Apr 2024 23:07:49 +0000 (16:07 -0700)
committer: Darrick J. Wong <djwong@kernel.org>
        Wed, 17 Apr 2024 21:06:28 +0000 (14:06 -0700)
Use rmap records to rebuild corrupt inode forks instead of zapping
the whole inode if we think the rmap data is reasonably sane.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <bodonnel@redhat.com>
13 files changed:
include/xfs_trans.h
libfrog/util.h
libxfs/libxfs_api_defs.h
libxfs/trans.c
repair/Makefile
repair/agbtree.c
repair/bmap_repair.c [new file with mode: 0644]
repair/bmap_repair.h [new file with mode: 0644]
repair/bulkload.c
repair/bulkload.h
repair/dinode.c
repair/rmap.c
repair/rmap.h

index ab298ccfe5565e9e26609492a46e3e3d72098e3d..ac82c3bc480a61bd56579b2921054348a29e8163 100644 (file)
@@ -98,6 +98,8 @@ int   libxfs_trans_alloc_rollable(struct xfs_mount *mp, uint blocks,
 int    libxfs_trans_alloc_empty(struct xfs_mount *mp, struct xfs_trans **tpp);
 int    libxfs_trans_commit(struct xfs_trans *);
 void   libxfs_trans_cancel(struct xfs_trans *);
+int    libxfs_trans_reserve_more(struct xfs_trans *tp, uint blocks,
+                       uint rtextents);
 
 /* cancel dfops associated with a transaction */
 void xfs_defer_cancel(struct xfs_trans *);
index 1b97881bf1687c351aba142b3e9293825c5a752e..5df95e69cd11da87b5c262e556148d6e9cf37572 100644 (file)
@@ -8,4 +8,9 @@
 
 unsigned int   log2_roundup(unsigned int i);
 
+#define min_t(type,x,y) \
+       ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#define max_t(type,x,y) \
+       ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+
 #endif /* __LIBFROG_UTIL_H__ */
index 28960317ab6bdaeed4058edb4195d627defc75b2..769733ec2ee3bc8f22392235018d39be4feef242 100644 (file)
@@ -32,7 +32,7 @@
 #define xfs_alloc_fix_freelist         libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist         libxfs_alloc_min_freelist
 #define xfs_alloc_read_agf             libxfs_alloc_read_agf
-#define xfs_alloc_vextent              libxfs_alloc_vextent
+#define xfs_alloc_vextent_start_ag     libxfs_alloc_vextent_start_ag
 
 #define xfs_ascii_ci_hashname          libxfs_ascii_ci_hashname
 
 #define xfs_attr_shortform_verify      libxfs_attr_shortform_verify
 
 #define __xfs_bmap_add_free            __libxfs_bmap_add_free
+#define xfs_bmap_validate_extent       libxfs_bmap_validate_extent
 #define xfs_bmapi_read                 libxfs_bmapi_read
+#define xfs_bmapi_remap                        libxfs_bmapi_remap
 #define xfs_bmapi_write                        libxfs_bmapi_write
 #define xfs_bmap_last_offset           libxfs_bmap_last_offset
+#define xfs_bmbt_calc_size             libxfs_bmbt_calc_size
+#define xfs_bmbt_commit_staged_btree   libxfs_bmbt_commit_staged_btree
+#define xfs_bmbt_disk_get_startoff     libxfs_bmbt_disk_get_startoff
+#define xfs_bmbt_disk_set_all          libxfs_bmbt_disk_set_all
 #define xfs_bmbt_maxlevels_ondisk      libxfs_bmbt_maxlevels_ondisk
 #define xfs_bmbt_maxrecs               libxfs_bmbt_maxrecs
+#define xfs_bmbt_stage_cursor          libxfs_bmbt_stage_cursor
 #define xfs_bmdr_maxrecs               libxfs_bmdr_maxrecs
 
 #define xfs_btree_bload                        libxfs_btree_bload
 
 #define xfs_finobt_calc_reserves       libxfs_finobt_calc_reserves
 #define xfs_free_extent                        libxfs_free_extent
+#define xfs_free_extent_later          libxfs_free_extent_later
 #define xfs_free_perag                 libxfs_free_perag
 #define xfs_fs_geometry                        libxfs_fs_geometry
 #define xfs_highbit32                  libxfs_highbit32
 #define xfs_ialloc_read_agi            libxfs_ialloc_read_agi
 #define xfs_idata_realloc              libxfs_idata_realloc
 #define xfs_idestroy_fork              libxfs_idestroy_fork
+#define xfs_iext_first                 libxfs_iext_first
+#define xfs_iext_insert_raw            libxfs_iext_insert_raw
 #define xfs_iext_lookup_extent         libxfs_iext_lookup_extent
+#define xfs_iext_next                  libxfs_iext_next
 #define xfs_ifork_zap_attr             libxfs_ifork_zap_attr
 #define xfs_imap_to_bp                 libxfs_imap_to_bp
 #define xfs_initialize_perag           libxfs_initialize_perag
 #define xfs_rmapbt_stage_cursor                libxfs_rmapbt_stage_cursor
 #define xfs_rmap_compare               libxfs_rmap_compare
 #define xfs_rmap_get_rec               libxfs_rmap_get_rec
+#define xfs_rmap_ino_bmbt_owner                libxfs_rmap_ino_bmbt_owner
 #define xfs_rmap_irec_offset_pack      libxfs_rmap_irec_offset_pack
 #define xfs_rmap_irec_offset_unpack    libxfs_rmap_irec_offset_unpack
 #define xfs_rmap_lookup_le             libxfs_rmap_lookup_le
 #define xfs_rmap_lookup_le_range       libxfs_rmap_lookup_le_range
+#define xfs_rmap_query_all             libxfs_rmap_query_all
 #define xfs_rmap_query_range           libxfs_rmap_query_range
 
 #define xfs_rtbitmap_getword           libxfs_rtbitmap_getword
index bd1186b24e628074efd6ddc2605f9c4e906849cb..8143a6a99f620b528bfc9017fc8a318f8bad83e7 100644 (file)
@@ -1143,3 +1143,51 @@ libxfs_trans_alloc_inode(
        *tpp = tp;
        return 0;
 }
+
+/*
+ * Try to reserve more blocks for a transaction.  The single use case we
+ * support is for offline repair -- use a transaction to gather data without
+ * fear of btree cycle deadlocks; calculate how many blocks we really need
+ * from that data; and only then start modifying data.  This can fail due to
+ * ENOSPC, so we have to be able to cancel the transaction.
+ */
+int
+libxfs_trans_reserve_more(
+       struct xfs_trans        *tp,
+       uint                    blocks,
+       uint                    rtextents)
+{
+       int                     error = 0;
+
+       /* Only a clean transaction may grow its reservation. */
+       ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+       /*
+        * Attempt to reserve the needed disk blocks by decrementing
+        * the number needed from the number available.  This will
+        * fail if the count would go below zero.
+        */
+       if (blocks > 0) {
+               if (tp->t_mountp->m_sb.sb_fdblocks < blocks)
+                       return -ENOSPC;
+               tp->t_blk_res += blocks;
+       }
+
+       /*
+        * Attempt to reserve the needed realtime extents.  Compare against
+        * the count of *free* rt extents (sb_frextents); the previous code
+        * checked sb_rextents, the total number of rt extents in the
+        * filesystem, which would almost never report ENOSPC.
+        */
+       if (rtextents > 0) {
+               if (tp->t_mountp->m_sb.sb_frextents < rtextents) {
+                       error = -ENOSPC;
+                       goto out_blocks;
+               }
+               /*
+                * NOTE(review): the rt reservation is not recorded in the
+                * transaction (no t_rtx_res update) -- confirm callers do
+                * not rely on it being tracked.
+                */
+       }
+
+       return 0;
+out_blocks:
+       /* Undo the block reservation if the rt reservation failed. */
+       if (blocks > 0)
+               tp->t_blk_res -= blocks;
+
+       return error;
+}
index 2c40e59a30fc1b85dd31bdf3f3509545dac01467..e5014deb0ce841cc9581b066545b93ef8bb648eb 100644 (file)
@@ -16,6 +16,7 @@ HFILES = \
        avl.h \
        bulkload.h \
        bmap.h \
+       bmap_repair.h \
        btree.h \
        da_util.h \
        dinode.h \
@@ -41,6 +42,7 @@ CFILES = \
        avl.c \
        bulkload.c \
        bmap.c \
+       bmap_repair.c \
        btree.c \
        da_util.c \
        dino_chunks.c \
index c6f0512fe7dee7cad7c97edd5fac99b98c7d7a62..38f3f7b8feacbd2f08b314ec88332879f71fba55 100644 (file)
@@ -22,7 +22,7 @@ init_rebuild(
 {
        memset(btr, 0, sizeof(struct bt_rebuild));
 
-       bulkload_init_ag(&btr->newbt, sc, oinfo);
+       bulkload_init_ag(&btr->newbt, sc, oinfo, NULLFSBLOCK);
        btr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
        bulkload_estimate_ag_slack(sc, &btr->bload, est_agfreeblocks);
 }
diff --git a/repair/bmap_repair.c b/repair/bmap_repair.c
new file mode 100644 (file)
index 0000000..1dbcafb
--- /dev/null
@@ -0,0 +1,748 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2019-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include <libxfs.h>
+#include "btree.h"
+#include "err_protos.h"
+#include "libxlog.h"
+#include "incore.h"
+#include "globals.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
+#include "bulkload.h"
+#include "bmap_repair.h"
+#include "libfrog/util.h"
+
+/*
+ * Inode Fork Block Mapping (BMBT) Repair
+ * ======================================
+ *
+ * Gather all the rmap records for the inode and fork we're fixing, reset the
+ * incore fork, then recreate the btree.
+ */
+struct xrep_bmap {
+       /* List of new bmap records. */
+       struct xfs_slab         *bmap_records;
+       struct xfs_slab_cursor  *bmap_cursor;
+
+       /* New fork. */
+       struct bulkload         new_fork_info;
+       struct xfs_btree_bload  bmap_bload;
+
+       struct repair_ctx       *sc;
+
+       /* How many blocks did we find allocated to this file? */
+       xfs_rfsblock_t          nblocks;
+
+       /* How many bmbt blocks did we find for this fork? */
+       xfs_rfsblock_t          old_bmbt_block_count;
+
+       /* Which fork are we fixing? */
+       int                     whichfork;
+};
+
+/* Remember this reverse-mapping as a series of bmap records. */
+STATIC int
+xrep_bmap_from_rmap(
+       struct xrep_bmap        *rb,
+       xfs_fileoff_t           startoff,
+       xfs_fsblock_t           startblock,
+       xfs_filblks_t           blockcount,
+       bool                    unwritten)
+{
+       struct xfs_bmbt_rec     rbe;
+       struct xfs_bmbt_irec    irec;
+       int                     error = 0;
+
+       irec.br_startoff = startoff;
+       irec.br_startblock = startblock;
+       irec.br_state = unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+
+       /*
+        * A single rmap record can cover more blocks than the ondisk bmbt
+        * extent size limit, so emit one validated bmbt record per
+        * XFS_MAX_BMBT_EXTLEN-sized chunk until the mapping is consumed.
+        */
+       do {
+               xfs_failaddr_t  fa;
+
+               irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
+                               XFS_MAX_BMBT_EXTLEN);
+
+               fa = libxfs_bmap_validate_extent(rb->sc->ip, rb->whichfork,
+                               &irec);
+               if (fa)
+                       return -EFSCORRUPTED;
+
+               libxfs_bmbt_disk_set_all(&rbe, &irec);
+
+               error = slab_add(rb->bmap_records, &rbe);
+               if (error)
+                       return error;
+
+               irec.br_startblock += irec.br_blockcount;
+               irec.br_startoff += irec.br_blockcount;
+               blockcount -= irec.br_blockcount;
+       } while (blockcount > 0);
+
+       return 0;
+}
+
+/*
+ * Check for any obvious errors or conflicts in the file mapping.
+ *
+ * This runs inside an rmapbt query whose result is negated by
+ * xrep_bmap_scan_ag, so return negative errnos like the sibling helper
+ * xrep_bmap_from_rmap does; the positive EFSCORRUPTED returns here would
+ * have come out of the query with the wrong sign.
+ */
+STATIC int
+xrep_bmap_check_fork_rmap(
+       struct xrep_bmap                *rb,
+       struct xfs_btree_cur            *cur,
+       const struct xfs_rmap_irec      *rec)
+{
+       struct repair_ctx               *sc = rb->sc;
+
+       /*
+        * Data extents for rt files are never stored on the data device, but
+        * everything else (xattrs, bmbt blocks) can be.
+        */
+       if (XFS_IS_REALTIME_INODE(sc->ip) &&
+           !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
+               return -EFSCORRUPTED;
+
+       /* Check that this is within the AG. */
+       if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
+                               rec->rm_blockcount))
+               return -EFSCORRUPTED;
+
+       /* No contradictory flags. */
+       if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
+           (rec->rm_flags & XFS_RMAP_UNWRITTEN))
+               return -EFSCORRUPTED;
+
+       /* Check the file offset range. */
+       if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
+           !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
+               return -EFSCORRUPTED;
+
+       return 0;
+}
+
+/* Record extents that belong to this inode's fork. */
+STATIC int
+xrep_bmap_walk_rmap(
+       struct xfs_btree_cur            *cur,
+       const struct xfs_rmap_irec      *rec,
+       void                            *priv)
+{
+       struct xrep_bmap                *rb = priv;
+       struct xfs_mount                *mp = cur->bc_mp;
+       xfs_fsblock_t                   fsbno;
+       int                             error;
+
+       /* Skip extents which are not owned by this inode and fork. */
+       if (rec->rm_owner != rb->sc->ip->i_ino)
+               return 0;
+
+       error = xrep_bmap_check_fork_rmap(rb, cur, rec);
+       if (error)
+               return error;
+
+       /*
+        * Record all blocks allocated to this file even if the extent isn't
+        * for the fork we're rebuilding so that we can reset di_nblocks later.
+        */
+       rb->nblocks += rec->rm_blockcount;
+
+       /* If this rmap isn't for the fork we want, we're done. */
+       if (rb->whichfork == XFS_DATA_FORK &&
+           (rec->rm_flags & XFS_RMAP_ATTR_FORK))
+               return 0;
+       if (rb->whichfork == XFS_ATTR_FORK &&
+           !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+               return 0;
+
+       fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
+                       rec->rm_startblock);
+
+       /* Old bmbt blocks are only counted, never turned into mappings. */
+       if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
+               rb->old_bmbt_block_count += rec->rm_blockcount;
+               return 0;
+       }
+
+       return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
+                       rec->rm_blockcount,
+                       rec->rm_flags & XFS_RMAP_UNWRITTEN);
+}
+
+/* Compare two bmap extents; the sort key is the file offset. */
+static int
+xrep_bmap_extent_cmp(
+       const void                      *a,
+       const void                      *b)
+{
+       xfs_fileoff_t                   ao;
+       xfs_fileoff_t                   bo;
+
+       ao = libxfs_bmbt_disk_get_startoff((struct xfs_bmbt_rec *)a);
+       bo = libxfs_bmbt_disk_get_startoff((struct xfs_bmbt_rec *)b);
+
+       if (ao > bo)
+               return 1;
+       else if (ao < bo)
+               return -1;
+       return 0;
+}
+
+/* Scan one AG for reverse mappings that we can turn into extent maps. */
+STATIC int
+xrep_bmap_scan_ag(
+       struct xrep_bmap        *rb,
+       struct xfs_perag        *pag)
+{
+       struct repair_ctx       *sc = rb->sc;
+       struct xfs_mount        *mp = sc->mp;
+       struct xfs_buf          *agf_bp = NULL;
+       struct xfs_btree_cur    *cur;
+       int                     error;
+
+       /* libxfs returns negative errnos; repair's convention is positive. */
+       error = -libxfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp);
+       if (error)
+               return error;
+       if (!agf_bp)
+               return ENOMEM;
+       cur = libxfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, pag);
+       error = -libxfs_rmap_query_all(cur, xrep_bmap_walk_rmap, rb);
+       libxfs_btree_del_cursor(cur, error);
+       libxfs_trans_brelse(sc->tp, agf_bp);
+       return error;
+}
+
+/*
+ * Collect block mappings for this fork of this inode and decide if we have
+ * enough space to rebuild.  Caller is responsible for cleaning up the list if
+ * anything goes wrong.
+ */
+STATIC int
+xrep_bmap_find_mappings(
+       struct xrep_bmap        *rb)
+{
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          agno;
+       int                     error;
+
+       /* Iterate the rmaps for extents. */
+       for_each_perag(rb->sc->mp, agno, pag) {
+               error = xrep_bmap_scan_ag(rb, pag);
+               if (error) {
+                       /* Drop the perag reference before bailing out. */
+                       libxfs_perag_put(pag);
+                       return error;
+               }
+       }
+
+       return 0;
+}
+
+/* Retrieve bmap data for bulk load. */
+STATIC int
+xrep_bmap_get_records(
+       struct xfs_btree_cur    *cur,
+       unsigned int            idx,
+       struct xfs_btree_block  *block,
+       unsigned int            nr_wanted,
+       void                    *priv)
+{
+       struct xfs_bmbt_rec     *rec;
+       struct xfs_bmbt_irec    *irec = &cur->bc_rec.b;
+       struct xrep_bmap        *rb = priv;
+       union xfs_btree_rec     *block_rec;
+       unsigned int            loaded;
+
+       for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
+               /* Decode the next slab record into cur->bc_rec.b... */
+               rec = pop_slab_cursor(rb->bmap_cursor);
+               libxfs_bmbt_disk_get_all(rec, irec);
+
+               /* ...which init_rec_from_cur copies into the new block. */
+               block_rec = libxfs_btree_rec_addr(cur, idx, block);
+               cur->bc_ops->init_rec_from_cur(cur, block_rec);
+       }
+
+       return loaded;
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_bmap_claim_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       void                    *priv)
+{
+       struct xrep_bmap        *rb = priv;
+
+       /* Hand out one of the blocks reserved in rb->new_fork_info. */
+       return bulkload_claim_block(cur, &rb->new_fork_info, ptr);
+}
+
+/* Figure out how much space we need to create the incore btree root block. */
+STATIC size_t
+xrep_bmap_iroot_size(
+       struct xfs_btree_cur    *cur,
+       unsigned int            level,
+       unsigned int            nr_this_level,
+       void                    *priv)
+{
+       /*
+        * NOTE(review): presumably a level-0 root would have been staged as
+        * extents format instead, hence the assert -- confirm.
+        */
+       ASSERT(level > 0);
+
+       return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level);
+}
+
+/* Update the inode counters. */
+STATIC int
+xrep_bmap_reset_counters(
+       struct xrep_bmap        *rb)
+{
+       struct repair_ctx       *sc = rb->sc;
+       struct xbtree_ifakeroot *ifake = &rb->new_fork_info.ifake;
+       int64_t                 delta;
+
+       /*
+        * Update the inode block counts to reflect the extents we found in the
+        * rmapbt.  delta (new bmbt blocks minus old) may be negative, which is
+        * why it is a signed 64-bit quantity.
+        */
+       delta = ifake->if_blocks - rb->old_bmbt_block_count;
+       sc->ip->i_nblocks = rb->nblocks + delta;
+       libxfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+
+       /* Quotas don't exist so we're done. */
+       return 0;
+}
+
+/*
+ * Ensure that the inode being repaired is ready to handle a certain number of
+ * extents, or return EFSCORRUPTED.  Caller must hold the ILOCK of the inode
+ * being repaired and have joined it to the scrub transaction.
+ */
+static int
+xrep_ino_ensure_extent_count(
+       struct repair_ctx       *sc,
+       int                     whichfork,
+       xfs_extnum_t            nextents)
+{
+       xfs_extnum_t            max_extents;
+       bool                    large_extcount;
+
+       large_extcount = xfs_inode_has_large_extent_counts(sc->ip);
+       max_extents = xfs_iext_max_nextents(large_extcount, whichfork);
+       if (nextents <= max_extents)
+               return 0;
+       /* Already using the large counter format and still too big: give up. */
+       if (large_extcount)
+               return EFSCORRUPTED;
+       /* Can't upgrade if the filesystem doesn't support large counters. */
+       if (!xfs_has_large_extent_counts(sc->mp))
+               return EFSCORRUPTED;
+
+       max_extents = xfs_iext_max_nextents(true, whichfork);
+       if (nextents > max_extents)
+               return EFSCORRUPTED;
+
+       /* Upgrade the inode to the 64-bit extent counter format. */
+       sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+       libxfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+       return 0;
+}
+
+/*
+ * Create a new iext tree and load it with block mappings.  If the inode is
+ * in extents format, that's all we need to do to commit the new mappings.
+ * If it is in btree format, this takes care of preloading the incore tree.
+ */
+STATIC int
+xrep_bmap_extents_load(
+       struct xrep_bmap        *rb,
+       struct xfs_btree_cur    *bmap_cur,
+       uint64_t                nextents)
+{
+       struct xfs_iext_cursor  icur;
+       struct xbtree_ifakeroot *ifake = &rb->new_fork_info.ifake;
+       struct xfs_ifork        *ifp = ifake->if_fork;
+       unsigned int            i;
+       int                     error;
+
+       ASSERT(ifp->if_bytes == 0);
+
+       /* The cursor yields records sorted by xrep_bmap_extent_cmp. */
+       error = init_slab_cursor(rb->bmap_records, xrep_bmap_extent_cmp,
+                       &rb->bmap_cursor);
+       if (error)
+               return error;
+
+       /* Add all the mappings to the incore extent tree. */
+       libxfs_iext_first(ifp, &icur);
+       for (i = 0; i < nextents; i++) {
+               struct xfs_bmbt_rec     *rec;
+
+               rec = pop_slab_cursor(rb->bmap_cursor);
+               libxfs_bmbt_disk_get_all(rec, &bmap_cur->bc_rec.b);
+               libxfs_iext_insert_raw(ifp, &icur, &bmap_cur->bc_rec.b);
+               ifp->if_nextents++;
+               libxfs_iext_next(ifp, &icur);
+       }
+       free_slab_cursor(&rb->bmap_cursor);
+
+       /* Widen the ondisk extent counter format if the new count needs it. */
+       return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
+                       ifp->if_nextents);
+}
+
+/*
+ * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
+ * and load the incore extent tree.
+ */
+STATIC int
+xrep_bmap_btree_load(
+       struct xrep_bmap        *rb,
+       struct xfs_btree_cur    *bmap_cur,
+       uint64_t                nextents)
+{
+       struct repair_ctx       *sc = rb->sc;
+       int                     error;
+
+       rb->bmap_bload.get_records = xrep_bmap_get_records;
+       rb->bmap_bload.claim_block = xrep_bmap_claim_block;
+       rb->bmap_bload.iroot_size = xrep_bmap_iroot_size;
+       rb->bmap_bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
+
+       /*
+        * Always make the btree as small as possible, since we might need the
+        * space to rebuild the space metadata btrees in later phases.
+        */
+       rb->bmap_bload.leaf_slack = 0;
+       rb->bmap_bload.node_slack = 0;
+
+       /* Compute how many blocks we'll need. */
+       error = -libxfs_btree_bload_compute_geometry(bmap_cur, &rb->bmap_bload,
+                       nextents);
+       if (error)
+               return error;
+
+       /*
+        * Guess how many blocks we're going to need to rebuild an entire bmap
+        * from the number of extents we found, and pump up our transaction to
+        * have sufficient block reservation.
+        */
+       error = -libxfs_trans_reserve_more(sc->tp, rb->bmap_bload.nr_blocks, 0);
+       if (error)
+               return error;
+
+       /* Reserve the space we'll need for the new btree. */
+       error = bulkload_alloc_file_blocks(&rb->new_fork_info,
+                       rb->bmap_bload.nr_blocks);
+       if (error)
+               return error;
+
+       /* Add all observed bmap records. */
+       error = init_slab_cursor(rb->bmap_records, xrep_bmap_extent_cmp,
+                       &rb->bmap_cursor);
+       if (error)
+               return error;
+       error = -libxfs_btree_bload(bmap_cur, &rb->bmap_bload, rb);
+       free_slab_cursor(&rb->bmap_cursor);
+       if (error)
+               return error;
+
+       /*
+        * Load the new bmap records into the new incore extent tree to
+        * preserve delalloc reservations for regular files.  The directory
+        * code loads the extent tree during xfs_dir_open and assumes
+        * thereafter that it remains loaded, so we must not violate that
+        * assumption.
+        */
+       return xrep_bmap_extents_load(rb, bmap_cur, nextents);
+}
+
+/*
+ * Use the collected bmap information to stage a new bmap fork.  If this is
+ * successful we'll return with the new fork information logged to the repair
+ * transaction but not yet committed.
+ */
+STATIC int
+xrep_bmap_build_new_fork(
+       struct xrep_bmap        *rb)
+{
+       struct xfs_owner_info   oinfo;
+       struct repair_ctx       *sc = rb->sc;
+       struct xfs_btree_cur    *bmap_cur;
+       struct xbtree_ifakeroot *ifake = &rb->new_fork_info.ifake;
+       uint64_t                nextents;
+       int                     error;
+
+       /*
+        * Sort the bmap extents by startblock to avoid btree splits when we
+        * rebuild the bmbt btree.
+        */
+       qsort_slab(rb->bmap_records, xrep_bmap_extent_cmp);
+
+       /*
+        * Prepare to construct the new fork by initializing the new btree
+        * structure and creating a fake ifork in the ifakeroot structure.
+        */
+       libxfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
+       bulkload_init_inode(&rb->new_fork_info, sc, rb->whichfork, &oinfo);
+       bmap_cur = libxfs_bmbt_stage_cursor(sc->mp, sc->ip, ifake);
+
+       /*
+        * Figure out the size and format of the new fork, then fill it with
+        * all the bmap records we've found.  Join the inode to the transaction
+        * so that we can roll the transaction while holding the inode locked.
+        */
+       libxfs_trans_ijoin(sc->tp, sc->ip, 0);
+       nextents = slab_count(rb->bmap_records);
+       if (nextents <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
+               ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
+               error = xrep_bmap_extents_load(rb, bmap_cur, nextents);
+       } else {
+               ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
+               error = xrep_bmap_btree_load(rb, bmap_cur, nextents);
+       }
+       if (error)
+               goto err_cur;
+
+       /*
+        * Install the new fork in the inode.  After this point the old mapping
+        * data are no longer accessible and the new tree is live.  We delete
+        * the cursor immediately after committing the staged root because the
+        * staged fork might be in extents format.
+        */
+       libxfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
+       libxfs_btree_del_cursor(bmap_cur, 0);
+
+       /* Reset the inode counters now that we've changed the fork. */
+       error = xrep_bmap_reset_counters(rb);
+       if (error)
+               goto err_newbt;
+
+       /* Dispose of any unused blocks and the accounting information. */
+       error = bulkload_commit(&rb->new_fork_info);
+       if (error)
+               return error;
+
+       return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+err_cur:
+       /* NOTE(review): bmap_cur is always set by this point; defensive check. */
+       if (bmap_cur)
+               libxfs_btree_del_cursor(bmap_cur, error);
+err_newbt:
+       bulkload_cancel(&rb->new_fork_info);
+       return error;
+}
+
+/* Check for garbage inputs.  Returns ECANCELED if there's nothing to do. */
+STATIC int
+xrep_bmap_check_inputs(
+       struct repair_ctx       *sc,
+       int                     whichfork,
+{
+       struct xfs_ifork        *ifp = xfs_ifork_ptr(sc->ip, whichfork);
+
+       ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
+
+       /* Rebuilding from reverse mappings requires the rmapbt feature. */
+       if (!xfs_has_rmapbt(sc->mp))
+               return EOPNOTSUPP;
+
+       /* No fork means nothing to rebuild. */
+       if (!ifp)
+               return ECANCELED;
+
+       /*
+        * We only know how to repair extent mappings, which is to say that we
+        * only support extents and btree fork format.  Repairs to a local
+        * format fork require a higher level repair function, so we do not
+        * have any work to do here.
+        */
+       switch (ifp->if_format) {
+       case XFS_DINODE_FMT_DEV:
+       case XFS_DINODE_FMT_LOCAL:
+       case XFS_DINODE_FMT_UUID:
+               return ECANCELED;
+       case XFS_DINODE_FMT_EXTENTS:
+       case XFS_DINODE_FMT_BTREE:
+               break;
+       default:
+               return EFSCORRUPTED;
+       }
+
+       /* Attr forks have no further restrictions to check. */
+       if (whichfork == XFS_ATTR_FORK)
+               return 0;
+
+       /* Only files, symlinks, and directories get to have data forks. */
+       switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
+       case S_IFREG:
+       case S_IFDIR:
+       case S_IFLNK:
+               /* ok */
+               break;
+       default:
+               return EINVAL;
+       }
+
+       /* Don't know how to rebuild realtime data forks. */
+       if (XFS_IS_REALTIME_INODE(sc->ip))
+               return EOPNOTSUPP;
+
+       return 0;
+}
+
+/* Repair an inode fork. */
+STATIC int
+xrep_bmap(
+       struct repair_ctx       *sc,
+       int                     whichfork)
+{
+       struct xrep_bmap        *rb;
+       int                     error = 0;
+
+       /* ECANCELED means "nothing to do" and is not an error for callers. */
+       error = xrep_bmap_check_inputs(sc, whichfork);
+       if (error == ECANCELED)
+               return 0;
+       if (error)
+               return error;
+
+       rb = kmem_zalloc(sizeof(struct xrep_bmap), KM_NOFS | KM_MAYFAIL);
+       if (!rb)
+               return ENOMEM;
+       rb->sc = sc;
+       rb->whichfork = whichfork;
+
+       /* Set up some storage */
+       error = init_slab(&rb->bmap_records, sizeof(struct xfs_bmbt_rec));
+       if (error)
+               goto out_rb;
+
+       /* Collect all reverse mappings for this fork's extents. */
+       error = xrep_bmap_find_mappings(rb);
+       if (error)
+               goto out_bitmap;
+
+       /* Rebuild the bmap information. */
+       error = xrep_bmap_build_new_fork(rb);
+
+       /*
+        * We don't need to free the old bmbt blocks because we're rebuilding
+        * all the space metadata later.
+        */
+
+out_bitmap:
+       /* (Label name notwithstanding, this frees the bmap record slab.) */
+       free_slab(&rb->bmap_records);
+out_rb:
+       kmem_free(rb);
+       return error;
+}
+
+/* Rebuild some inode's bmap. */
+int
+rebuild_bmap(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino,
+       int                     whichfork,
+       unsigned long           nr_extents,
+       struct xfs_buf          **ino_bpp,
+       struct xfs_dinode       **dinop,
+       int                     *dirty)
+{
+       struct repair_ctx       sc = {
+               .mp             = mp,
+       };
+       const struct xfs_buf_ops *bp_ops;
+       unsigned long           boffset;
+       unsigned long long      resblks;
+       xfs_daddr_t             bp_bn;
+       int                     bp_length;
+       int                     error, err2;
+
+       /* Remember the buffer's identity so we can re-read it on failure. */
+       bp_bn = xfs_buf_daddr(*ino_bpp);
+       bp_length = (*ino_bpp)->b_length;
+       bp_ops = (*ino_bpp)->b_ops;
+       boffset = (char *)(*dinop) - (char *)(*ino_bpp)->b_addr;
+
+       /*
+        * Bail out if the inode didn't think it had extents.  Otherwise, zap
+        * it back to a zero-extents fork so that we can rebuild it.
+        */
+       switch (whichfork) {
+       case XFS_DATA_FORK:
+               if ((*dinop)->di_nextents == 0)
+                       return 0;
+               (*dinop)->di_format = XFS_DINODE_FMT_EXTENTS;
+               (*dinop)->di_nextents = 0;
+               libxfs_dinode_calc_crc(mp, *dinop);
+               *dirty = 1;
+               break;
+       case XFS_ATTR_FORK:
+               if ((*dinop)->di_anextents == 0)
+                       return 0;
+               (*dinop)->di_aformat = XFS_DINODE_FMT_EXTENTS;
+               (*dinop)->di_anextents = 0;
+               libxfs_dinode_calc_crc(mp, *dinop);
+               *dirty = 1;
+               break;
+       default:
+               return EINVAL;
+       }
+
+       /* Size the reservation from the worst-case bmbt for nr_extents. */
+       resblks = libxfs_bmbt_calc_size(mp, nr_extents);
+       error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, 0,
+                       0, &sc.tp);
+       if (error)
+               return error;
+
+       /*
+        * Repair magic: the caller passed us the inode cluster buffer for the
+        * inode.  The _iget call grabs the buffer to load the incore inode, so
+        * the buffer must be attached to the transaction to avoid recursing
+        * the buffer lock.
+        *
+        * Unfortunately, the _iget call drops the buffer once the inode is
+        * loaded, so if we've made any changes we have to log the buffer, hold
+        * it, and roll the transaction.  This persists the caller's changes
+        * and maintains our ownership of the cluster buffer.
+        */
+       libxfs_trans_bjoin(sc.tp, *ino_bpp);
+       if (*dirty) {
+               unsigned int    end = BBTOB((*ino_bpp)->b_length) - 1;
+
+               libxfs_trans_log_buf(sc.tp, *ino_bpp, 0, end);
+               *dirty = 0;
+
+               libxfs_trans_bhold(sc.tp, *ino_bpp);
+               error = -libxfs_trans_roll(&sc.tp);
+               libxfs_trans_bjoin(sc.tp, *ino_bpp);
+               if (error)
+                       goto out_cancel;
+       }
+
+       /* Grab the inode and fix the bmbt. */
+       error = -libxfs_iget(mp, sc.tp, ino, 0, &sc.ip);
+       if (error)
+               goto out_cancel;
+       error = xrep_bmap(&sc, whichfork);
+       if (error)
+               libxfs_trans_cancel(sc.tp);
+       else
+               error = -libxfs_trans_commit(sc.tp);
+
+       /*
+        * Rebuilding the inode fork rolled the transaction, so we need to
+        * re-grab the inode cluster buffer and dinode pointer for the caller.
+        *
+        * NOTE(review): this do_error message says "failed repair" even when
+        * the repair itself succeeded -- consider a neutral message.
+        */
+       err2 = -libxfs_imap_to_bp(mp, NULL, &sc.ip->i_imap, ino_bpp);
+       if (err2)
+               do_error(
+ _("Unable to re-grab inode cluster buffer after failed repair of inode %llu, error %d.\n"),
+                               (unsigned long long)ino, err2);
+       *dinop = xfs_buf_offset(*ino_bpp, sc.ip->i_imap.im_boffset);
+       libxfs_irele(sc.ip);
+
+       return error;
+
+out_cancel:
+       libxfs_trans_cancel(sc.tp);
+
+       /*
+        * Try to regrab the old buffer so we have something to return to the
+        * caller.
+        */
+       err2 = -libxfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, bp_bn,
+                       bp_length, 0, ino_bpp, bp_ops);
+       if (err2)
+               do_error(
+ _("Unable to re-grab inode cluster buffer after failed repair of inode %llu, error %d.\n"),
+                               (unsigned long long)ino, err2);
+       *dinop = xfs_buf_offset(*ino_bpp, boffset);
+       return error;
+}
diff --git a/repair/bmap_repair.h b/repair/bmap_repair.h
new file mode 100644 (file)
index 0000000..6d55359
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2019-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef REBUILD_H_
+#define REBUILD_H_
+
+int rebuild_bmap(struct xfs_mount *mp, xfs_ino_t ino, int whichfork,
+                unsigned long nr_extents, struct xfs_buf **ino_bpp,
+                struct xfs_dinode **dinop, int *dirty);
+
+#endif /* REBUILD_H_ */
index 18158c397f56937090f32b95a81ac6cac6f2e70f..a97839f549ddae37910bffe5af5cc2e3753f7ffe 100644 (file)
@@ -14,14 +14,29 @@ void
 bulkload_init_ag(
        struct bulkload                 *bkl,
        struct repair_ctx               *sc,
-       const struct xfs_owner_info     *oinfo)
+       const struct xfs_owner_info     *oinfo,
+       xfs_fsblock_t                   alloc_hint)
 {
        memset(bkl, 0, sizeof(struct bulkload));
        bkl->sc = sc;
        bkl->oinfo = *oinfo; /* structure copy */
+       bkl->alloc_hint = alloc_hint;
        INIT_LIST_HEAD(&bkl->resv_list);
 }
 
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+bulkload_init_inode(
+       struct bulkload                 *bkl,
+       struct repair_ctx               *sc,
+       int                             whichfork,
+       const struct xfs_owner_info     *oinfo)
+{
+       bulkload_init_ag(bkl, sc, oinfo, XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
+       bkl->ifake.if_fork = kmem_cache_zalloc(xfs_ifork_cache, 0);
+       bkl->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
+}
+
 /* Designate specific blocks to be used to build our new btree. */
 static int
 bulkload_add_blocks(
@@ -71,17 +86,199 @@ bulkload_add_extent(
        return bulkload_add_blocks(bkl, pag, &args);
 }
 
-/* Free all the accounting info and disk space we reserved for a new btree. */
-void
-bulkload_commit(
+/* Don't let our allocation hint take us beyond EOFS */
+static inline void
+bulkload_validate_file_alloc_hint(
        struct bulkload         *bkl)
 {
+       struct repair_ctx       *sc = bkl->sc;
+
+       if (libxfs_verify_fsbno(sc->mp, bkl->alloc_hint))
+               return;
+
+       bkl->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
+}
+
+/* Allocate disk space for our new file-based btree. */
+int
+bulkload_alloc_file_blocks(
+       struct bulkload         *bkl,
+       uint64_t                nr_blocks)
+{
+       struct repair_ctx       *sc = bkl->sc;
+       struct xfs_mount        *mp = sc->mp;
+       int                     error = 0;
+
+       while (nr_blocks > 0) {
+               struct xfs_alloc_arg    args = {
+                       .tp             = sc->tp,
+                       .mp             = mp,
+                       .oinfo          = bkl->oinfo,
+                       .minlen         = 1,
+                       .maxlen         = nr_blocks,
+                       .prod           = 1,
+                       .resv           = XFS_AG_RESV_NONE,
+               };
+               struct xfs_perag        *pag;
+               xfs_agnumber_t          agno;
+
+               bulkload_validate_file_alloc_hint(bkl);
+
+               error = -libxfs_alloc_vextent_start_ag(&args, bkl->alloc_hint);
+               if (error)
+                       return error;
+               if (args.fsbno == NULLFSBLOCK)
+                       return ENOSPC;
+
+               agno = XFS_FSB_TO_AGNO(mp, args.fsbno);
+
+               pag = libxfs_perag_get(mp, agno);
+               if (!pag) {
+                       ASSERT(0);
+                       return -EFSCORRUPTED;
+               }
+
+               error = bulkload_add_blocks(bkl, pag, &args);
+               libxfs_perag_put(pag);
+               if (error)
+                       return error;
+
+               nr_blocks -= args.len;
+               bkl->alloc_hint = args.fsbno + args.len;
+
+               error = -libxfs_defer_finish(&sc->tp);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/*
+ * Free the unused part of a space extent that was reserved for a new ondisk
+ * structure.  Returns the number of EFIs logged or a negative errno.
+ */
+static inline int
+bulkload_free_extent(
+       struct bulkload         *bkl,
+       struct bulkload_resv    *resv,
+       bool                    btree_committed)
+{
+       struct repair_ctx       *sc = bkl->sc;
+       xfs_agblock_t           free_agbno = resv->agbno;
+       xfs_extlen_t            free_aglen = resv->len;
+       xfs_fsblock_t           fsbno;
+       int                     error;
+
+       if (!btree_committed || resv->used == 0) {
+               /*
+                * If we're not committing a new btree or we didn't use the
+                * space reservation, free the entire space extent.
+                */
+               goto free;
+       }
+
+       /*
+        * We used space and committed the btree.  Remove the written blocks
+        * from the reservation and possibly log a new EFI to free any unused
+        * reservation space.
+        */
+       free_agbno += resv->used;
+       free_aglen -= resv->used;
+
+       if (free_aglen == 0)
+               return 0;
+
+free:
+       /*
+        * Use EFIs to free the reservations.  We don't need to use EFIs here
+        * like the kernel, but we'll do it to keep the code matched.
+        */
+       fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
+       error = -libxfs_free_extent_later(sc->tp, fsbno, free_aglen,
+                       &bkl->oinfo, XFS_AG_RESV_NONE, true);
+       if (error)
+               return error;
+
+       return 1;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+static int
+bulkload_free(
+       struct bulkload         *bkl,
+       bool                    btree_committed)
+{
+       struct repair_ctx       *sc = bkl->sc;
        struct bulkload_resv    *resv, *n;
+       unsigned int            freed = 0;
+       int                     error = 0;
+
+       list_for_each_entry_safe(resv, n, &bkl->resv_list, list) {
+               int             ret;
+
+               ret = bulkload_free_extent(bkl, resv, btree_committed);
+               list_del(&resv->list);
+               libxfs_perag_put(resv->pag);
+               kfree(resv);
+
+               if (ret < 0) {
+                       error = ret;
+                       goto junkit;
+               }
+
+               freed += ret;
+               if (freed >= XREP_MAX_ITRUNCATE_EFIS) {
+                       error = -libxfs_defer_finish(&sc->tp);
+                       if (error)
+                               goto junkit;
+                       freed = 0;
+               }
+       }
 
+       if (freed)
+               error = -libxfs_defer_finish(&sc->tp);
+junkit:
+       /*
+        * If we still have reservations attached to @bkl, cleanup must have
+        * failed and the filesystem is about to go down.  Clean up the incore
+        * reservations.
+        */
        list_for_each_entry_safe(resv, n, &bkl->resv_list, list) {
                list_del(&resv->list);
+               libxfs_perag_put(resv->pag);
                kfree(resv);
        }
+
+       if (sc->ip) {
+               kmem_cache_free(xfs_ifork_cache, bkl->ifake.if_fork);
+               bkl->ifake.if_fork = NULL;
+       }
+
+       return error;
+}
+
+/*
+ * Free all the accounting info and unused disk space allocations after
+ * committing a new btree.
+ */
+int
+bulkload_commit(
+       struct bulkload         *bkl)
+{
+       return bulkload_free(bkl, true);
+}
+
+/*
+ * Free all the accounting info and all of the disk space we reserved for a new
+ * btree that we're not going to commit.  We want to try to roll things back
+ * cleanly for things like ENOSPC midway through allocation.
+ */
+void
+bulkload_cancel(
+       struct bulkload         *bkl)
+{
+       bulkload_free(bkl, false);
 }
 
 /* Feed one of the reserved btree blocks to the bulk loader. */
index f4790e3b3de6504605ec58b6802e38815920c6a6..a88aafaa678a3a629f9f5322b0669790ea3d2430 100644 (file)
@@ -8,9 +8,17 @@
 
 extern int bload_leaf_slack;
 extern int bload_node_slack;
+/*
+ * This is the maximum number of deferred extent freeing items (EFIs)
+ * that we'll attach to a transaction without rolling the transaction to avoid
+ * overrunning a tr_itruncate reservation.
+ */
+#define XREP_MAX_ITRUNCATE_EFIS        (128)
 
 struct repair_ctx {
        struct xfs_mount        *mp;
+       struct xfs_inode        *ip;
+       struct xfs_trans        *tp;
 };
 
 struct bulkload_resv {
@@ -36,7 +44,10 @@ struct bulkload {
        struct list_head        resv_list;
 
        /* Fake root for new btree. */
-       struct xbtree_afakeroot afake;
+       union {
+               struct xbtree_afakeroot afake;
+               struct xbtree_ifakeroot ifake;
+       };
 
        /* rmap owner of these blocks */
        struct xfs_owner_info   oinfo;
@@ -44,6 +55,9 @@ struct bulkload {
        /* The last reservation we allocated from. */
        struct bulkload_resv    *last_resv;
 
+       /* Hint as to where we should allocate blocks. */
+       xfs_fsblock_t           alloc_hint;
+
        /* Number of blocks reserved via resv_list. */
        unsigned int            nr_reserved;
 };
@@ -52,12 +66,16 @@ struct bulkload {
        list_for_each_entry_safe((resv), (n), &(bkl)->resv_list, list)
 
 void bulkload_init_ag(struct bulkload *bkl, struct repair_ctx *sc,
-               const struct xfs_owner_info *oinfo);
+               const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint);
+void bulkload_init_inode(struct bulkload *bkl, struct repair_ctx *sc,
+               int whichfork, const struct xfs_owner_info *oinfo);
 int bulkload_claim_block(struct xfs_btree_cur *cur, struct bulkload *bkl,
                union xfs_btree_ptr *ptr);
 int bulkload_add_extent(struct bulkload *bkl, struct xfs_perag *pag,
                xfs_agblock_t agbno, xfs_extlen_t len);
-void bulkload_commit(struct bulkload *bkl);
+int bulkload_alloc_file_blocks(struct bulkload *bkl, uint64_t nr_blocks);
+void bulkload_cancel(struct bulkload *bkl);
+int bulkload_commit(struct bulkload *bkl);
 void bulkload_estimate_ag_slack(struct repair_ctx *sc,
                struct xfs_btree_bload *bload, unsigned int free);
 
index a18af3ff77722bbe51e64ae76f4e87a4a08bee25..b8f5bf4e550e282ca27eb8f3795c3204bb1b3bb3 100644 (file)
@@ -20,6 +20,7 @@
 #include "threads.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bmap_repair.h"
 
 /*
  * gettext lookups for translations of strings use mutexes internally to
@@ -1909,7 +1910,9 @@ process_inode_data_fork(
        xfs_ino_t               lino = XFS_AGINO_TO_INO(mp, agno, ino);
        int                     err = 0;
        xfs_extnum_t            nex, max_nex;
+       int                     try_rebuild = -1; /* don't know yet */
 
+retry:
        /*
         * extent count on disk is only valid for positive values. The kernel
         * uses negative values in memory. hence if we see negative numbers
@@ -1938,11 +1941,15 @@ process_inode_data_fork(
                *totblocks = 0;
                break;
        case XFS_DINODE_FMT_EXTENTS:
+               if (!rmapbt_suspect && try_rebuild == -1)
+                       try_rebuild = 1;
                err = process_exinode(mp, agno, ino, dino, type, dirty,
                        totblocks, nextents, dblkmap, XFS_DATA_FORK,
                        check_dups);
                break;
        case XFS_DINODE_FMT_BTREE:
+               if (!rmapbt_suspect && try_rebuild == -1)
+                       try_rebuild = 1;
                err = process_btinode(mp, agno, ino, dino, type, dirty,
                        totblocks, nextents, dblkmap, XFS_DATA_FORK,
                        check_dups);
@@ -1958,8 +1965,28 @@ process_inode_data_fork(
        if (err)  {
                do_warn(_("bad data fork in inode %" PRIu64 "\n"), lino);
                if (!no_modify)  {
+                       if (try_rebuild == 1) {
+                               do_warn(
+_("rebuilding inode %"PRIu64" data fork\n"),
+                                       lino);
+                               try_rebuild = 0;
+                               err = rebuild_bmap(mp, lino, XFS_DATA_FORK,
+                                               be32_to_cpu(dino->di_nextents),
+                                               ino_bpp, dinop, dirty);
+                               dino = *dinop;
+                               if (!err)
+                                       goto retry;
+                               do_warn(
+_("inode %"PRIu64" data fork rebuild failed, error %d, clearing\n"),
+                                       lino, err);
+                       }
                        clear_dinode(mp, dino, lino);
                        *dirty += 1;
+                       ASSERT(*dirty > 0);
+               } else if (try_rebuild == 1) {
+                       do_warn(
+_("would have tried to rebuild inode %"PRIu64" data fork\n"),
+                                       lino);
                }
                return 1;
        }
@@ -2025,7 +2052,9 @@ process_inode_attr_fork(
        struct blkmap           *ablkmap = NULL;
        int                     repair = 0;
        int                     err;
+       int                     try_rebuild = -1; /* don't know yet */
 
+retry:
        if (!dino->di_forkoff) {
                *anextents = 0;
                if (dino->di_aformat != XFS_DINODE_FMT_EXTENTS) {
@@ -2052,6 +2081,8 @@ process_inode_attr_fork(
                err = process_lclinode(mp, agno, ino, dino, XFS_ATTR_FORK);
                break;
        case XFS_DINODE_FMT_EXTENTS:
+               if (!rmapbt_suspect && try_rebuild == -1)
+                       try_rebuild = 1;
                ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
                *anextents = 0;
                err = process_exinode(mp, agno, ino, dino, type, dirty,
@@ -2059,6 +2090,8 @@ process_inode_attr_fork(
                                XFS_ATTR_FORK, check_dups);
                break;
        case XFS_DINODE_FMT_BTREE:
+               if (!rmapbt_suspect && try_rebuild == -1)
+                       try_rebuild = 1;
                ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
                *anextents = 0;
                err = process_btinode(mp, agno, ino, dino, type, dirty,
@@ -2084,10 +2117,29 @@ process_inode_attr_fork(
                do_warn(_("bad attribute fork in inode %" PRIu64 "\n"), lino);
 
                if (!no_modify)  {
+                       if (try_rebuild == 1) {
+                               do_warn(
+_("rebuilding inode %"PRIu64" attr fork\n"),
+                                       lino);
+                               try_rebuild = 0;
+                               err = rebuild_bmap(mp, lino, XFS_ATTR_FORK,
+                                               be16_to_cpu(dino->di_anextents),
+                                               ino_bpp, dinop, dirty);
+                               dino = *dinop;
+                               if (!err)
+                                       goto retry;
+                               do_warn(
+_("inode %"PRIu64" attr fork rebuild failed, error %d"),
+                                       lino, err);
+                       }
                        do_warn(_(", clearing attr fork\n"));
                        *dirty += clear_dinode_attr(mp, dino, lino);
                        ASSERT(*dirty > 0);
-               } else  {
+               } else if (try_rebuild) {
+                       do_warn(
+_("would have tried to rebuild inode %"PRIu64" attr fork or cleared it\n"),
+                                       lino);
+               } else {
                        do_warn(_(", would clear attr fork\n"));
                }
 
index 6bb77e08249240b80672870f7cd9721c4dbe38d7..a2291c7b3b015d9c5d77f8b25373c5ced1ca55a6 100644 (file)
@@ -33,7 +33,7 @@ struct xfs_ag_rmap {
 };
 
 static struct xfs_ag_rmap *ag_rmaps;
-static bool rmapbt_suspect;
+bool rmapbt_suspect;
 static bool refcbt_suspect;
 
 static inline int rmap_compare(const void *a, const void *b)
index 6004e9f68b631e231cc5d2b457bfde7569bec947..1dad2f5890a41762bca58c8b9e1a7e6da32c8fbb 100644 (file)
@@ -7,6 +7,7 @@
 #define RMAP_H_
 
 extern bool collect_rmaps;
+extern bool rmapbt_suspect;
 
 extern bool rmap_needs_work(struct xfs_mount *);