]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_repair: check existing rmapbt entries against observed rmaps
author: Darrick J. Wong <darrick.wong@oracle.com>
Fri, 19 Aug 2016 00:33:49 +0000 (10:33 +1000)
committer: Dave Chinner <david@fromorbit.com>
Fri, 19 Aug 2016 00:33:49 +0000 (10:33 +1000)
Once we've finished collecting reverse mapping observations from the
metadata scan, check those observations against the rmap btree
(particularly if we're in -n mode) to detect rmapbt problems.

[dchinner: libxfs'ify the various libxfs calls. ]

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
libxfs/libxfs_api_defs.h
repair/phase4.c
repair/rmap.c
repair/rmap.h
repair/scan.c

index 337363d7f27de6689539c4cbd2b81c16b14e8917..bd78819115b001840dbaa8b83bdb8a4d6a4a7201 100644 (file)
@@ -63,6 +63,7 @@
 #define xfs_attr_leaf_newentsize       libxfs_attr_leaf_newentsize
 
 #define xfs_alloc_fix_freelist         libxfs_alloc_fix_freelist
+#define xfs_alloc_read_agf             libxfs_alloc_read_agf
 #define xfs_bmap_last_offset           libxfs_bmap_last_offset
 #define xfs_bmap_search_extents                libxfs_bmap_search_extents
 #define xfs_bmapi_write                        libxfs_bmapi_write
 #define xfs_idestroy_fork              libxfs_idestroy_fork
 
 #define xfs_rmap_query_range           libxfs_rmap_query_range
+#define xfs_rmap_lookup_le             libxfs_rmap_lookup_le
+#define xfs_rmap_get_rec               libxfs_rmap_get_rec
+#define xfs_rmap_irec_offset_unpack    libxfs_rmap_irec_offset_unpack
+#define xfs_rmapbt_init_cursor         libxfs_rmapbt_init_cursor
+#define xfs_btree_del_cursor           libxfs_btree_del_cursor
+
 
 #define xfs_log_sb                     libxfs_log_sb
 #define xfs_sb_from_disk               libxfs_sb_from_disk
index 8880c91400cfccc48fdbc459119542991ace6fe5..e234d9283e55909dca8f069b03c330e26be6fe38 100644 (file)
@@ -174,6 +174,12 @@ _("unable to add AG %u metadata reverse-mapping data.\n"), agno);
        if (error)
                do_error(
 _("unable to merge AG %u metadata reverse-mapping data.\n"), agno);
+
+       error = check_rmaps(wq->mp, agno);
+       if (error)
+               do_error(
+_("%s while checking reverse-mappings"),
+                        strerror(-error));
 }
 
 static void
index 8f532fb6eb6b23fa736bc05050fb207df062a236..226ca9d1213c2c521c09eadbf0255c22705a06ec 100644 (file)
@@ -42,6 +42,7 @@ struct xfs_ag_rmap {
 };
 
 static struct xfs_ag_rmap *ag_rmaps;
+static bool rmapbt_suspect;
 
 /*
  * Compare rmap observations for array sorting.
@@ -442,3 +443,258 @@ dump_rmap(
 #else
 # define dump_rmap(m, a, r)
 #endif
+
+/*
+ * Report how many reverse-mapping observations are currently stored in
+ * the slab belonging to the given AG.  The mount pointer is part of the
+ * signature but is not consulted here.
+ */
+size_t
+rmap_record_count(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_ag_rmap      *ag_rmap = &ag_rmaps[agno];
+
+       return slab_count(ag_rmap->ar_rmaps);
+}
+
+/*
+ * Set up a slab cursor over one AG's rmap observations.  The cursor is
+ * created with rmap_compare as its ordering function and handed back
+ * through *cur; the return value is whatever init_slab_cursor() reports.
+ */
+int
+init_rmap_cursor(
+       xfs_agnumber_t          agno,
+       struct xfs_slab_cursor  **cur)
+{
+       struct xfs_ag_rmap      *ag_rmap = &ag_rmaps[agno];
+
+       return init_slab_cursor(ag_rmap->ar_rmaps, rmap_compare, cur);
+}
+
+/*
+ * Disable the rmap btree check.
+ *
+ * Sets the file-scope rmapbt_suspect flag; check_rmaps() tests that flag
+ * up front and returns without comparing anything when it is set.
+ */
+void
+rmap_avoid_check(void)
+{
+       rmapbt_suspect = true;
+}
+
+/*
+ * Look up the rmapbt record corresponding to an observed rmap.
+ *
+ * The btree cursor is positioned with libxfs_rmap_lookup_le() using the
+ * fields of rm_rec.  If that lookup reports a hit through *have, the
+ * record under the cursor is fetched into *tmp (that fetch may update
+ * *have again).  Returns zero, or the negated return value of whichever
+ * libxfs call failed.
+ */
+static int
+lookup_rmap(
+       struct xfs_btree_cur    *bt_cur,
+       struct xfs_rmap_irec    *rm_rec,
+       struct xfs_rmap_irec    *tmp,
+       int                     *have)
+{
+       int                     error;
+
+       /* Position the cursor with the regular btree retrieval routine. */
+       error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
+                               rm_rec->rm_blockcount,
+                               rm_rec->rm_owner, rm_rec->rm_offset,
+                               rm_rec->rm_flags, have);
+
+       /* A failed lookup and "nothing found" both stop here. */
+       if (error || !*have)
+               return error;
+
+       /* Copy the record under the cursor into the caller's buffer. */
+       return -libxfs_rmap_get_rec(bt_cur, tmp, have);
+}
+
+/*
+ * Does the btree rmap cover the observed rmap?
+ *
+ * NEXTP/NEXTL give the first physical block / logical offset past the
+ * end of a record; both macros are private to this function.
+ */
+#define NEXTP(x)       ((x)->rm_startblock + (x)->rm_blockcount)
+#define NEXTL(x)       ((x)->rm_offset + (x)->rm_blockcount)
+static bool
+is_good_rmap(
+       struct xfs_rmap_irec    *observed,
+       struct xfs_rmap_irec    *btree)
+{
+       /* Flags and owner have to agree exactly. */
+       if (btree->rm_flags != observed->rm_flags)
+               return false;
+       if (btree->rm_owner != observed->rm_owner)
+               return false;
+
+       /* The btree record must physically contain the observation. */
+       if (btree->rm_startblock > observed->rm_startblock)
+               return false;
+       if (NEXTP(btree) < NEXTP(observed))
+               return false;
+
+       /* Metadata and bmbt records get no logical-offset comparison. */
+       if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner))
+               return true;
+       if (observed->rm_flags & XFS_RMAP_BMBT_BLOCK)
+               return true;
+
+       /* The btree record must logically contain the observation too. */
+       return btree->rm_offset <= observed->rm_offset &&
+              NEXTL(btree) >= NEXTL(observed);
+}
+#undef NEXTP
+#undef NEXTL
+
+/*
+ * Compare the observed reverse mappings against what's in the ag btree.
+ *
+ * Every rmap observation collected for @agno is looked up in the on-disk
+ * rmap btree; observations with no btree record, or whose btree record
+ * does not cover them, are reported with do_warn().  Nothing is done if
+ * the filesystem has no rmapbt or if the btree was already flagged as
+ * suspect via rmap_avoid_check().
+ *
+ * Returns nonzero only if the observation cursor cannot be created.  A
+ * failure after that point (AGF read, btree cursor, btree lookup) ends
+ * the walk early: a warning is printed so the truncated check is not
+ * mistaken for a clean one, and 0 is still returned so that the caller
+ * does not treat a best-effort check as a fatal error.
+ */
+int
+check_rmaps(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_slab_cursor  *rm_cur;
+       struct xfs_btree_cur    *bt_cur = NULL;
+       int                     error;
+       int                     have;
+       struct xfs_buf          *agbp = NULL;
+       struct xfs_rmap_irec    *rm_rec;
+       struct xfs_rmap_irec    tmp;
+       struct xfs_perag        *pag;           /* per allocation group data */
+
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return 0;
+       if (rmapbt_suspect) {
+               /* Say it once (for AG 0) rather than once per AG. */
+               if (no_modify && agno == 0)
+                       do_warn(_("would rebuild corrupt rmap btrees.\n"));
+               return 0;
+       }
+
+       /* Create cursors to rmap structures */
+       error = init_rmap_cursor(agno, &rm_cur);
+       if (error)
+               return error;
+
+       error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error)
+               goto err;
+
+       /* Leave the per-ag data "uninitialized" since we rewrite it later */
+       pag = xfs_perag_get(mp, agno);
+       pag->pagf_init = 0;
+       xfs_perag_put(pag);
+
+       bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
+       if (!bt_cur) {
+               error = -ENOMEM;
+               goto err;
+       }
+
+       rm_rec = pop_slab_cursor(rm_cur);
+       while (rm_rec) {
+               error = lookup_rmap(bt_cur, rm_rec, &tmp, &have);
+               if (error)
+                       goto err;
+               if (!have) {
+                       do_warn(
+_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
+%s%soff %"PRIu64"\n"),
+                               agno, rm_rec->rm_startblock,
+                               (rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
+                                       _("unwritten ") : "",
+                               rm_rec->rm_blockcount,
+                               rm_rec->rm_owner,
+                               (rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
+                                       _("attr ") : "",
+                               (rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
+                                       _("bmbt ") : "",
+                               rm_rec->rm_offset);
+                       goto next_loop;
+               }
+
+               /* Compare each rmap observation against the btree's */
+               if (!is_good_rmap(rm_rec, &tmp)) {
+                       /* "saw" is the btree record, "should be" the observation */
+                       do_warn(
+_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
+%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
+                               agno, tmp.rm_startblock,
+                               (tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
+                                       _("unwritten ") : "",
+                               tmp.rm_blockcount,
+                               tmp.rm_owner,
+                               (tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
+                                       _("attr ") : "",
+                               (tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
+                                       _("bmbt ") : "",
+                               tmp.rm_offset,
+                               agno, rm_rec->rm_startblock,
+                               (rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
+                                       _("unwritten ") : "",
+                               rm_rec->rm_blockcount,
+                               rm_rec->rm_owner,
+                               (rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
+                                       _("attr ") : "",
+                               (rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
+                                       _("bmbt ") : "",
+                               rm_rec->rm_offset);
+                       goto next_loop;
+               }
+next_loop:
+               rm_rec = pop_slab_cursor(rm_cur);
+       }
+
+err:
+       /*
+        * This label is reached on success as well (error == 0).  When the
+        * walk was cut short, say so: the return value below stays 0, so
+        * this warning is the only sign that the check did not complete.
+        */
+       if (error)
+               do_warn(
+_("Could not finish checking reverse mappings for AG %u.\n"),
+                       agno);
+       if (bt_cur)
+               libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+       if (agbp)
+               libxfs_putbuf(agbp);
+       free_slab_cursor(&rm_cur);
+       return 0;
+}
+
+/*
+ * Three-way comparison of the key fields of two rmap records.
+ *
+ * Records are ordered by start block, then by owner, then by the packed
+ * offset computed with the XFS_RMAP_REC_FLAGS bits cleared.  The result
+ * is positive if key1 > key2, negative if key1 < key2, and zero if the
+ * keys are equal.
+ */
+__int64_t
+rmap_diffkeys(
+       struct xfs_rmap_irec    *kp1,
+       struct xfs_rmap_irec    *kp2)
+{
+       struct xfs_rmap_irec    scratch;
+       __u64                   off1;
+       __u64                   off2;
+       __int64_t               delta;
+
+       /* Pack each offset from a copy so the caller's flags stay intact. */
+       scratch = *kp1;
+       scratch.rm_flags &= ~XFS_RMAP_REC_FLAGS;
+       off1 = xfs_rmap_irec_offset_pack(&scratch);
+       scratch = *kp2;
+       scratch.rm_flags &= ~XFS_RMAP_REC_FLAGS;
+       off2 = xfs_rmap_irec_offset_pack(&scratch);
+
+       /* Start block is the most significant part of the key. */
+       delta = (__int64_t)kp1->rm_startblock - kp2->rm_startblock;
+       if (delta != 0)
+               return delta;
+
+       if (kp1->rm_owner != kp2->rm_owner)
+               return kp1->rm_owner > kp2->rm_owner ? 1 : -1;
+
+       if (off1 != off2)
+               return off1 > off2 ? 1 : -1;
+
+       return 0;
+}
+
+/*
+ * Compute the high key of an rmap record.
+ *
+ * The key's start block is the last block of the record; owner, offset
+ * and the XFS_RMAP_KEY_FLAGS subset of the flags are copied over.  For
+ * records that are neither non-inode-owned nor bmbt blocks, the offset
+ * is advanced to the last block as well.  key->rm_blockcount is left
+ * untouched.
+ */
+void
+rmap_high_key_from_rec(
+       struct xfs_rmap_irec    *rec,
+       struct xfs_rmap_irec    *key)
+{
+       int                     last = rec->rm_blockcount - 1;
+
+       key->rm_startblock = rec->rm_startblock + last;
+       key->rm_owner = rec->rm_owner;
+       key->rm_offset = rec->rm_offset;
+       key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
+
+       /* Only these records get their offset moved to the last block. */
+       if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
+           !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+               key->rm_offset += last;
+}
index f948f25d4f2c5fa62c6b459e354ed12cf05e2c19..d9d08d4489bce61da275c9cae59a660eccc9894e 100644 (file)
@@ -36,4 +36,14 @@ extern bool mergeable_rmaps(struct xfs_rmap_irec *r1, struct xfs_rmap_irec *r2);
 
 extern int add_fixed_ag_rmap_data(struct xfs_mount *, xfs_agnumber_t);
 
+extern size_t rmap_record_count(struct xfs_mount *, xfs_agnumber_t);
+extern int init_rmap_cursor(xfs_agnumber_t, struct xfs_slab_cursor **);
+extern void rmap_avoid_check(void);
+extern int check_rmaps(struct xfs_mount *, xfs_agnumber_t);
+
+extern __int64_t rmap_diffkeys(struct xfs_rmap_irec *kp1,
+               struct xfs_rmap_irec *kp2);
+extern void rmap_high_key_from_rec(struct xfs_rmap_irec *rec,
+               struct xfs_rmap_irec *key);
+
 #endif /* RMAP_H_ */
index d22dca3debef39f0a6bb7ba454036710425edb7e..1f20622230579ebb4d81ec187f15c9bfbea0e2cf 100644 (file)
@@ -29,6 +29,7 @@
 #include "bmap.h"
 #include "progress.h"
 #include "threads.h"
+#include "slab.h"
 #include "rmap.h"
 
 static xfs_mount_t     *mp = NULL;
@@ -869,6 +870,12 @@ _("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
        }
 }
 
+/*
+ * State passed to scan_rmapbt() through the priv pointer while the rmap
+ * btree of one AG is being scanned.
+ */
+struct rmap_priv {
+       /* AG header counters; agfbtreeblks/fdblocks are bumped for non-root blocks */
+       struct aghdr_cnts       *agcnts;
+       /*
+        * Upper bound that keys and records of the block being scanned are
+        * compared against; loaded from the parent's high key before each
+        * child block is scanned.
+        */
+       struct xfs_rmap_irec    high_key;
+       /* Number of rmapbt blocks counted by the scan; compared with agf_rmap_blocks */
+       xfs_agblock_t           nr_blocks;
+};
+
 static void
 scan_rmapbt(
        struct xfs_btree_block  *block,
@@ -880,19 +887,23 @@ scan_rmapbt(
        __uint32_t              magic,
        void                    *priv)
 {
-       struct aghdr_cnts       *agcnts = priv;
        const char              *name = "rmap";
        int                     i;
        xfs_rmap_ptr_t          *pp;
        struct xfs_rmap_rec     *rp;
+       struct rmap_priv        *rmap_priv = priv;
        int                     hdr_errors = 0;
        int                     numrecs;
        int                     state;
        xfs_agblock_t           lastblock = 0;
+       struct xfs_rmap_key     *kp;
+       struct xfs_rmap_irec    key;
 
        if (magic != XFS_RMAP_CRC_MAGIC) {
                name = "(unknown)";
-               assert(0);
+               hdr_errors++;
+               suspect++;
+               goto out;
        }
 
        if (be32_to_cpu(block->bb_magic) != magic) {
@@ -900,7 +911,7 @@ scan_rmapbt(
                        be32_to_cpu(block->bb_magic), name, agno, bno);
                hdr_errors++;
                if (suspect)
-                       return;
+                       goto out;
        }
 
        /*
@@ -909,16 +920,17 @@ scan_rmapbt(
         * free data block counter.
         */
        if (!isroot) {
-               agcnts->agfbtreeblks++;
-               agcnts->fdblocks++;
+               rmap_priv->agcnts->agfbtreeblks++;
+               rmap_priv->agcnts->fdblocks++;
        }
+       rmap_priv->nr_blocks++;
 
        if (be16_to_cpu(block->bb_level) != level) {
                do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
                        level, be16_to_cpu(block->bb_level), name, agno, bno);
                hdr_errors++;
                if (suspect)
-                       return;
+                       goto out;
        }
 
        /* check for btree blocks multiply claimed */
@@ -928,7 +940,7 @@ scan_rmapbt(
                do_warn(
 _("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
                                name, state, agno, bno, suspect);
-               return;
+               goto out;
        }
        set_bmap(agno, bno, XR_E_FS_MAP);
 
@@ -962,7 +974,20 @@ _("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
                        len = be32_to_cpu(rp[i].rm_blockcount);
                        owner = be64_to_cpu(rp[i].rm_owner);
                        offset = be64_to_cpu(rp[i].rm_offset);
-                       end = b + len;
+
+                       key.rm_flags = 0;
+                       key.rm_startblock = b;
+                       key.rm_blockcount = len;
+                       key.rm_owner = owner;
+                       if (libxfs_rmap_irec_offset_unpack(offset, &key)) {
+                               /* Look for impossible flags. */
+                               do_warn(
+       _("invalid flags in record %u of %s btree block %u/%u\n"),
+                                       i, name, agno, bno);
+                               continue;
+                       }
+
+                       end = key.rm_startblock + key.rm_blockcount;
 
                        /* Make sure agbno & len make sense. */
                        if (!verify_agbno(mp, agno, b)) {
@@ -1001,6 +1026,18 @@ advance:
                                        goto advance;
                        }
 
+                       /* Check that we don't go past the high key. */
+                       key.rm_startblock += key.rm_blockcount - 1;
+                       if (!XFS_RMAP_NON_INODE_OWNER(key.rm_owner) &&
+                           !(key.rm_flags & XFS_RMAP_BMBT_BLOCK))
+                               key.rm_offset += key.rm_blockcount - 1;
+                       key.rm_blockcount = 0;
+                       if (rmap_diffkeys(&key, &rmap_priv->high_key) > 0) {
+                               do_warn(
+       _("record %d greater than high key of block (%u/%u) in %s tree\n"),
+                                       i, agno, bno, name);
+                       }
+
                        /* Check for block owner collisions. */
                        for ( ; b < end; b += blen)  {
                                state = get_bmap_ext(agno, b, end, &blen);
@@ -1008,7 +1045,7 @@ advance:
                                                state, name);
                        }
                }
-               return;
+               goto out;
        }
 
        /*
@@ -1036,12 +1073,33 @@ advance:
                        mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
                        name, agno, bno);
                if (suspect)
-                       return;
+                       goto out;
                suspect++;
        } else if (suspect) {
                suspect = 0;
        }
 
+       /* check the node's high keys */
+       for (i = 0; !isroot && i < numrecs; i++) {
+               kp = XFS_RMAP_HIGH_KEY_ADDR(block, i + 1);
+
+               key.rm_flags = 0;
+               key.rm_startblock = be32_to_cpu(kp->rm_startblock);
+               key.rm_owner = be64_to_cpu(kp->rm_owner);
+               if (libxfs_rmap_irec_offset_unpack(be64_to_cpu(kp->rm_offset),
+                               &key)) {
+                       /* Look for impossible flags. */
+                       do_warn(
+       _("invalid flags in key %u of %s btree block %u/%u\n"),
+                               i, name, agno, bno);
+                       continue;
+               }
+               if (rmap_diffkeys(&key, &rmap_priv->high_key) > 0)
+                       do_warn(
+       _("key %d greater than high key of block (%u/%u) in %s tree\n"),
+                               i, agno, bno, name);
+       }
+
        for (i = 0; i < numrecs; i++)  {
                xfs_agblock_t           bno = be32_to_cpu(pp[i]);
 
@@ -1054,11 +1112,30 @@ advance:
                 * pointer mismatch, try and extract as much data
                 * as possible.
                 */
+               kp = XFS_RMAP_HIGH_KEY_ADDR(block, i + 1);
+               rmap_priv->high_key.rm_flags = 0;
+               rmap_priv->high_key.rm_startblock =
+                               be32_to_cpu(kp->rm_startblock);
+               rmap_priv->high_key.rm_owner =
+                               be64_to_cpu(kp->rm_owner);
+               if (libxfs_rmap_irec_offset_unpack(be64_to_cpu(kp->rm_offset),
+                               &rmap_priv->high_key)) {
+                       /* Look for impossible flags. */
+                       do_warn(
+       _("invalid flags in high key %u of %s btree block %u/%u\n"),
+                               i, name, agno, bno);
+                       continue;
+               }
+
                if (bno != 0 && verify_agbno(mp, agno, bno)) {
                        scan_sbtree(bno, level, agno, suspect, scan_rmapbt, 0,
                                    magic, priv, &xfs_rmapbt_buf_ops);
                }
        }
+
+out:
+       if (suspect)
+               rmap_avoid_check();
 }
 
 /*
@@ -1827,15 +1904,26 @@ validate_agf(
        }
 
        if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+               struct rmap_priv        priv;
+
+               memset(&priv.high_key, 0xFF, sizeof(priv.high_key));
+               priv.high_key.rm_blockcount = 0;
+               priv.agcnts = agcnts;
+               priv.nr_blocks = 0;
                bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
                if (bno != 0 && verify_agbno(mp, agno, bno)) {
                        scan_sbtree(bno,
                                    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
                                    agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC,
-                                   agcnts, &xfs_rmapbt_buf_ops);
+                                   &priv, &xfs_rmapbt_buf_ops);
+                       if (be32_to_cpu(agf->agf_rmap_blocks) != priv.nr_blocks)
+                               do_warn(_("bad rmapbt block count %u, saw %u\n"),
+                                       priv.nr_blocks,
+                                       be32_to_cpu(agf->agf_rmap_blocks));
                } else  {
                        do_warn(_("bad agbno %u for rmapbt root, agno %d\n"),
                                bno, agno);
+                       rmap_avoid_check();
                }
        }