xfs_repair: process reverse-mapping data into refcount data
Author:     Darrick J. Wong <darrick.wong@oracle.com>
AuthorDate: Tue, 25 Oct 2016 22:14:34 +0000 (15:14 -0700)
Commit:     Darrick J. Wong <darrick.wong@oracle.com>
CommitDate: Wed, 26 Oct 2016 22:01:20 +0000 (15:01 -0700)
Take all the reverse-mapping data we've acquired and use it to generate
reference count data.  This data is used in phase 5 to rebuild the
refcount btree.

v2: Update to reflect separation of rmap_irec flags.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
repair/phase4.c
repair/rmap.c
repair/rmap.h

diff --git a/repair/phase4.c b/repair/phase4.c
index 9da1bb1288fe207245b3f12291a1805891c49508..86992c9009a55a90d8d95764f6ace90808091cce 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -192,6 +192,21 @@ _("%s while checking reverse-mappings"),
                         strerror(-error));
 }
 
+static void
+compute_ag_refcounts(
+       work_queue_t    *wq,
+       xfs_agnumber_t  agno,
+       void            *arg)
+{
+       int             error;
+
+       error = compute_refcounts(wq->mp, agno);
+       if (error)
+               do_error(
+_("%s while computing reference count records.\n"),
+                        strerror(-error));
+}
+
 static void
 process_rmap_data(
        struct xfs_mount        *mp)
@@ -206,6 +221,14 @@ process_rmap_data(
        for (i = 0; i < mp->m_sb.sb_agcount; i++)
                queue_work(&wq, check_rmap_btrees, i, NULL);
        destroy_work_queue(&wq);
+
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return;
+
+       create_work_queue(&wq, mp, libxfs_nproc());
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)
+               queue_work(&wq, compute_ag_refcounts, i, NULL);
+       destroy_work_queue(&wq);
 }
 
 void
@@ -359,7 +382,9 @@ phase4(xfs_mount_t *mp)
 
        /*
         * Process all the reverse-mapping data that we collected.  This
-        * involves checking the rmap data against the btree.
+        * involves checking the rmap data against the btree, computing
+        * reference counts based on the rmap data, and checking the counts
+        * against the refcount btree.
         */
        process_rmap_data(mp);
 
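The two hunks above fan the new per-AG refcount computation out over the work queue that already drives the rmap btree checks. A rough serial sketch of that fan-out pattern follows; the queue type, worker signature, and four-AG count here are simplified stand-ins rather than the libxfs API, and the real create_work_queue() spreads the items across libxfs_nproc() worker threads:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t xfs_agnumber_t;
typedef void (*ag_worker_fn)(xfs_agnumber_t agno, void *arg);

/* Serial stand-in for create_work_queue/queue_work/destroy_work_queue. */
static void
queue_per_ag_work(ag_worker_fn fn, xfs_agnumber_t agcount, void *arg)
{
        xfs_agnumber_t  agno;

        for (agno = 0; agno < agcount; agno++)
                fn(agno, arg);
}

static void
compute_ag_refcounts_stub(xfs_agnumber_t agno, void *arg)
{
        printf("computing refcounts for AG %u\n", agno);
}

int
main(void)
{
        /* One work item per allocation group, as in process_rmap_data(). */
        queue_per_ag_work(compute_ag_refcounts_stub, 4, NULL);
        return 0;
}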
diff --git a/repair/rmap.c b/repair/rmap.c
index 645af311f8a2eae700bda25b613873c75c59b5e4..c6ebcb73431ed836e66d59472443188d8b73bfd8 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -42,6 +42,7 @@ struct xfs_ag_rmap {
        int             ar_flcount;             /* agfl entries from leftover */
                                                /* agbt allocations */
        struct xfs_rmap_irec    ar_last_rmap;   /* last rmap seen */
+       struct xfs_slab *ar_refcount_items;     /* refcount items, p4-5 */
 };
 
 static struct xfs_ag_rmap *ag_rmaps;
@@ -88,7 +89,8 @@ bool
 rmap_needs_work(
        struct xfs_mount        *mp)
 {
-       return xfs_sb_version_hasrmapbt(&mp->m_sb);
+       return xfs_sb_version_hasreflink(&mp->m_sb) ||
+              xfs_sb_version_hasrmapbt(&mp->m_sb);
 }
 
 /*
@@ -120,6 +122,11 @@ _("Insufficient memory while allocating reverse mapping slabs."));
                        do_error(
 _("Insufficient memory while allocating raw metadata reverse mapping slabs."));
                ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
+               error = init_slab(&ag_rmaps[i].ar_refcount_items,
+                                 sizeof(struct xfs_refcount_irec));
+               if (error)
+                       do_error(
+_("Insufficient memory while allocating refcount item slabs."));
        }
 }
 
@@ -138,6 +145,7 @@ rmaps_free(
        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
                free_slab(&ag_rmaps[i].ar_rmaps);
                free_slab(&ag_rmaps[i].ar_raw_rmaps);
+               free_slab(&ag_rmaps[i].ar_refcount_items);
        }
        free(ag_rmaps);
        ag_rmaps = NULL;
@@ -597,6 +605,229 @@ rmap_dump(
 # define rmap_dump(m, a, r)
 #endif
 
+/*
+ * Rebuilding the Reference Count & Reverse Mapping Btrees
+ *
+ * The reference count (refcnt) and reverse mapping (rmap) btrees are
+ * rebuilt during phase 5, like all other AG btrees.  Therefore, reverse
+ * mappings must be processed into reference counts at the end of phase
+ * 4, and the rmaps must be recorded during phase 4.  There is a need to
+ * access the rmaps in physical block order, but no particular need for
+ * random access, so the slab.c code provides a big logical array
+ * (consisting of smaller slabs) and some inorder iterator functions.
+ *
+ * Once we've recorded all the reverse mappings, we're ready to
+ * translate the rmaps into refcount entries.  Imagine the rmap entries
+ * as rectangles representing extents of physical blocks, and that the
+ * rectangles can be laid down to allow them to overlap each other; then
+ * we know that we must emit a refcnt btree entry wherever the amount of
+ * overlap changes, i.e. the emission stimulus is level-triggered:
+ *
+ *                 -    ---
+ *       --      ----- ----   ---        ------
+ * --   ----     ----------- ----     ---------
+ * -------------------------------- -----------
+ * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
+ * 2 1  23 21    3 43 234  2123  1 01 2  3     0
+ *
+ * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
+ *
+ * Note that in the actual refcnt btree we don't store the refcount < 2
+ * cases because the bnobt tells us which blocks are free; single-use
+ * blocks aren't recorded in the bnobt or the refcntbt.  If the rmapbt
+ * supports storing multiple entries covering a given block we could
+ * theoretically dispense with the refcntbt and simply count rmaps, but
+ * that's inefficient in the (hot) write path, so we'll take the cost of
+ * the extra tree to save time.  Also there's no guarantee that rmap
+ * will be enabled.
+ *
+ * Given an array of rmaps sorted by physical block number, a starting
+ * physical block (sp), a bag to hold rmaps that cover sp, and the next
+ * physical block where the level changes (np), we can reconstruct the
+ * refcount btree as follows:
+ *
+ * While there are still unprocessed rmaps in the array,
+ *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
+ *  - Add to the bag all rmaps in the array where startblock == sp.
+ *  - Set np to the physical block where the bag size will change.  This
+ *    is the minimum of (the pblk of the next unprocessed rmap) and
+ *    (startblock + len of each rmap in the bag).
+ *  - Record the bag size as old_bag_size.
+ *
+ *  - While the bag isn't empty,
+ *     - Remove from the bag all rmaps where startblock + len == np.
+ *     - Add to the bag all rmaps in the array where startblock == np.
+ *     - If the bag size isn't old_bag_size, store the refcount entry
+ *       (sp, np - sp, bag_size) in the refcnt btree.
+ *     - If the bag is empty, break out of the inner loop.
+ *     - Set old_bag_size to the bag size
+ *     - Set sp = np.
+ *     - Set np to the physical block where the bag size will change.
+ *       This is the minimum of (the pblk of the next unprocessed rmap)
+ *       and (startblock + len of each rmap in the bag).
+ *
+ * An implementation detail is that because this processing happens
+ * during phase 4, the refcount entries are stored in an array so that
+ * phase 5 can load them into the refcount btree.  The rmaps can be
+ * loaded directly into the rmap btree during phase 5 as well.
+ */
+
+/*
+ * Emit a refcount object for refcntbt reconstruction during phase 5.
+ */
+#define REFCOUNT_CLAMP(nr)     ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
+static void
+refcount_emit(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           agbno,
+       xfs_extlen_t            len,
+       size_t                  nr_rmaps)
+{
+       struct xfs_refcount_irec        rlrec;
+       int                     error;
+       struct xfs_slab         *rlslab;
+
+       rlslab = ag_rmaps[agno].ar_refcount_items;
+       ASSERT(nr_rmaps > 0);
+
+       dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
+               agno, agbno, len, nr_rmaps);
+       rlrec.rc_startblock = agbno;
+       rlrec.rc_blockcount = len;
+       rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
+       error = slab_add(rlslab, &rlrec);
+       if (error)
+               do_error(
+_("Insufficient memory while recreating refcount tree."));
+}
+#undef REFCOUNT_CLAMP
+
+/*
+ * Transform a pile of physical block mapping observations into refcount data
+ * for eventual rebuilding of the btrees.
+ */
+#define RMAP_END(r)    ((r)->rm_startblock + (r)->rm_blockcount)
+int
+compute_refcounts(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_bag          *stack_top = NULL;
+       struct xfs_slab         *rmaps;
+       struct xfs_slab_cursor  *rmaps_cur;
+       struct xfs_rmap_irec    *array_cur;
+       struct xfs_rmap_irec    *rmap;
+       xfs_agblock_t           sbno;   /* first bno of this rmap set */
+       xfs_agblock_t           cbno;   /* first bno of this refcount set */
+       xfs_agblock_t           nbno;   /* next bno where rmap set changes */
+       size_t                  n, idx;
+       size_t                  old_stack_nr;
+       int                     error;
+
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return 0;
+
+       rmaps = ag_rmaps[agno].ar_rmaps;
+
+       error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
+       if (error)
+               return error;
+
+       error = init_bag(&stack_top);
+       if (error)
+               goto err;
+
+       /* While there are rmaps to be processed... */
+       n = 0;
+       while (n < slab_count(rmaps)) {
+               array_cur = peek_slab_cursor(rmaps_cur);
+               sbno = cbno = array_cur->rm_startblock;
+               /* Push all rmaps with pblk == sbno onto the stack */
+               for (;
+                    array_cur && array_cur->rm_startblock == sbno;
+                    array_cur = peek_slab_cursor(rmaps_cur)) {
+                       advance_slab_cursor(rmaps_cur); n++;
+                       rmap_dump("push0", agno, array_cur);
+                       error = bag_add(stack_top, array_cur);
+                       if (error)
+                               goto err;
+               }
+
+               /* Set nbno to the bno of the next refcount change */
+               if (n < slab_count(rmaps))
+                       nbno = array_cur->rm_startblock;
+               else
+                       nbno = NULLAGBLOCK;
+               foreach_bag_ptr(stack_top, idx, rmap) {
+                       nbno = min(nbno, RMAP_END(rmap));
+               }
+
+               /* The next refcount change must lie beyond this set's start */
+               ASSERT(nbno > sbno);
+               old_stack_nr = bag_count(stack_top);
+
+               /* While stack isn't empty... */
+               while (bag_count(stack_top)) {
+                       /* Pop all rmaps that end at nbno */
+                       foreach_bag_ptr_reverse(stack_top, idx, rmap) {
+                               if (RMAP_END(rmap) != nbno)
+                                       continue;
+                               rmap_dump("pop", agno, rmap);
+                               error = bag_remove(stack_top, idx);
+                               if (error)
+                                       goto err;
+                       }
+
+                       /* Push array items that start at nbno */
+                       for (;
+                            array_cur && array_cur->rm_startblock == nbno;
+                            array_cur = peek_slab_cursor(rmaps_cur)) {
+                               advance_slab_cursor(rmaps_cur); n++;
+                               rmap_dump("push1", agno, array_cur);
+                               error = bag_add(stack_top, array_cur);
+                               if (error)
+                                       goto err;
+                       }
+
+                       /* Emit refcount if necessary */
+                       ASSERT(nbno > cbno);
+                       if (bag_count(stack_top) != old_stack_nr) {
+                               if (old_stack_nr > 1) {
+                                       refcount_emit(mp, agno, cbno,
+                                                     nbno - cbno,
+                                                     old_stack_nr);
+                               }
+                               cbno = nbno;
+                       }
+
+                       /* Stack empty, go find the next rmap */
+                       if (bag_count(stack_top) == 0)
+                               break;
+                       old_stack_nr = bag_count(stack_top);
+                       sbno = nbno;
+
+                       /* Set nbno to the bno of the next refcount change */
+                       if (n < slab_count(rmaps))
+                               nbno = array_cur->rm_startblock;
+                       else
+                               nbno = NULLAGBLOCK;
+                       foreach_bag_ptr(stack_top, idx, rmap) {
+                               nbno = min(nbno, RMAP_END(rmap));
+                       }
+
+                       /* The next refcount change must lie beyond sbno */
+                       ASSERT(nbno > sbno);
+               }
+       }
+err:
+       free_bag(&stack_top);
+       free_slab_cursor(&rmaps_cur);
+
+       return error;
+}
+#undef RMAP_END
+
 /*
  * Return the number of rmap objects for an AG.
  */
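The comment block above gives the sweep in pseudocode; the same logic fits in a short standalone program. In this sketch a plain sorted array stands in for the slab cursor and a fixed-size array with swap-removal stands in for the bag; struct rmap, the sample mappings, and printing the records are illustrative simplifications, not the xfs_repair API:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct rmap { uint32_t start; uint32_t len; };

#define RMAP_END(r)     ((r)->start + (r)->len)
#define NO_BLOCK        UINT32_MAX

static int
cmp_start(const void *a, const void *b)
{
        const struct rmap *l = a, *r = b;

        return (l->start > r->start) - (l->start < r->start);
}

/* Trivial bag: unordered fixed-size array, remove by swapping in the tail. */
static struct rmap bag[64];
static size_t bag_nr;

/*
 * Next block where the refcount level changes: the smaller of the next
 * unprocessed rmap's start and the earliest end among the bagged rmaps.
 */
static uint32_t
next_change(const struct rmap *rmaps, size_t n, size_t nr)
{
        uint32_t nbno = n < nr ? rmaps[n].start : NO_BLOCK;

        for (size_t i = 0; i < bag_nr; i++)
                if (RMAP_END(&bag[i]) < nbno)
                        nbno = RMAP_END(&bag[i]);
        return nbno;
}

int
main(void)
{
        struct rmap rmaps[] = { {0, 4}, {2, 6}, {2, 2}, {10, 3} };
        size_t nr = sizeof(rmaps) / sizeof(rmaps[0]);
        size_t n = 0;

        qsort(rmaps, nr, sizeof(rmaps[0]), cmp_start);

        while (n < nr) {
                uint32_t cbno = rmaps[n].start;
                uint32_t nbno;
                size_t old_nr;

                /* Bag every mapping that starts at this block. */
                while (n < nr && rmaps[n].start == cbno)
                        bag[bag_nr++] = rmaps[n++];
                nbno = next_change(rmaps, n, nr);
                old_nr = bag_nr;

                while (bag_nr) {
                        /* Drop mappings ending at nbno; add ones starting there. */
                        for (size_t i = 0; i < bag_nr; )
                                if (RMAP_END(&bag[i]) == nbno)
                                        bag[i] = bag[--bag_nr];
                                else
                                        i++;
                        while (n < nr && rmaps[n].start == nbno)
                                bag[bag_nr++] = rmaps[n++];

                        /* Level changed: emit the run if it was shared. */
                        if (bag_nr != old_nr) {
                                if (old_nr > 1)
                                        printf("refcount: start=%u len=%u count=%zu\n",
                                               cbno, nbno - cbno, old_nr);
                                cbno = nbno;
                        }
                        old_nr = bag_nr;
                        nbno = next_change(rmaps, n, nr);
                }
        }
        return 0;
}

With this sample data the only shared run is blocks 2-3, covered three times, so the program prints a single record (start=2 len=2 count=3); single-use runs are skipped, matching the rule that refcounts below 2 never reach the refcount btree.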
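On the storage side, the comment describes the slab as a big logical array made of smaller chunks: appends are cheap, there is no random-access requirement, and an in-order cursor walks the records once they are sorted. A toy model of that layout, where toy_slab, toy_slab_add, toy_slab_item, and CHUNK_NR are hypothetical simplifications of slab.c (which also handles sorting and error reporting):

#include <stdlib.h>
#include <string.h>

#define CHUNK_NR        4       /* records per chunk; illustrative only */

struct toy_slab {
        void            **chunks;      /* array of fixed-size chunks */
        size_t          nr_chunks;
        size_t          nr_items;
        size_t          item_size;
};

/* Append one record, adding a new chunk when the last one fills up. */
static int
toy_slab_add(struct toy_slab *s, const void *item)
{
        size_t          chunk = s->nr_items / CHUNK_NR;

        if (chunk == s->nr_chunks) {
                void **p = realloc(s->chunks, (chunk + 1) * sizeof(void *));

                if (!p)
                        return -1;
                s->chunks = p;
                s->chunks[chunk] = malloc(CHUNK_NR * s->item_size);
                if (!s->chunks[chunk])
                        return -1;
                s->nr_chunks++;
        }
        memcpy((char *)s->chunks[chunk] +
               (s->nr_items % CHUNK_NR) * s->item_size,
               item, s->item_size);
        s->nr_items++;
        return 0;
}

/* Sequential access by index; this is all an in-order cursor needs. */
static void *
toy_slab_item(struct toy_slab *s, size_t i)
{
        if (i >= s->nr_items)
                return NULL;
        return (char *)s->chunks[i / CHUNK_NR] +
               (i % CHUNK_NR) * s->item_size;
}

Because appends never move records that are already stored, pointers handed out during iteration stay stable, which is what lets compute_refcounts() above park the peeked rmap pointers in its bag while the sweep advances.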
diff --git a/repair/rmap.h b/repair/rmap.h
index 7106dfc3c0eb4fc594847957e7756908dbbf69b2..01dec9f2a53bd31ba47a21676ba2d56586ed5f84 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -49,6 +49,8 @@ extern __int64_t rmap_diffkeys(struct xfs_rmap_irec *kp1,
 extern void rmap_high_key_from_rec(struct xfs_rmap_irec *rec,
                struct xfs_rmap_irec *key);
 
+extern int compute_refcounts(struct xfs_mount *, xfs_agnumber_t);
+
 extern void fix_freelist(struct xfs_mount *, xfs_agnumber_t, bool);
 extern void rmap_store_agflcount(struct xfs_mount *, xfs_agnumber_t, int);