        int                     ar_flcount;     /* agfl entries from leftover */
                                                /* agbt allocations */
        struct xfs_rmap_irec    ar_last_rmap;   /* last rmap seen */
+       struct xfs_slab         *ar_refcount_items; /* refcount items, p4-5 */
};
static struct xfs_ag_rmap *ag_rmaps;
rmap_needs_work(
        struct xfs_mount        *mp)
{
-       return xfs_sb_version_hasrmapbt(&mp->m_sb);
+       return xfs_sb_version_hasreflink(&mp->m_sb) ||
+              xfs_sb_version_hasrmapbt(&mp->m_sb);
}
/*
                        do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
                ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
+               error = init_slab(&ag_rmaps[i].ar_refcount_items,
+                                 sizeof(struct xfs_refcount_irec));
+               if (error)
+                       do_error(
+_("Insufficient memory while allocating refcount item slabs."));
        }
}
        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
                free_slab(&ag_rmaps[i].ar_rmaps);
                free_slab(&ag_rmaps[i].ar_raw_rmaps);
+               free_slab(&ag_rmaps[i].ar_refcount_items);
        }
        free(ag_rmaps);
        ag_rmaps = NULL;
# define rmap_dump(m, a, r)
#endif
+/*
+ * Rebuilding the Reference Count & Reverse Mapping Btrees
+ *
+ * The reference count (refcnt) and reverse mapping (rmap) btrees are
+ * rebuilt during phase 5, like all other AG btrees. Therefore, reverse
+ * mappings must be processed into reference counts at the end of phase
+ * 4, and the rmaps must be recorded during phase 4. There is a need to
+ * access the rmaps in physical block order, but no particular need for
+ * random access, so the slab.c code provides a big logical array
+ * (consisting of smaller slabs) and some inorder iterator functions.
+ *
+ * Once we've recorded all the reverse mappings, we're ready to
+ * translate the rmaps into refcount entries. Imagine the rmap entries
+ * as rectangles representing extents of physical blocks, and that the
+ * rectangles can be laid down to allow them to overlap each other; then
+ * we know that we must emit a refcnt btree entry wherever the amount of
+ * overlap changes, i.e. the emission stimulus is level-triggered:
+ *
+ *                    -    ---
+ *          --      ----- ----   ---        ------
+ *    --   ----     ----------- ----     ---------
+ *    -------------------------------- -----------
+ *    ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
+ *    2 1  23 21    3 43 234  2123  1 01 2  3     0
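+ *
+ * (Each caret marks a point where the amount of overlap changes; the
+ * digit below it is the reference count that takes effect at that
+ * point.)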
+ *
+ * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
+ *
+ * Note that in the actual refcnt btree we don't store the refcount < 2
+ * cases because the bnobt tells us which blocks are free; single-use
+ * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
+ * supports storing multiple entries covering a given block we could
+ * theoretically dispense with the refcntbt and simply count rmaps, but
+ * that's inefficient in the (hot) write path, so we'll take the cost of
+ * the extra tree to save time. Also there's no guarantee that rmap
+ * will be enabled.
+ *
+ * Given an array of rmaps sorted by physical block number, a starting
+ * physical block (sp), a bag to hold rmaps that cover sp, and the next
+ * physical block where the level changes (np), we can reconstruct the
+ * refcount btree as follows:
+ *
+ * While there are still unprocessed rmaps in the array,
+ *    - Set sp to the physical block (pblk) of the next unprocessed rmap.
+ *    - Add to the bag all rmaps in the array where startblock == sp.
+ *    - Set np to the physical block where the bag size will change. This
+ *      is the minimum of (the pblk of the next unprocessed rmap) and
+ *      (startblock + len of each rmap in the bag).
+ *    - Record the bag size as old_bag_size.
+ *
+ *    - While the bag isn't empty,
+ *       - Remove from the bag all rmaps where startblock + len == np.
+ *       - Add to the bag all rmaps in the array where startblock == np.
+ *       - If the bag size isn't old_bag_size, store the refcount entry
+ *         (sp, np - sp, old_bag_size) in the refcnt btree.
+ *       - If the bag is empty, break out of the inner loop.
+ *       - Set old_bag_size to the bag size.
+ *       - Set sp = np.
+ *       - Set np to the physical block where the bag size will change.
+ *         This is the minimum of (the pblk of the next unprocessed rmap)
+ *         and (startblock + len of each rmap in the bag).
+ *
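+ * For example (an illustrative trace of the steps above), suppose the
+ * array holds three rmaps: A = (10, 4, ...), B = (12, 3, ...) and
+ * C = (20, 2, ...); that is, A covers blocks 10-13, B covers 12-14,
+ * and C covers 20-21:
+ *
+ *    - sp = 10, bag = {A}, np = min(12, 14) = 12, old_bag_size = 1.
+ *    - At np = 12 nothing ends and B is added.  The bag size becomes 2,
+ *      but refcount-1 extents aren't stored (see above), so nothing is
+ *      recorded for [10, 12).  old_bag_size = 2, sp = 12,
+ *      np = min(20, 14, 15) = 14.
+ *    - At np = 14 A ends.  The bag size drops to 1, so (12, 2, 2) is
+ *      recorded.  old_bag_size = 1, sp = 14, np = 15.
+ *    - At np = 15 B ends and the bag is empty, so the inner loop ends.
+ *    - C overlaps nothing, so processing it records no entry either.
+ *
+ * The only record that reaches the refcount btree is (12, 2, 2).
+ *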
+ * An implementation detail is that because this processing happens
+ * during phase 4, the refcount entries are stored in an array so that
+ * phase 5 can load them into the refcount btree. The rmaps can be
+ * loaded directly into the rmap btree during phase 5 as well.
+ */
+
+/*
+ * Emit a refcount object for refcntbt reconstruction during phase 5.
+ */
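+/* Saturate at MAXREFCOUNT rather than overflowing the stored refcount. */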
+#define REFCOUNT_CLAMP(nr) ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
+static void
+refcount_emit(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           agbno,
+       xfs_extlen_t            len,
+       size_t                  nr_rmaps)
+{
+       struct xfs_refcount_irec        rlrec;
+       int                             error;
+       struct xfs_slab                 *rlslab;
+
+       rlslab = ag_rmaps[agno].ar_refcount_items;
+       ASSERT(nr_rmaps > 0);
+
+       dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
+                       agno, agbno, len, nr_rmaps);
+       rlrec.rc_startblock = agbno;
+       rlrec.rc_blockcount = len;
+       rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
+       error = slab_add(rlslab, &rlrec);
+       if (error)
+               do_error(
+_("Insufficient memory while recreating refcount tree."));
+}
+#undef REFCOUNT_CLAMP
+
+/*
+ * Transform a pile of physical block mapping observations into refcount data
+ * for eventual rebuilding of the btrees.
+ */
+#define RMAP_END(r) ((r)->rm_startblock + (r)->rm_blockcount)
+int
+compute_refcounts(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_bag          *stack_top = NULL;
+       struct xfs_slab         *rmaps;
+       struct xfs_slab_cursor  *rmaps_cur;
+       struct xfs_rmap_irec    *array_cur;
+       struct xfs_rmap_irec    *rmap;
+       xfs_agblock_t           sbno;   /* first bno of this rmap set */
+       xfs_agblock_t           cbno;   /* first bno of this refcount set */
+       xfs_agblock_t           nbno;   /* next bno where rmap set changes */
+       size_t                  n, idx;
+       size_t                  old_stack_nr;
+       int                     error;
+
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return 0;
+
+       rmaps = ag_rmaps[agno].ar_rmaps;
+
+       error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
+       if (error)
+               return error;
+
+       error = init_bag(&stack_top);
+       if (error)
+               goto err;
+
+       /* While there are rmaps to be processed... */
+       n = 0;
+       while (n < slab_count(rmaps)) {
+               array_cur = peek_slab_cursor(rmaps_cur);
+               sbno = cbno = array_cur->rm_startblock;
+               /* Push all rmaps with pblk == sbno onto the stack */
+               for (;
+                    array_cur && array_cur->rm_startblock == sbno;
+                    array_cur = peek_slab_cursor(rmaps_cur)) {
+                       advance_slab_cursor(rmaps_cur); n++;
+                       rmap_dump("push0", agno, array_cur);
+                       error = bag_add(stack_top, array_cur);
+                       if (error)
+                               goto err;
+               }
+
+               /* Set nbno to the bno of the next refcount change */
+               if (n < slab_count(rmaps))
+                       nbno = array_cur->rm_startblock;
+               else
+                       nbno = NULLAGBLOCK;
+               foreach_bag_ptr(stack_top, idx, rmap) {
+                       nbno = min(nbno, RMAP_END(rmap));
+               }
+
+               /* Emit refcount records, if needed */
+               ASSERT(nbno > sbno);
+               old_stack_nr = bag_count(stack_top);
+
+               /* While stack isn't empty... */
+               while (bag_count(stack_top)) {
+                       /* Pop all rmaps that end at nbno */
+                       foreach_bag_ptr_reverse(stack_top, idx, rmap) {
+                               if (RMAP_END(rmap) != nbno)
+                                       continue;
+                               rmap_dump("pop", agno, rmap);
+                               error = bag_remove(stack_top, idx);
+                               if (error)
+                                       goto err;
+                       }
+
+                       /* Push array items that start at nbno */
+                       for (;
+                            array_cur && array_cur->rm_startblock == nbno;
+                            array_cur = peek_slab_cursor(rmaps_cur)) {
+                               advance_slab_cursor(rmaps_cur); n++;
+                               rmap_dump("push1", agno, array_cur);
+                               error = bag_add(stack_top, array_cur);
+                               if (error)
+                                       goto err;
+                       }
+
+                       /* Emit refcount if necessary */
+                       ASSERT(nbno > cbno);
+                       if (bag_count(stack_top) != old_stack_nr) {
+                               if (old_stack_nr > 1) {
+                                       refcount_emit(mp, agno, cbno,
+                                                       nbno - cbno,
+                                                       old_stack_nr);
+                               }
+                               cbno = nbno;
+                       }
+
+                       /* Stack empty, go find the next rmap */
+                       if (bag_count(stack_top) == 0)
+                               break;
+                       old_stack_nr = bag_count(stack_top);
+                       sbno = nbno;
+
+                       /* Set nbno to the bno of the next refcount change */
+                       if (n < slab_count(rmaps))
+                               nbno = array_cur->rm_startblock;
+                       else
+                               nbno = NULLAGBLOCK;
+                       foreach_bag_ptr(stack_top, idx, rmap) {
+                               nbno = min(nbno, RMAP_END(rmap));
+                       }
+
+                       /* Make sure we made forward progress */
+                       ASSERT(nbno > sbno);
+               }
+       }
+err:
+       free_bag(&stack_top);
+       free_slab_cursor(&rmaps_cur);
+
+       return error;
+}
+#undef RMAP_END
+
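For reference, this is roughly how the new helper might be driven once per AG
at the end of phase 4; the actual call site is outside this hunk, so the loop
below is only an illustrative sketch, not part of the patch:

        xfs_agnumber_t  agno;
        int             error;

        /* Turn the collected rmaps of each AG into refcount records. */
        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
                error = compute_refcounts(mp, agno);
                if (error)
                        do_error(
_("Error %d while computing reference count records.\n"), error);
        }
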
/*
 * Return the number of rmap objects for an AG.
 */