diff --git a/libxfs/xfs_rmap_btree.c b/libxfs/xfs_rmap_btree.c
index 5595cf4d28fd2bbe5390976db9f1008a08a7b5b8..d6e2fc0a3f94929c952634f213b7bae4ffafa67a 100644
--- a/libxfs/xfs_rmap_btree.c
+++ b/libxfs/xfs_rmap_btree.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2014 Red Hat, Inc.
  * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "libxfs_priv.h"
 #include "xfs_fs.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
+#include "xfs_btree_staging.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
+#include "xfs_ag.h"
 #include "xfs_ag_resv.h"
 
+static struct kmem_cache       *xfs_rmapbt_cur_cache;
+
 /*
  * Reverse map btree.
  *
@@ -65,70 +52,61 @@ xfs_rmapbt_dup_cursor(
        struct xfs_btree_cur    *cur)
 {
        return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
-                       cur->bc_private.a.agbp, cur->bc_private.a.agno);
+                               cur->bc_ag.agbp, cur->bc_ag.pag);
 }
 
 STATIC void
 xfs_rmapbt_set_root(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr,
-       int                     inc)
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_ptr       *ptr,
+       int                             inc)
 {
-       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
-       xfs_agnumber_t          seqno = be32_to_cpu(agf->agf_seqno);
+       struct xfs_buf          *agbp = cur->bc_ag.agbp;
+       struct xfs_agf          *agf = agbp->b_addr;
        int                     btnum = cur->bc_btnum;
-       struct xfs_perag        *pag = xfs_perag_get(cur->bc_mp, seqno);
 
        ASSERT(ptr->s != 0);
 
        agf->agf_roots[btnum] = ptr->s;
        be32_add_cpu(&agf->agf_levels[btnum], inc);
-       pag->pagf_levels[btnum] += inc;
-       xfs_perag_put(pag);
+       cur->bc_ag.pag->pagf_levels[btnum] += inc;
 
        xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
 }
 
 STATIC int
 xfs_rmapbt_alloc_block(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *start,
-       union xfs_btree_ptr     *new,
-       int                     *stat)
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_ptr       *start,
+       union xfs_btree_ptr             *new,
+       int                             *stat)
 {
-       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       struct xfs_buf          *agbp = cur->bc_ag.agbp;
+       struct xfs_agf          *agf = agbp->b_addr;
+       struct xfs_perag        *pag = cur->bc_ag.pag;
        int                     error;
        xfs_agblock_t           bno;
 
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
        /* Allocate the new block from the freelist. If we can't, give up.  */
-       error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+       error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
                                       &bno, 1);
-       if (error) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       if (error)
                return error;
-       }
 
-       trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
-                       bno, 1);
+       trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1);
        if (bno == NULLAGBLOCK) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
                *stat = 0;
                return 0;
        }
 
-       xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1,
-                       false);
+       xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);
 
-       xfs_trans_agbtree_delta(cur->bc_tp, 1);
        new->s = cpu_to_be32(bno);
        be32_add_cpu(&agf->agf_rmap_blocks, 1);
        xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
 
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);
+
        *stat = 1;
        return 0;
 }
@@ -138,24 +116,25 @@ xfs_rmapbt_free_block(
        struct xfs_btree_cur    *cur,
        struct xfs_buf          *bp)
 {
-       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       struct xfs_buf          *agbp = cur->bc_ag.agbp;
+       struct xfs_agf          *agf = agbp->b_addr;
+       struct xfs_perag        *pag = cur->bc_ag.pag;
        xfs_agblock_t           bno;
        int                     error;
 
-       bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
-       trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
+       bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
+       trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno,
                        bno, 1);
        be32_add_cpu(&agf->agf_rmap_blocks, -1);
        xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
-       error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+       error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
        if (error)
                return error;
 
-       xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+       xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
                              XFS_EXTENT_BUSY_SKIP_DISCARD);
-       xfs_trans_agbtree_delta(cur->bc_tp, -1);
 
+       xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
        return 0;
 }
 
@@ -175,14 +154,24 @@ xfs_rmapbt_get_maxrecs(
        return cur->bc_mp->m_rmap_mxr[level != 0];
 }
 
+/*
+ * Convert the ondisk record's offset field into the ondisk key's offset field.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
+{
+       return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
+}
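
Note: the new ondisk_rec_offset_to_key() helper strips only the unwritten flag (bit 61 of rm_offset in the on-disk format) before the offset is used as a key, so a written and an unwritten mapping of the same extent compare as equal keys, while the attr-fork and bmbt flags stay significant. A minimal standalone sketch of that masking, with the XFS_RMAP_OFF_* bit positions restated here as assumptions:

/* Userspace sketch of the unwritten-bit key masking; flag values are
 * assumptions mirroring XFS_RMAP_OFF_* in xfs_format.h. */
#include <stdint.h>
#include <stdio.h>

#define RMAP_OFF_ATTR_FORK	((uint64_t)1 << 63)	/* attr fork extent */
#define RMAP_OFF_BMBT_BLOCK	((uint64_t)1 << 62)	/* bmbt block */
#define RMAP_OFF_UNWRITTEN	((uint64_t)1 << 61)	/* unwritten extent */

static uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~RMAP_OFF_UNWRITTEN;
}

int main(void)
{
	uint64_t written = 100;
	uint64_t unwritten = 100 | RMAP_OFF_UNWRITTEN;

	/* Same file offset, differing only in written state: equal keys. */
	printf("%d\n", offset_keymask(written) == offset_keymask(unwritten));

	/* The attr-fork flag stays significant: different keys. */
	printf("%d\n", offset_keymask(written) ==
		       offset_keymask(written | RMAP_OFF_ATTR_FORK));
	return 0;
}
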
+
 STATIC void
 xfs_rmapbt_init_key_from_rec(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
+       union xfs_btree_key             *key,
+       const union xfs_btree_rec       *rec)
 {
        key->rmap.rm_startblock = rec->rmap.rm_startblock;
        key->rmap.rm_owner = rec->rmap.rm_owner;
-       key->rmap.rm_offset = rec->rmap.rm_offset;
+       key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
 }
 
 /*
@@ -194,18 +183,18 @@ xfs_rmapbt_init_key_from_rec(
  */
 STATIC void
 xfs_rmapbt_init_high_key_from_rec(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
+       union xfs_btree_key             *key,
+       const union xfs_btree_rec       *rec)
 {
-       uint64_t                off;
-       int                     adj;
+       uint64_t                        off;
+       int                             adj;
 
        adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
 
        key->rmap.rm_startblock = rec->rmap.rm_startblock;
        be32_add_cpu(&key->rmap.rm_startblock, adj);
        key->rmap.rm_owner = rec->rmap.rm_owner;
-       key->rmap.rm_offset = rec->rmap.rm_offset;
+       key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
        if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
            XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
                return;
@@ -231,23 +220,32 @@ xfs_rmapbt_init_ptr_from_cur(
        struct xfs_btree_cur    *cur,
        union xfs_btree_ptr     *ptr)
 {
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+       struct xfs_agf          *agf = cur->bc_ag.agbp->b_addr;
 
-       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
-       ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
+       ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
 
        ptr->s = agf->agf_roots[cur->bc_btnum];
 }
 
+/*
+ * Mask the appropriate parts of the ondisk key field for a key comparison.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline uint64_t offset_keymask(uint64_t offset)
+{
+       return offset & ~XFS_RMAP_OFF_UNWRITTEN;
+}
+
 STATIC int64_t
 xfs_rmapbt_key_diff(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *key)
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_key       *key)
 {
-       struct xfs_rmap_irec    *rec = &cur->bc_rec.r;
-       struct xfs_rmap_key     *kp = &key->rmap;
-       __u64                   x, y;
-       int64_t                 d;
+       struct xfs_rmap_irec            *rec = &cur->bc_rec.r;
+       const struct xfs_rmap_key       *kp = &key->rmap;
+       __u64                           x, y;
+       int64_t                         d;
 
        d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
        if (d)
@@ -260,8 +258,8 @@ xfs_rmapbt_key_diff(
        else if (y > x)
                return -1;
 
-       x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
-       y = rec->rm_offset;
+       x = offset_keymask(be64_to_cpu(kp->rm_offset));
+       y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
        if (x > y)
                return 1;
        else if (y > x)
@@ -271,43 +269,56 @@ xfs_rmapbt_key_diff(
 
 STATIC int64_t
 xfs_rmapbt_diff_two_keys(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *k1,
-       union xfs_btree_key     *k2)
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_key       *k1,
+       const union xfs_btree_key       *k2,
+       const union xfs_btree_key       *mask)
 {
-       struct xfs_rmap_key     *kp1 = &k1->rmap;
-       struct xfs_rmap_key     *kp2 = &k2->rmap;
-       int64_t                 d;
-       __u64                   x, y;
+       const struct xfs_rmap_key       *kp1 = &k1->rmap;
+       const struct xfs_rmap_key       *kp2 = &k2->rmap;
+       int64_t                         d;
+       __u64                           x, y;
+
+       /* Doesn't make sense to mask off the physical space part */
+       ASSERT(!mask || mask->rmap.rm_startblock);
 
        d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
-                      be32_to_cpu(kp2->rm_startblock);
+                    be32_to_cpu(kp2->rm_startblock);
        if (d)
                return d;
 
-       x = be64_to_cpu(kp1->rm_owner);
-       y = be64_to_cpu(kp2->rm_owner);
-       if (x > y)
-               return 1;
-       else if (y > x)
-               return -1;
+       if (!mask || mask->rmap.rm_owner) {
+               x = be64_to_cpu(kp1->rm_owner);
+               y = be64_to_cpu(kp2->rm_owner);
+               if (x > y)
+                       return 1;
+               else if (y > x)
+                       return -1;
+       }
+
+       if (!mask || mask->rmap.rm_offset) {
+               /* Doesn't make sense to allow offset but not owner */
+               ASSERT(!mask || mask->rmap.rm_owner);
+
+               x = offset_keymask(be64_to_cpu(kp1->rm_offset));
+               y = offset_keymask(be64_to_cpu(kp2->rm_offset));
+               if (x > y)
+                       return 1;
+               else if (y > x)
+                       return -1;
+       }
 
-       x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
-       y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
-       if (x > y)
-               return 1;
-       else if (y > x)
-               return -1;
        return 0;
 }
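
Note: the mask argument is new here. Callers that only care about part of the key pass a partially-populated key whose nonzero fields mark the significant components. A hedged sketch of how such a mask is built (modeled on xfs_rmap_has_records() in xfs_rmap.c; treat the exact initializer as an assumption):

/* Compare keys by physical startblock only; the zeroed rm_owner and
 * rm_offset make xfs_rmapbt_diff_two_keys() skip those comparisons. */
union xfs_btree_key	mask = {
	.rmap.rm_startblock = cpu_to_be32(-1U),
};

That is what a "does anything map this block range, regardless of owner" query wants, and the ASSERTs above enforce that startblock is always significant and that offset is never checked without owner.
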
 
-static bool
+static xfs_failaddr_t
 xfs_rmapbt_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
+       xfs_failaddr_t          fa;
        unsigned int            level;
 
        /*
@@ -322,20 +333,21 @@ xfs_rmapbt_verify(
         * from the on disk AGF. Again, we can only check against maximum limits
         * in this case.
         */
-       if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
-               return false;
+       if (!xfs_verify_magic(bp, block->bb_magic))
+               return __this_address;
 
-       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
-               return false;
-       if (!xfs_btree_sblock_v5hdr_verify(bp))
-               return false;
+       if (!xfs_has_rmapbt(mp))
+               return __this_address;
+       fa = xfs_btree_sblock_v5hdr_verify(bp);
+       if (fa)
+               return fa;
 
        level = be16_to_cpu(block->bb_level);
-       if (pag && pag->pagf_init) {
+       if (pag && xfs_perag_initialised_agf(pag)) {
                if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
-                       return false;
+                       return __this_address;
        } else if (level >= mp->m_rmap_maxlevels)
-               return false;
+               return __this_address;
 
        return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
 }
@@ -344,25 +356,30 @@ static void
 xfs_rmapbt_read_verify(
        struct xfs_buf  *bp)
 {
+       xfs_failaddr_t  fa;
+
        if (!xfs_btree_sblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_rmapbt_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_rmapbt_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 
-       if (bp->b_error) {
+       if (bp->b_error)
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
 }
 
 static void
 xfs_rmapbt_write_verify(
        struct xfs_buf  *bp)
 {
-       if (!xfs_rmapbt_verify(bp)) {
+       xfs_failaddr_t  fa;
+
+       fa = xfs_rmapbt_verify(bp);
+       if (fa) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
        xfs_btree_sblock_calc_crc(bp);
@@ -371,15 +388,17 @@ xfs_rmapbt_write_verify(
 
 const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
        .name                   = "xfs_rmapbt",
+       .magic                  = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
        .verify_read            = xfs_rmapbt_read_verify,
        .verify_write           = xfs_rmapbt_write_verify,
+       .verify_struct          = xfs_rmapbt_verify,
 };
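
Note: the new .magic array holds the expected magic for non-CRC filesystems in slot 0 and for CRC (v5) filesystems in slot 1; the rmapbt only exists on v5 superblocks, hence the zero in slot 0. For reference, the shared helper that consumes the array looks roughly like this (paraphrased from xfs_verify_magic() in xfs_buf.c; details here are from memory, not authoritative):

bool
xfs_verify_magic(
	struct xfs_buf		*bp,
	__be32			dmagic)
{
	int			idx = xfs_has_crc(bp->b_mount);

	/* A zero slot means this format doesn't exist on such filesystems. */
	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
		return false;
	return dmagic == bp->b_ops->magic[idx];
}
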
 
 STATIC int
 xfs_rmapbt_keys_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *k1,
-       union xfs_btree_key     *k2)
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_key       *k1,
+       const union xfs_btree_key       *k2)
 {
        uint32_t                x;
        uint32_t                y;
@@ -398,8 +417,8 @@ xfs_rmapbt_keys_inorder(
                return 1;
        else if (a > b)
                return 0;
-       a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
-       b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+       a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
+       b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
        if (a <= b)
                return 1;
        return 0;
@@ -407,9 +426,9 @@ xfs_rmapbt_keys_inorder(
 
 STATIC int
 xfs_rmapbt_recs_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *r1,
-       union xfs_btree_rec     *r2)
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_rec       *r1,
+       const union xfs_btree_rec       *r2)
 {
        uint32_t                x;
        uint32_t                y;
@@ -428,13 +447,33 @@ xfs_rmapbt_recs_inorder(
                return 1;
        else if (a > b)
                return 0;
-       a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
-       b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+       a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
+       b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
        if (a <= b)
                return 1;
        return 0;
 }
 
+STATIC enum xbtree_key_contig
+xfs_rmapbt_keys_contiguous(
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_key       *key1,
+       const union xfs_btree_key       *key2,
+       const union xfs_btree_key       *mask)
+{
+       ASSERT(!mask || mask->rmap.rm_startblock);
+
+       /*
+        * We only support checking contiguity of the physical space component.
+        * If any callers ever need more specificity than that, they'll have to
+        * implement it here.
+        */
+       ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
+
+       return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
+                                be32_to_cpu(key2->rmap.rm_startblock));
+}
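
Note: xbtree_key_contig() classifies the relationship between two key values. A sketch of the helper this relies on (paraphrased from xfs_btree.h; treat the exact definition as an assumption):

static inline enum xbtree_key_contig
xbtree_key_contig(uint64_t x, uint64_t y)
{
	x++;
	if (x < y)
		return XBTREE_KEY_GAP;		/* blocks missing in between */
	if (x == y)
		return XBTREE_KEY_CONTIGUOUS;	/* y starts right after x */
	return XBTREE_KEY_OVERLAP;		/* the ranges overlap */
}

So for the rmapbt this reports whether the two keys' startblocks describe adjacent physical space, which is all the current callers need.
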
+
 static const struct xfs_btree_ops xfs_rmapbt_ops = {
        .rec_len                = sizeof(struct xfs_rmap_rec),
        .key_len                = 2 * sizeof(struct xfs_rmap_key),
@@ -454,53 +493,130 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
        .diff_two_keys          = xfs_rmapbt_diff_two_keys,
        .keys_inorder           = xfs_rmapbt_keys_inorder,
        .recs_inorder           = xfs_rmapbt_recs_inorder,
+       .keys_contiguous        = xfs_rmapbt_keys_contiguous,
 };
 
-/*
- * Allocate a new allocation btree cursor.
- */
-struct xfs_btree_cur *
-xfs_rmapbt_init_cursor(
+static struct xfs_btree_cur *
+xfs_rmapbt_init_common(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
-       struct xfs_buf          *agbp,
-       xfs_agnumber_t          agno)
+       struct xfs_perag        *pag)
 {
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
        struct xfs_btree_cur    *cur;
 
-       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
-       cur->bc_tp = tp;
-       cur->bc_mp = mp;
        /* Overlapping btree; 2 keys per pointer. */
-       cur->bc_btnum = XFS_BTNUM_RMAP;
+       cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
+                       mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
        cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
-       cur->bc_blocklog = mp->m_sb.sb_blocklog;
+       cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
        cur->bc_ops = &xfs_rmapbt_ops;
+
+       cur->bc_ag.pag = xfs_perag_hold(pag);
+       return cur;
+}
+
+/* Create a new reverse mapping btree cursor. */
+struct xfs_btree_cur *
+xfs_rmapbt_init_cursor(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       struct xfs_perag        *pag)
+{
+       struct xfs_agf          *agf = agbp->b_addr;
+       struct xfs_btree_cur    *cur;
+
+       cur = xfs_rmapbt_init_common(mp, tp, pag);
        cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
-       cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
+       cur->bc_ag.agbp = agbp;
+       return cur;
+}
 
-       cur->bc_private.a.agbp = agbp;
-       cur->bc_private.a.agno = agno;
+/* Create a new reverse mapping btree cursor with a fake root for staging. */
+struct xfs_btree_cur *
+xfs_rmapbt_stage_cursor(
+       struct xfs_mount        *mp,
+       struct xbtree_afakeroot *afake,
+       struct xfs_perag        *pag)
+{
+       struct xfs_btree_cur    *cur;
 
+       cur = xfs_rmapbt_init_common(mp, NULL, pag);
+       xfs_btree_stage_afakeroot(cur, afake);
        return cur;
 }
 
+/*
+ * Install a new reverse mapping btree root.  Caller is responsible for
+ * invalidating and freeing the old btree blocks.
+ */
+void
+xfs_rmapbt_commit_staged_btree(
+       struct xfs_btree_cur    *cur,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp)
+{
+       struct xfs_agf          *agf = agbp->b_addr;
+       struct xbtree_afakeroot *afake = cur->bc_ag.afake;
+
+       ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
+
+       agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
+       agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
+       agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
+       xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
+                                   XFS_AGF_RMAP_BLOCKS);
+       xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
+}
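
Note: staging cursors pair with the bulk loader in xfs_btree_staging.c. A hedged sketch of the driver sequence a repair tool would use (callback setup and error handling elided; names not shown in this diff are assumptions):

/* Rebuild the rmapbt against a fake root, then swap in the new root. */
struct xbtree_afakeroot	afake = { 0 };
struct xfs_btree_bload	bbl = { /* record/block callbacks go here */ };
struct xfs_btree_cur	*cur;
int			error;

cur = xfs_rmapbt_stage_cursor(mp, &afake, pag);
error = xfs_btree_bload_compute_geometry(cur, &bbl, nr_records);
/* ... reserve bbl.nr_blocks worth of space, then ... */
error = xfs_btree_bload(cur, &bbl, priv);
/* Commit points the AGF at the fake root and converts the cursor. */
xfs_rmapbt_commit_staged_btree(cur, tp, agbp);
xfs_btree_del_cursor(cur, error);
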
+
+/* Calculate number of records in a reverse mapping btree block. */
+static inline unsigned int
+xfs_rmapbt_block_maxrecs(
+       unsigned int            blocklen,
+       bool                    leaf)
+{
+       if (leaf)
+               return blocklen / sizeof(struct xfs_rmap_rec);
+       return blocklen /
+               (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+}
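
Note: a standalone check of this arithmetic for 4096-byte blocks with the 56-byte CRC-enabled short-form header; the sizes are assumptions mirroring struct xfs_rmap_rec (24 bytes), struct xfs_rmap_key (20 bytes), xfs_rmap_ptr_t (4 bytes) and XFS_BTREE_SBLOCK_CRC_LEN (56 bytes):

#include <stdio.h>

int main(void)
{
	unsigned int blocklen = 4096 - 56;

	/* Leaf blocks hold records; node blocks hold key pairs + pointers. */
	printf("leaf maxrecs: %u\n", blocklen / 24);		/* 168 */
	printf("node maxrecs: %u\n", blocklen / (2 * 20 + 4));	/* 91 */
	return 0;
}
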
+
 /*
  * Calculate number of records in an rmap btree block.
  */
 int
 xfs_rmapbt_maxrecs(
-       struct xfs_mount        *mp,
        int                     blocklen,
        int                     leaf)
 {
        blocklen -= XFS_RMAP_BLOCK_LEN;
+       return xfs_rmapbt_block_maxrecs(blocklen, leaf);
+}
 
-       if (leaf)
-               return blocklen / sizeof(struct xfs_rmap_rec);
-       return blocklen /
-               (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+/* Compute the max possible height for reverse mapping btrees. */
+unsigned int
+xfs_rmapbt_maxlevels_ondisk(void)
+{
+       unsigned int            minrecs[2];
+       unsigned int            blocklen;
+
+       blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;
+
+       minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
+       minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;
+
+       /*
+        * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
+        *
+        * On a reflink filesystem, each AG block can have up to 2^32 (per the
+        * refcount record format) owners, which means that theoretically we
+        * could face up to 2^64 rmap records.  However, we're likely to run
+        * out of blocks in the AG long before that happens, which means that
+        * we must compute the max height based on what the btree will look
+        * like if it consumes almost all the blocks in the AG due to maximal
+        * sharing factor.
+        */
+       return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
 }
 
 /* Compute the maximum height of an rmap btree. */
@@ -508,26 +624,36 @@ void
 xfs_rmapbt_compute_maxlevels(
        struct xfs_mount                *mp)
 {
-       /*
-        * On a non-reflink filesystem, the maximum number of rmap
-        * records is the number of blocks in the AG, hence the max
-        * rmapbt height is log_$maxrecs($agblocks).  However, with
-        * reflink each AG block can have up to 2^32 (per the refcount
-        * record format) owners, which means that theoretically we
-        * could face up to 2^64 rmap records.
-        *
-        * That effectively means that the max rmapbt height must be
-        * XFS_BTREE_MAXLEVELS.  "Fortunately" we'll run out of AG
-        * blocks to feed the rmapbt long before the rmapbt reaches
-        * maximum height.  The reflink code uses ag_resv_critical to
-        * disallow reflinking when less than 10% of the per-AG metadata
-        * block reservation since the fallback is a regular file copy.
-        */
-       if (xfs_sb_version_hasreflink(&mp->m_sb))
-               mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
-       else
-               mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
+       if (!xfs_has_rmapbt(mp)) {
+               mp->m_rmap_maxlevels = 0;
+               return;
+       }
+
+       if (xfs_has_reflink(mp)) {
+               /*
+                * Compute the asymptotic maxlevels for an rmap btree on a
+                * filesystem that supports reflink.
+                *
+                * On a reflink filesystem, each AG block can have up to 2^32
+                * (per the refcount record format) owners, which means that
+                * theoretically we could face up to 2^64 rmap records.
+                * However, we're likely to run out of blocks in the AG long
+                * before that happens, which means that we must compute the
+                * max height based on what the btree will look like if it
+                * consumes almost all the blocks in the AG due to maximal
+                * sharing factor.
+                */
+               mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
+                               mp->m_sb.sb_agblocks);
+       } else {
+               /*
+                * If there's no block sharing, compute the maximum rmapbt
+                * height assuming one rmap record per AG block.
+                */
+               mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
                                mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+       }
+       ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
 }
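
Note: in the non-reflink case, xfs_btree_compute_maxlevels() finds the smallest height at which minimally-filled (half-full) blocks can hold one rmap record per AG block. A standalone sketch of that estimate, assuming m_rmap_mnr = {84, 45}, i.e. half of the 4k-block maxima computed above:

#include <stdio.h>

int main(void)
{
	unsigned long long records = 1048576;	/* one record per AG block */
	unsigned int mnr[2] = { 84, 45 };
	unsigned int height = 1;
	unsigned long long blocks = (records + mnr[0] - 1) / mnr[0];

	/* Each node level reduces the block count mnr[1]-fold. */
	while (blocks > 1) {
		blocks = (blocks + mnr[1] - 1) / mnr[1];
		height++;
	}
	printf("max height: %u\n", height);	/* 4 for a 1M-block AG */
	return 0;
}
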
 
 /* Calculate the rmap btree size for some records. */
@@ -536,7 +662,7 @@ xfs_rmapbt_calc_size(
        struct xfs_mount        *mp,
        unsigned long long      len)
 {
-       return xfs_btree_calc_size(mp, mp->m_rmap_mnr, len);
+       return xfs_btree_calc_size(mp->m_rmap_mnr, len);
 }
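
Note: xfs_btree_calc_size() sums the worst-case block count level by level: ceil(len / mnr[0]) leaf blocks, then an mnr[1]-way reduction per interior level until a single root block remains. A standalone sketch of the same geometric series, with the mnr values assumed as above:

#include <stdio.h>

int main(void)
{
	unsigned long long len = 1000;		/* number of rmap records */
	unsigned long long blocks = 0;
	unsigned int mnr[2] = { 84, 45 };
	unsigned int maxr = mnr[0];

	while (len > 1) {
		len = (len + maxr - 1) / maxr;	/* blocks at this level */
		blocks += len;
		maxr = mnr[1];
	}
	printf("worst-case blocks: %llu\n", blocks);	/* 12 + 1 = 13 */
	return 0;
}
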
 
 /*
@@ -560,7 +686,8 @@ xfs_rmapbt_max_size(
 int
 xfs_rmapbt_calc_reserves(
        struct xfs_mount        *mp,
-       xfs_agnumber_t          agno,
+       struct xfs_trans        *tp,
+       struct xfs_perag        *pag,
        xfs_extlen_t            *ask,
        xfs_extlen_t            *used)
 {
@@ -570,17 +697,25 @@ xfs_rmapbt_calc_reserves(
        xfs_extlen_t            tree_len;
        int                     error;
 
-       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+       if (!xfs_has_rmapbt(mp))
                return 0;
 
-       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
        if (error)
                return error;
 
-       agf = XFS_BUF_TO_AGF(agbp);
+       agf = agbp->b_addr;
        agblocks = be32_to_cpu(agf->agf_length);
        tree_len = be32_to_cpu(agf->agf_rmap_blocks);
-       xfs_buf_relse(agbp);
+       xfs_trans_brelse(tp, agbp);
+
+       /*
+        * The log is permanently allocated, so the space it occupies will
+        * never be available for the kinds of things that would require btree
+        * expansion.  We therefore can pretend the space isn't there.
+        */
+       if (xfs_ag_contains_log(mp, pag->pag_agno))
+               agblocks -= mp->m_sb.sb_logblocks;
 
        /* Reserve 1% of the AG or enough for 1 block per record. */
        *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
@@ -588,3 +723,22 @@ xfs_rmapbt_calc_reserves(
 
        return error;
 }
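
Worked example of the reservation bound, assuming 4096-byte blocks (m_rmap_mnr = {84, 45} as above) and a 1,048,576-block AG that does not contain the log: the 1% term is 10,485 blocks, while the one-block-per-record ceiling from xfs_rmapbt_max_size() comes to 12,483 + 278 + 7 + 1 = 12,769 blocks, so the per-record bound is what sets *ask here.
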
+
+int __init
+xfs_rmapbt_init_cur_cache(void)
+{
+       xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
+                       xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
+                       0, 0, NULL);
+
+       if (!xfs_rmapbt_cur_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void
+xfs_rmapbt_destroy_cur_cache(void)
+{
+       kmem_cache_destroy(xfs_rmapbt_cur_cache);
+       xfs_rmapbt_cur_cache = NULL;
+}
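
Note: these run once at module init/exit; in the kernel the per-btree cursor caches are created together (xfs_btree_init_cur_caches() and xfs_btree_destroy_cur_caches() in xfs_btree.c), and since kmem_cache_destroy(NULL) is a no-op, teardown can run unconditionally. A hedged sketch of that calling convention (the caller names here are illustrative):

int init_cur_caches(void)
{
	int	error;

	error = xfs_rmapbt_init_cur_cache();
	if (error)
		return error;
	/* ... init the other btree cursor caches, unwinding on failure ... */
	return 0;
}

void destroy_cur_caches(void)
{
	xfs_rmapbt_destroy_cur_cache();
	/* ... */
}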