]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blobdiff - libxfs/xfs_rmap_btree.c
xfs: convert flex-array declarations in xfs attr shortform objects
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_rmap_btree.c
index 4c281b71f5e57a5b3acfa27d3c76a1b943bfbdc9..d6e2fc0a3f94929c952634f213b7bae4ffafa67a 100644 (file)
@@ -20,6 +20,8 @@
 #include "xfs_ag.h"
 #include "xfs_ag_resv.h"
 
+static struct kmem_cache       *xfs_rmapbt_cur_cache;
+
 /*
  * Reverse map btree.
  *
@@ -86,7 +88,7 @@ xfs_rmapbt_alloc_block(
        xfs_agblock_t           bno;
 
        /* Allocate the new block from the freelist. If we can't, give up.  */
-       error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp,
+       error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
                                       &bno, 1);
        if (error)
                return error;
@@ -125,7 +127,7 @@ xfs_rmapbt_free_block(
                        bno, 1);
        be32_add_cpu(&agf->agf_rmap_blocks, -1);
        xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
-       error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+       error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
        if (error)
                return error;
 
@@ -152,6 +154,16 @@ xfs_rmapbt_get_maxrecs(
        return cur->bc_mp->m_rmap_mxr[level != 0];
 }
 
+/*
+ * Convert the ondisk record's offset field into the ondisk key's offset field.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
+{
+       return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
+}
+
 STATIC void
 xfs_rmapbt_init_key_from_rec(
        union xfs_btree_key             *key,
@@ -159,7 +171,7 @@ xfs_rmapbt_init_key_from_rec(
 {
        key->rmap.rm_startblock = rec->rmap.rm_startblock;
        key->rmap.rm_owner = rec->rmap.rm_owner;
-       key->rmap.rm_offset = rec->rmap.rm_offset;
+       key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
 }
 
 /*
@@ -182,7 +194,7 @@ xfs_rmapbt_init_high_key_from_rec(
        key->rmap.rm_startblock = rec->rmap.rm_startblock;
        be32_add_cpu(&key->rmap.rm_startblock, adj);
        key->rmap.rm_owner = rec->rmap.rm_owner;
-       key->rmap.rm_offset = rec->rmap.rm_offset;
+       key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
        if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
            XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
                return;
@@ -215,6 +227,16 @@ xfs_rmapbt_init_ptr_from_cur(
        ptr->s = agf->agf_roots[cur->bc_btnum];
 }
 
+/*
+ * Mask the appropriate parts of the ondisk key field for a key comparison.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline uint64_t offset_keymask(uint64_t offset)
+{
+       return offset & ~XFS_RMAP_OFF_UNWRITTEN;
+}
+
 STATIC int64_t
 xfs_rmapbt_key_diff(
        struct xfs_btree_cur            *cur,
@@ -236,8 +258,8 @@ xfs_rmapbt_key_diff(
        else if (y > x)
                return -1;
 
-       x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
-       y = rec->rm_offset;
+       x = offset_keymask(be64_to_cpu(kp->rm_offset));
+       y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
        if (x > y)
                return 1;
        else if (y > x)
@@ -249,31 +271,43 @@ STATIC int64_t
 xfs_rmapbt_diff_two_keys(
        struct xfs_btree_cur            *cur,
        const union xfs_btree_key       *k1,
-       const union xfs_btree_key       *k2)
+       const union xfs_btree_key       *k2,
+       const union xfs_btree_key       *mask)
 {
        const struct xfs_rmap_key       *kp1 = &k1->rmap;
        const struct xfs_rmap_key       *kp2 = &k2->rmap;
        int64_t                         d;
        __u64                           x, y;
 
+       /* Doesn't make sense to mask off the physical space part */
+       ASSERT(!mask || mask->rmap.rm_startblock);
+
        d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
-                      be32_to_cpu(kp2->rm_startblock);
+                    be32_to_cpu(kp2->rm_startblock);
        if (d)
                return d;
 
-       x = be64_to_cpu(kp1->rm_owner);
-       y = be64_to_cpu(kp2->rm_owner);
-       if (x > y)
-               return 1;
-       else if (y > x)
-               return -1;
+       if (!mask || mask->rmap.rm_owner) {
+               x = be64_to_cpu(kp1->rm_owner);
+               y = be64_to_cpu(kp2->rm_owner);
+               if (x > y)
+                       return 1;
+               else if (y > x)
+                       return -1;
+       }
+
+       if (!mask || mask->rmap.rm_offset) {
+               /* Doesn't make sense to allow offset but not owner */
+               ASSERT(!mask || mask->rmap.rm_owner);
+
+               x = offset_keymask(be64_to_cpu(kp1->rm_offset));
+               y = offset_keymask(be64_to_cpu(kp2->rm_offset));
+               if (x > y)
+                       return 1;
+               else if (y > x)
+                       return -1;
+       }
 
-       x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
-       y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
-       if (x > y)
-               return 1;
-       else if (y > x)
-               return -1;
        return 0;
 }
 
@@ -309,7 +343,7 @@ xfs_rmapbt_verify(
                return fa;
 
        level = be16_to_cpu(block->bb_level);
-       if (pag && pag->pagf_init) {
+       if (pag && xfs_perag_initialised_agf(pag)) {
                if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
                        return __this_address;
        } else if (level >= mp->m_rmap_maxlevels)
@@ -383,8 +417,8 @@ xfs_rmapbt_keys_inorder(
                return 1;
        else if (a > b)
                return 0;
-       a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
-       b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+       a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
+       b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
        if (a <= b)
                return 1;
        return 0;
@@ -413,13 +447,33 @@ xfs_rmapbt_recs_inorder(
                return 1;
        else if (a > b)
                return 0;
-       a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
-       b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+       a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
+       b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
        if (a <= b)
                return 1;
        return 0;
 }
 
+STATIC enum xbtree_key_contig
+xfs_rmapbt_keys_contiguous(
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_key       *key1,
+       const union xfs_btree_key       *key2,
+       const union xfs_btree_key       *mask)
+{
+       ASSERT(!mask || mask->rmap.rm_startblock);
+
+       /*
+        * We only support checking contiguity of the physical space component.
+        * If any callers ever need more specificity than that, they'll have to
+        * implement it here.
+        */
+       ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
+
+       return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
+                                be32_to_cpu(key2->rmap.rm_startblock));
+}
+
 static const struct xfs_btree_ops xfs_rmapbt_ops = {
        .rec_len                = sizeof(struct xfs_rmap_rec),
        .key_len                = 2 * sizeof(struct xfs_rmap_key),
@@ -439,6 +493,7 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
        .diff_two_keys          = xfs_rmapbt_diff_two_keys,
        .keys_inorder           = xfs_rmapbt_keys_inorder,
        .recs_inorder           = xfs_rmapbt_recs_inorder,
+       .keys_contiguous        = xfs_rmapbt_keys_contiguous,
 };
 
 static struct xfs_btree_cur *
@@ -451,15 +506,12 @@ xfs_rmapbt_init_common(
 
        /* Overlapping btree; 2 keys per pointer. */
        cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
-                       mp->m_rmap_maxlevels);
+                       mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
        cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
        cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
        cur->bc_ops = &xfs_rmapbt_ops;
 
-       /* take a reference for the cursor */
-       atomic_inc(&pag->pag_ref);
-       cur->bc_ag.pag = pag;
-
+       cur->bc_ag.pag = xfs_perag_hold(pag);
        return cur;
 }
 
@@ -517,6 +569,18 @@ xfs_rmapbt_commit_staged_btree(
        xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
 }
 
+/* Calculate number of records in a reverse mapping btree block. */
+static inline unsigned int
+xfs_rmapbt_block_maxrecs(
+       unsigned int            blocklen,
+       bool                    leaf)
+{
+       if (leaf)
+               return blocklen / sizeof(struct xfs_rmap_rec);
+       return blocklen /
+               (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+}
+
 /*
  * Calculate number of records in an rmap btree block.
  */
@@ -526,11 +590,33 @@ xfs_rmapbt_maxrecs(
        int                     leaf)
 {
        blocklen -= XFS_RMAP_BLOCK_LEN;
+       return xfs_rmapbt_block_maxrecs(blocklen, leaf);
+}
 
-       if (leaf)
-               return blocklen / sizeof(struct xfs_rmap_rec);
-       return blocklen /
-               (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+/* Compute the max possible height for reverse mapping btrees. */
+unsigned int
+xfs_rmapbt_maxlevels_ondisk(void)
+{
+       unsigned int            minrecs[2];
+       unsigned int            blocklen;
+
+       blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;
+
+       minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
+       minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;
+
+       /*
+        * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
+        *
+        * On a reflink filesystem, each AG block can have up to 2^32 (per the
+        * refcount record format) owners, which means that theoretically we
+        * could face up to 2^64 rmap records.  However, we're likely to run
+        * out of blocks in the AG long before that happens, which means that
+        * we must compute the max height based on what the btree will look
+        * like if it consumes almost all the blocks in the AG due to maximal
+        * sharing factor.
+        */
+       return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
 }
 
 /* Compute the maximum height of an rmap btree. */
@@ -538,26 +624,36 @@ void
 xfs_rmapbt_compute_maxlevels(
        struct xfs_mount                *mp)
 {
-       /*
-        * On a non-reflink filesystem, the maximum number of rmap
-        * records is the number of blocks in the AG, hence the max
-        * rmapbt height is log_$maxrecs($agblocks).  However, with
-        * reflink each AG block can have up to 2^32 (per the refcount
-        * record format) owners, which means that theoretically we
-        * could face up to 2^64 rmap records.
-        *
-        * That effectively means that the max rmapbt height must be
-        * XFS_BTREE_MAXLEVELS.  "Fortunately" we'll run out of AG
-        * blocks to feed the rmapbt long before the rmapbt reaches
-        * maximum height.  The reflink code uses ag_resv_critical to
-        * disallow reflinking when less than 10% of the per-AG metadata
-        * block reservation since the fallback is a regular file copy.
-        */
-       if (xfs_has_reflink(mp))
-               mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
-       else
+       if (!xfs_has_rmapbt(mp)) {
+               mp->m_rmap_maxlevels = 0;
+               return;
+       }
+
+       if (xfs_has_reflink(mp)) {
+               /*
+                * Compute the asymptotic maxlevels for an rmap btree on a
+                * filesystem that supports reflink.
+                *
+                * On a reflink filesystem, each AG block can have up to 2^32
+                * (per the refcount record format) owners, which means that
+                * theoretically we could face up to 2^64 rmap records.
+                * However, we're likely to run out of blocks in the AG long
+                * before that happens, which means that we must compute the
+                * max height based on what the btree will look like if it
+                * consumes almost all the blocks in the AG due to maximal
+                * sharing factor.
+                */
+               mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
+                               mp->m_sb.sb_agblocks);
+       } else {
+               /*
+                * If there's no block sharing, compute the maximum rmapbt
+                * height assuming one rmap record per AG block.
+                */
                mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
                                mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+       }
+       ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
 }
 
 /* Calculate the refcount btree size for some records. */
@@ -604,7 +700,7 @@ xfs_rmapbt_calc_reserves(
        if (!xfs_has_rmapbt(mp))
                return 0;
 
-       error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp);
+       error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
        if (error)
                return error;
 
@@ -618,8 +714,7 @@ xfs_rmapbt_calc_reserves(
         * never be available for the kinds of things that would require btree
         * expansion.  We therefore can pretend the space isn't there.
         */
-       if (mp->m_sb.sb_logstart &&
-           XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno)
+       if (xfs_ag_contains_log(mp, pag->pag_agno))
                agblocks -= mp->m_sb.sb_logblocks;
 
        /* Reserve 1% of the AG or enough for 1 block per record. */
@@ -628,3 +723,22 @@ xfs_rmapbt_calc_reserves(
 
        return error;
 }
+
+int __init
+xfs_rmapbt_init_cur_cache(void)
+{
+       xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
+                       xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
+                       0, 0, NULL);
+
+       if (!xfs_rmapbt_cur_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void
+xfs_rmapbt_destroy_cur_cache(void)
+{
+       kmem_cache_destroy(xfs_rmapbt_cur_cache);
+       xfs_rmapbt_cur_cache = NULL;
+}