xfs: convert flex-array declarations in xfs attr shortform objects

[thirdparty/xfsprogs-dev.git] / libxfs / xfs_rmap_btree.c
diff --git a/libxfs/xfs_rmap_btree.c b/libxfs/xfs_rmap_btree.c

index 4c281b71f5e57a5b3acfa27d3c76a1b943bfbdc9..d6e2fc0a3f94929c952634f213b7bae4ffafa67a 100644 (file)
--- a/libxfs/xfs_rmap_btree.c
+++ b/libxfs/xfs_rmap_btree.c
@@ -20,6 +20,8 @@
  #include "xfs_ag.h"
  #include "xfs_ag_resv.h"
  
+static struct kmem_cache       *xfs_rmapbt_cur_cache;
+
  /*
   * Reverse map btree.
   *
@@ -86,7 +88,7 @@ xfs_rmapbt_alloc_block(
         xfs_agblock_t           bno;
  
         /* Allocate the new block from the freelist. If we can't, give up.  */
-       error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp,
+       error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
                                        &bno, 1);
         if (error)
                 return error;
@@ -125,7 +127,7 @@ xfs_rmapbt_free_block(
                         bno, 1);
         be32_add_cpu(&agf->agf_rmap_blocks, -1);
         xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
-       error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+       error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
         if (error)
                 return error;
  
@@ -152,6 +154,16 @@ xfs_rmapbt_get_maxrecs(
         return cur->bc_mp->m_rmap_mxr[level != 0];
  }
  
+/*
+ * Convert the ondisk record's offset field into the ondisk key's offset field.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
+{
+       return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
+}
+
  STATIC void
  xfs_rmapbt_init_key_from_rec(
         union xfs_btree_key             *key,
@@ -159,7 +171,7 @@ xfs_rmapbt_init_key_from_rec(
  {
         key->rmap.rm_startblock = rec->rmap.rm_startblock;
         key->rmap.rm_owner = rec->rmap.rm_owner;
-       key->rmap.rm_offset = rec->rmap.rm_offset;
+       key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
  }
  
  /*
@@ -182,7 +194,7 @@ xfs_rmapbt_init_high_key_from_rec(
         key->rmap.rm_startblock = rec->rmap.rm_startblock;
         be32_add_cpu(&key->rmap.rm_startblock, adj);
         key->rmap.rm_owner = rec->rmap.rm_owner;
-       key->rmap.rm_offset = rec->rmap.rm_offset;
+       key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
         if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
             XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
                 return;
@@ -215,6 +227,16 @@ xfs_rmapbt_init_ptr_from_cur(
         ptr->s = agf->agf_roots[cur->bc_btnum];
  }
  
+/*
+ * Mask the appropriate parts of the ondisk key field for a key comparison.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline uint64_t offset_keymask(uint64_t offset)
+{
+       return offset & ~XFS_RMAP_OFF_UNWRITTEN;
+}
+
  STATIC int64_t
  xfs_rmapbt_key_diff(
         struct xfs_btree_cur            *cur,
@@ -236,8 +258,8 @@ xfs_rmapbt_key_diff(
         else if (y > x)
                 return -1;
  
-       x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
-       y = rec->rm_offset;
+       x = offset_keymask(be64_to_cpu(kp->rm_offset));
+       y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
         if (x > y)
                 return 1;
         else if (y > x)
@@ -249,31 +271,43 @@ STATIC int64_t
  xfs_rmapbt_diff_two_keys(
         struct xfs_btree_cur            *cur,
         const union xfs_btree_key       *k1,
-       const union xfs_btree_key       *k2)
+       const union xfs_btree_key       *k2,
+       const union xfs_btree_key       *mask)
  {
         const struct xfs_rmap_key       *kp1 = &k1->rmap;
         const struct xfs_rmap_key       *kp2 = &k2->rmap;
         int64_t                         d;
         __u64                           x, y;
  
+       /* Doesn't make sense to mask off the physical space part */
+       ASSERT(!mask || mask->rmap.rm_startblock);
+
         d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
-                      be32_to_cpu(kp2->rm_startblock);
+                    be32_to_cpu(kp2->rm_startblock);
         if (d)
                 return d;
  
-       x = be64_to_cpu(kp1->rm_owner);
-       y = be64_to_cpu(kp2->rm_owner);
-       if (x > y)
-               return 1;
-       else if (y > x)
-               return -1;
+       if (!mask || mask->rmap.rm_owner) {
+               x = be64_to_cpu(kp1->rm_owner);
+               y = be64_to_cpu(kp2->rm_owner);
+               if (x > y)
+                       return 1;
+               else if (y > x)
+                       return -1;
+       }
+
+       if (!mask || mask->rmap.rm_offset) {
+               /* Doesn't make sense to allow offset but not owner */
+               ASSERT(!mask || mask->rmap.rm_owner);
+
+               x = offset_keymask(be64_to_cpu(kp1->rm_offset));
+               y = offset_keymask(be64_to_cpu(kp2->rm_offset));
+               if (x > y)
+                       return 1;
+               else if (y > x)
+                       return -1;
+       }
  
-       x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
-       y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
-       if (x > y)
-               return 1;
-       else if (y > x)
-               return -1;
         return 0;
  }
  
@@ -309,7 +343,7 @@ xfs_rmapbt_verify(
                 return fa;
  
         level = be16_to_cpu(block->bb_level);
-       if (pag && pag->pagf_init) {
+       if (pag && xfs_perag_initialised_agf(pag)) {
                 if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
                         return __this_address;
         } else if (level >= mp->m_rmap_maxlevels)
@@ -383,8 +417,8 @@ xfs_rmapbt_keys_inorder(
                 return 1;
         else if (a > b)
                 return 0;
-       a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
-       b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+       a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
+       b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
         if (a <= b)
                 return 1;
         return 0;
@@ -413,13 +447,33 @@ xfs_rmapbt_recs_inorder(
                 return 1;
         else if (a > b)
                 return 0;
-       a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
-       b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+       a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
+       b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
         if (a <= b)
                 return 1;
         return 0;
  }
  
+STATIC enum xbtree_key_contig
+xfs_rmapbt_keys_contiguous(
+       struct xfs_btree_cur            *cur,
+       const union xfs_btree_key       *key1,
+       const union xfs_btree_key       *key2,
+       const union xfs_btree_key       *mask)
+{
+       ASSERT(!mask || mask->rmap.rm_startblock);
+
+       /*
+        * We only support checking contiguity of the physical space component.
+        * If any callers ever need more specificity than that, they'll have to
+        * implement it here.
+        */
+       ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
+
+       return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
+                                be32_to_cpu(key2->rmap.rm_startblock));
+}
+
  static const struct xfs_btree_ops xfs_rmapbt_ops = {
         .rec_len                = sizeof(struct xfs_rmap_rec),
         .key_len                = 2 * sizeof(struct xfs_rmap_key),
@@ -439,6 +493,7 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
         .diff_two_keys          = xfs_rmapbt_diff_two_keys,
         .keys_inorder           = xfs_rmapbt_keys_inorder,
         .recs_inorder           = xfs_rmapbt_recs_inorder,
+       .keys_contiguous        = xfs_rmapbt_keys_contiguous,
  };
  
  static struct xfs_btree_cur *
@@ -451,15 +506,12 @@ xfs_rmapbt_init_common(
  
         /* Overlapping btree; 2 keys per pointer. */
         cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
-                       mp->m_rmap_maxlevels);
+                       mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
         cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
         cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
         cur->bc_ops = &xfs_rmapbt_ops;
  
-       /* take a reference for the cursor */
-       atomic_inc(&pag->pag_ref);
-       cur->bc_ag.pag = pag;
-
+       cur->bc_ag.pag = xfs_perag_hold(pag);
         return cur;
  }
  
@@ -517,6 +569,18 @@ xfs_rmapbt_commit_staged_btree(
         xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
  }
  
+/* Calculate number of records in a reverse mapping btree block. */
+static inline unsigned int
+xfs_rmapbt_block_maxrecs(
+       unsigned int            blocklen,
+       bool                    leaf)
+{
+       if (leaf)
+               return blocklen / sizeof(struct xfs_rmap_rec);
+       return blocklen /
+               (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+}
+
  /*
   * Calculate number of records in an rmap btree block.
   */
@@ -526,11 +590,33 @@ xfs_rmapbt_maxrecs(
         int                     leaf)
  {
         blocklen -= XFS_RMAP_BLOCK_LEN;
+       return xfs_rmapbt_block_maxrecs(blocklen, leaf);
+}
  
-       if (leaf)
-               return blocklen / sizeof(struct xfs_rmap_rec);
-       return blocklen /
-               (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+/* Compute the max possible height for reverse mapping btrees. */
+unsigned int
+xfs_rmapbt_maxlevels_ondisk(void)
+{
+       unsigned int            minrecs[2];
+       unsigned int            blocklen;
+
+       blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;
+
+       minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
+       minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;
+
+       /*
+        * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
+        *
+        * On a reflink filesystem, each AG block can have up to 2^32 (per the
+        * refcount record format) owners, which means that theoretically we
+        * could face up to 2^64 rmap records.  However, we're likely to run
+        * out of blocks in the AG long before that happens, which means that
+        * we must compute the max height based on what the btree will look
+        * like if it consumes almost all the blocks in the AG due to maximal
+        * sharing factor.
+        */
+       return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
  }
  
  /* Compute the maximum height of an rmap btree. */
@@ -538,26 +624,36 @@ void
  xfs_rmapbt_compute_maxlevels(
         struct xfs_mount                *mp)
  {
-       /*
-        * On a non-reflink filesystem, the maximum number of rmap
-        * records is the number of blocks in the AG, hence the max
-        * rmapbt height is log_$maxrecs($agblocks).  However, with
-        * reflink each AG block can have up to 2^32 (per the refcount
-        * record format) owners, which means that theoretically we
-        * could face up to 2^64 rmap records.
-        *
-        * That effectively means that the max rmapbt height must be
-        * XFS_BTREE_MAXLEVELS.  "Fortunately" we'll run out of AG
-        * blocks to feed the rmapbt long before the rmapbt reaches
-        * maximum height.  The reflink code uses ag_resv_critical to
-        * disallow reflinking when less than 10% of the per-AG metadata
-        * block reservation since the fallback is a regular file copy.
-        */
-       if (xfs_has_reflink(mp))
-               mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
-       else
+       if (!xfs_has_rmapbt(mp)) {
+               mp->m_rmap_maxlevels = 0;
+               return;
+       }
+
+       if (xfs_has_reflink(mp)) {
+               /*
+                * Compute the asymptotic maxlevels for an rmap btree on a
+                * filesystem that supports reflink.
+                *
+                * On a reflink filesystem, each AG block can have up to 2^32
+                * (per the refcount record format) owners, which means that
+                * theoretically we could face up to 2^64 rmap records.
+                * However, we're likely to run out of blocks in the AG long
+                * before that happens, which means that we must compute the
+                * max height based on what the btree will look like if it
+                * consumes almost all the blocks in the AG due to maximal
+                * sharing factor.
+                */
+               mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
+                               mp->m_sb.sb_agblocks);
+       } else {
+               /*
+                * If there's no block sharing, compute the maximum rmapbt
+                * height assuming one rmap record per AG block.
+                */
                 mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
                                 mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+       }
+       ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
  }
  
  /* Calculate the refcount btree size for some records. */
@@ -604,7 +700,7 @@ xfs_rmapbt_calc_reserves(
         if (!xfs_has_rmapbt(mp))
                 return 0;
  
-       error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp);
+       error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
         if (error)
                 return error;
  
@@ -618,8 +714,7 @@ xfs_rmapbt_calc_reserves(
          * never be available for the kinds of things that would require btree
          * expansion.  We therefore can pretend the space isn't there.
          */
-       if (mp->m_sb.sb_logstart &&
-           XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno)
+       if (xfs_ag_contains_log(mp, pag->pag_agno))
                 agblocks -= mp->m_sb.sb_logblocks;
  
         /* Reserve 1% of the AG or enough for 1 block per record. */
@@ -628,3 +723,22 @@ xfs_rmapbt_calc_reserves(
  
         return error;
  }
+
+int __init
+xfs_rmapbt_init_cur_cache(void)
+{
+       xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
+                       xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
+                       0, 0, NULL);
+
+       if (!xfs_rmapbt_cur_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void
+xfs_rmapbt_destroy_cur_cache(void)
+{
+       kmem_cache_destroy(xfs_rmapbt_cur_cache);
+       xfs_rmapbt_cur_cache = NULL;
+}