]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs: preallocate blocks for worst-case btree expansion
author: Darrick J. Wong <darrick.wong@oracle.com>
Tue, 25 Oct 2016 01:43:48 +0000 (12:43 +1100)
committer: Dave Chinner <david@fromorbit.com>
Tue, 25 Oct 2016 01:43:48 +0000 (12:43 +1100)
Source kernel commit: 84d6961910ea7b3ae8d8338f5b4df25dea68cee9

To gracefully handle the situation where a CoW operation turns a
single refcount extent into a lot of tiny ones and then runs out of
space when a tree split has to happen, use the per-AG reserved block
pool to pre-allocate all the space we'll ever need for a maximal
btree.  For a 4K block size, this only costs an overhead of 0.3% of
available disk space.

When reflink is enabled, we have an unfortunate problem with rmap --
since we can share a block billions of times, this means that the
reverse mapping btree can expand basically infinitely.  When an AG is
so full that there are no free blocks with which to expand the rmapbt,
the filesystem will shut down hard.

This is rather annoying to the user, so use the AG reservation code to
reserve a "reasonable" amount of space for rmap.  We'll prevent
reflinks and CoW operations if we think we're getting close to
exhausting an AG's free space rather than shutting down, but this
permanent reservation should be enough for "most" users.  Hopefully.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
[hch@lst.de: ensure that we invalidate the freed btree buffer]
Signed-off-by: Christoph Hellwig <hch@lst.de>
libxfs/xfs_ag_resv.c
libxfs/xfs_refcount_btree.c
libxfs/xfs_refcount_btree.h
libxfs/xfs_rmap_btree.c
libxfs/xfs_rmap_btree.h

index 0869cc70fe88ebd36f8838256cec63fe74d37310..7e03328f99a547db0fcb3f147bb51e709d2cafcd 100644 (file)
@@ -37,6 +37,7 @@
 #include "xfs_trans_space.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_btree.h"
+#include "xfs_refcount_btree.h"
 
 /*
  * Per-AG Block Reservations
@@ -227,6 +228,11 @@ xfs_ag_resv_init(
        if (pag->pag_meta_resv.ar_asked == 0) {
                ask = used = 0;
 
+               error = xfs_refcountbt_calc_reserves(pag->pag_mount,
+                               pag->pag_agno, &ask, &used);
+               if (error)
+                       goto out;
+
                error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
                                ask, used);
                if (error)
@@ -237,6 +243,11 @@ xfs_ag_resv_init(
        if (pag->pag_agfl_resv.ar_asked == 0) {
                ask = used = 0;
 
+               error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno,
+                               &ask, &used);
+               if (error)
+                       goto out;
+
                error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
                if (error)
                        goto out;
index e153c53efa3ca0c6eb17ed3c3532f1d3e6af33d8..50c4682ef9068f26c79390332ad94d7b9a4e2f42 100644 (file)
@@ -78,6 +78,8 @@ xfs_refcountbt_alloc_block(
        struct xfs_alloc_arg    args;           /* block allocation args */
        int                     error;          /* error return value */
 
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
        memset(&args, 0, sizeof(args));
        args.tp = cur->bc_tp;
        args.mp = cur->bc_mp;
@@ -87,6 +89,7 @@ xfs_refcountbt_alloc_block(
        args.firstblock = args.fsbno;
        xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_REFC);
        args.minlen = args.maxlen = args.prod = 1;
+       args.resv = XFS_AG_RESV_METADATA;
 
        error = xfs_alloc_vextent(&args);
        if (error)
@@ -124,16 +127,19 @@ xfs_refcountbt_free_block(
        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
        xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
        struct xfs_owner_info   oinfo;
+       int                     error;
 
        trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
                        XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
        be32_add_cpu(&agf->agf_refcount_blocks, -1);
        xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
-       xfs_bmap_add_free(mp, cur->bc_private.a.dfops, fsbno, 1,
-                       &oinfo);
+       error = xfs_free_extent(cur->bc_tp, fsbno, 1, &oinfo,
+                       XFS_AG_RESV_METADATA);
+       if (error)
+               return error;
 
-       return 0;
+       return error;
 }
 
 STATIC int
@@ -386,3 +392,59 @@ xfs_refcountbt_compute_maxlevels(
        mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(mp,
                        mp->m_refc_mnr, mp->m_sb.sb_agblocks);
 }
+
+/* Refcount btree size for len records, from xfs_btree_calc_size(). */
+xfs_extlen_t
+xfs_refcountbt_calc_size(
+       struct xfs_mount        *mp,
+       unsigned long long      len)
+{
+       return xfs_btree_calc_size(mp, mp->m_refc_mnr, len);
+}
+
+/*
+ * Maximum refcount btree size: the size for sb_agblocks records.
+ */
+xfs_extlen_t
+xfs_refcountbt_max_size(
+       struct xfs_mount        *mp)
+{
+       /* m_refc_mxr[0] == 0 means uninitialized, which can happen in mkfs. */
+       if (mp->m_refc_mxr[0] == 0)
+               return 0;
+
+       return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks);
+}
+
+/*
+ * Add the max refcount btree size to *ask and agf_refcount_blocks to *used.
+ */
+int
+xfs_refcountbt_calc_reserves(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_extlen_t            *ask,
+       xfs_extlen_t            *used)
+{
+       struct xfs_buf          *agbp;
+       struct xfs_agf          *agf;
+       xfs_extlen_t            tree_len;
+       int                     error;
+
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return 0;       /* no reflink: *ask and *used are left untouched */
+
+       *ask += xfs_refcountbt_max_size(mp);    /* stays added even if the AGF read fails */
+
+       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error)
+               return error;
+
+       agf = XFS_BUF_TO_AGF(agbp);
+       tree_len = be32_to_cpu(agf->agf_refcount_blocks);       /* copy out before release */
+       xfs_buf_relse(agbp);
+
+       *used += tree_len;
+
+       return error;   /* always 0 here; nonzero errors returned above */
+}
index 9e9ad7c6d267eb2f1fc7fb5ed296dbe3d7211d38..3be7768bd51a1c0ebd8c2ccc6e930a730bd39b54 100644 (file)
@@ -64,4 +64,11 @@ extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen,
                bool leaf);
 extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
 
+extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp,
+               unsigned long long len);
+extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp);
+
+extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp,
+               xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
+
 #endif /* __XFS_REFCOUNT_BTREE_H__ */
index e4ace774ab8ffef1bcc224e071b410593ff6ae63..d11112aaceb5a31900abf81e4c027acd7a573c30 100644 (file)
@@ -33,6 +33,7 @@
 #include "xfs_rmap_btree.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
+#include "xfs_ag_resv.h"
 
 /*
  * Reverse map btree.
@@ -531,3 +532,62 @@ xfs_rmapbt_compute_maxlevels(
                mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
                                mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
 }
+
+/* Calculate the rmap btree size for len records via xfs_btree_calc_size(). */
+xfs_extlen_t
+xfs_rmapbt_calc_size(
+       struct xfs_mount        *mp,
+       unsigned long long      len)
+{
+       return xfs_btree_calc_size(mp, mp->m_rmap_mnr, len);
+}
+
+/*
+ * Calculate the maximum rmap btree size: the size for sb_agblocks records.
+ */
+xfs_extlen_t
+xfs_rmapbt_max_size(
+       struct xfs_mount        *mp)
+{
+       /* m_rmap_mxr[0] == 0 means uninitialized, which can happen in mkfs. */
+       if (mp->m_rmap_mxr[0] == 0)
+               return 0;
+
+       return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks);
+}
+
+/*
+ * Add the rmap btree reservation to *ask and agf_rmap_blocks to *used.
+ */
+int
+xfs_rmapbt_calc_reserves(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_extlen_t            *ask,
+       xfs_extlen_t            *used)
+{
+       struct xfs_buf          *agbp;
+       struct xfs_agf          *agf;
+       xfs_extlen_t            pool_len;
+       xfs_extlen_t            tree_len;
+       int                     error;
+
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return 0;       /* no rmapbt: *ask and *used are left untouched */
+
+       /* Reserve the larger of 1% of the AG or the maximum rmap btree size. */
+       pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp));
+       *ask += pool_len;       /* stays added even if the AGF read fails */
+
+       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error)
+               return error;
+
+       agf = XFS_BUF_TO_AGF(agbp);
+       tree_len = be32_to_cpu(agf->agf_rmap_blocks);   /* copy out before release */
+       xfs_buf_relse(agbp);
+
+       *used += tree_len;
+
+       return error;   /* always 0 here; nonzero errors returned above */
+}
index 5ff9cfa18cfd3e5918d97d8c77ae9dff24c58ad1..f3137a3118532595c883c0c434e399dbdb8f5a77 100644 (file)
@@ -58,4 +58,11 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
 int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
 extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
 
+extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
+               unsigned long long len);
+extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp);
+
+extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp,
+               xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
+
 #endif /* __XFS_RMAP_BTREE_H__ */