xfs: rmap btree requires more reserved free space

author Darrick J. Wong <darrick.wong@oracle.com>

Wed, 10 Aug 2016 04:35:53 +0000 (14:35 +1000)

committer Dave Chinner <david@fromorbit.com>

Wed, 10 Aug 2016 04:35:53 +0000 (14:35 +1000)
author Darrick J. Wong <darrick.wong@oracle.com>
Wed, 10 Aug 2016 04:35:53 +0000 (14:35 +1000)
committer Dave Chinner <david@fromorbit.com>
Wed, 10 Aug 2016 04:35:53 +0000 (14:35 +1000)
diff --git a/include/xfs_mount.h b/include/xfs_mount.h

index 7d63c93d3cc84937c5205ac986f521aaebbb7e31..5cd94644109c9f28b57cd6ecafc4a2e6b1847124 100644 (file)
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -71,6 +71,8 @@ typedef struct xfs_mount {
         uint                    m_in_maxlevels; /* XFS_IN_MAXLEVELS */
         uint                    m_rmap_maxlevels; /* max rmap btree levels */
         xfs_extlen_t            m_ag_prealloc_blocks; /* reserved ag blocks */
+       uint                    m_alloc_set_aside; /* space we can't use */
+       uint                    m_ag_max_usable; /* max space per AG */
         struct radix_tree_root  m_perag_tree;
         uint                    m_flags;        /* global mount flags */
         uint                    m_qflags;       /* quota status flags */
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c

index 1678bc18e31a612b4107f07cb4931e10d58e7954..a884e434be0e3533bbbc4c56f4fd29231c147eaf 100644 (file)
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -58,6 +58,70 @@ xfs_prealloc_blocks(
         return XFS_IBT_BLOCK(mp) + 1;
  }
  
+/*
+ * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
+ * AGF buffer (PV 947395), we place constraints on the relationship among
+ * actual allocations for data blocks, freelist blocks, and potential file data
+ * bmap btree blocks. However, these restrictions may result in no actual space
+ * allocated for a delayed extent, for example, a data block in a certain AG is
+ * allocated but there is no additional block for the additional bmap btree
+ * block due to a split of the bmap btree of the file. The result of this may
+ * lead to an infinite loop when the file gets flushed to disk and all delayed
+ * extents need to be actually allocated. To get around this, we explicitly set
+ * aside a few blocks which will not be reserved in delayed allocation.
+ *
+ * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
+ * (XFS_ALLOC_AGFL_RESERVE) and 4 more in total to handle a potential split of
+ * the file's bmap btree.
+ *
+ * When rmap is enabled, we must also be able to handle two rmap btree inserts
+ * to record both the file data extent and a new bmbt block.  The bmbt block
+ * might not be in the same AG as the file data extent.  In the worst case
+ * the bmap btree splits multiple levels and all the new blocks come from
+ * different AGs, so set aside enough to handle rmap btree splits in all AGs
+ * (m_rmap_maxlevels blocks per AG).
+ *
+ * Returns the total number of blocks to set aside, summed over all
+ * sb_agcount AGs.
+ */
+unsigned int
+xfs_alloc_set_aside(
+       struct xfs_mount        *mp)
+{
+       unsigned int            blocks;
+
+       blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
+       return blocks;
+}
+
+/*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size of the AG. However, we cannot use all
+ * the blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ *     - the AG superblock, AGF, AGI and AGFL
+ *     - the AGF (bno and cnt) and AGI btree root blocks, and optionally
+ *       the AGI free inode btree root block.
+ *     - XFS_ALLOC_AGFL_RESERVE blocks on the AGFL, the same per-AG freelist
+ *       reserve that xfs_alloc_set_aside() accounts for
+ *     - the rmapbt root block, if the rmap btree is enabled
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ *
+ * Returns sb_agblocks minus the number of blocks counted above.
+ */
+unsigned int
+xfs_alloc_ag_max_usable(
+       struct xfs_mount        *mp)
+{
+       unsigned int            blocks;
+
+       blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
+       blocks += XFS_ALLOC_AGFL_RESERVE;
+       blocks += 3;                    /* AGF, AGI btree root blocks */
+       if (xfs_sb_version_hasfinobt(&mp->m_sb))
+               blocks++;               /* finobt root block */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               blocks++;               /* rmap root block */
+
+       return mp->m_sb.sb_agblocks - blocks;
+}
+
  /*
   * Lookup the record equal to [bno, len] in the btree given by cur.
   */
@@ -1900,6 +1964,11 @@ xfs_alloc_min_freelist(
         /* space needed by-size freespace btree */
         min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
                                        mp->m_ag_maxlevels);
+       /* space needed by the reverse mapping (used space) btree */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               min_free += min_t(unsigned int,
+                                 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
+                                 mp->m_rmap_maxlevels);
  
         return min_free;
  }
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h

index 88053422fda036478aa1c677ee77f323ae0a7677..360f9e42553612867cfe7f196c33a50a6959292c 100644 (file)
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -55,42 +55,6 @@ typedef unsigned int xfs_alloctype_t;
  #define        XFS_ALLOC_FLAG_TRYLOCK  0x00000001  /* use trylock for buffer locking */
  #define        XFS_ALLOC_FLAG_FREEING  0x00000002  /* indicate caller is freeing extents*/
  
-/*
- * In order to avoid ENOSPC-related deadlock caused by
- * out-of-order locking of AGF buffer (PV 947395), we place
- * constraints on the relationship among actual allocations for
- * data blocks, freelist blocks, and potential file data bmap
- * btree blocks. However, these restrictions may result in no
- * actual space allocated for a delayed extent, for example, a data
- * block in a certain AG is allocated but there is no additional
- * block for the additional bmap btree block due to a split of the
- * bmap btree of the file. The result of this may lead to an
- * infinite loop in xfssyncd when the file gets flushed to disk and
- * all delayed extents need to be actually allocated. To get around
- * this, we explicitly set aside a few blocks which will not be
- * reserved in delayed allocation. Considering the minimum number of
- * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
- * btree requires 1 fsb, so we set the number of set-aside blocks
- * to 4 + 4*agcount.
- */
-#define XFS_ALLOC_SET_ASIDE(mp)  (4 + ((mp)->m_sb.sb_agcount * 4))
-
-/*
- * When deciding how much space to allocate out of an AG, we limit the
- * allocation maximum size to the size the AG. However, we cannot use all the
- * blocks in the AG - some are permanently used by metadata. These
- * blocks are generally:
- *     - the AG superblock, AGF, AGI and AGFL
- *     - the AGF (bno and cnt) and AGI btree root blocks
- *     - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
- *
- * The AG headers are sector sized, so the amount of space they take up is
- * dependent on filesystem geometry. The others are all single blocks.
- */
-#define XFS_ALLOC_AG_MAX_USABLE(mp)    \
-       ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
-
-
  /*
   * Argument structure for xfs_alloc routines.
   * This is turned into a structure to avoid having 20 arguments passed
@@ -133,6 +97,11 @@ typedef struct xfs_alloc_arg {
  #define XFS_ALLOC_INITIAL_USER_DATA    (1 << 1)/* special case start of file */
  #define XFS_ALLOC_USERDATA_ZERO                (1 << 2)/* zero extent on allocation */
  
+/* freespace limit calculations */
+#define XFS_ALLOC_AGFL_RESERVE 4
+unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
+unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
+
  xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
                 struct xfs_perag *pag, xfs_extlen_t need);
  unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c

index 1ec2d0a63e1c93f97834fa2886f3ceccd49edb62..8c075fcf33a2fdd76948a7c8dcb0d0745c9c546e 100644 (file)
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -3666,7 +3666,7 @@ xfs_bmap_btalloc(
         xfs_rmap_skip_owner_update(&args.oinfo);
  
         /* Trim the allocation back to the maximum an AG can fit. */
-       args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
+       args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
         args.firstblock = *ap->firstblock;
         blen = 0;
         if (nullfb) {
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c

index 67e099bb42dddd3a7bd0c4dc8ec06e9e69ac902e..4ff3677247b9e3cfe04602b2b781edcb1a590965 100644 (file)
--- a/libxfs/xfs_sb.c
+++ b/libxfs/xfs_sb.c
@@ -730,6 +730,8 @@ xfs_sb_mount_common(
                 mp->m_ialloc_min_blks = sbp->sb_spino_align;
         else
                 mp->m_ialloc_min_blks = mp->m_ialloc_blks;
+       mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+       mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
  }
  
  /*
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c

index 0852a5344b2a364295ed8a683c8a3894c1eaf710..dad809a0024bf8ab7132fafe8c56aaa24e1c551c 100644 (file)
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -2655,7 +2655,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
                  */
                 if (!logsize) {
                         logblocks = MIN(logblocks,
-                                       XFS_ALLOC_AG_MAX_USABLE(mp));
+                                       xfs_alloc_ag_max_usable(mp));
  
                         /* revalidate the log size is valid if we changed it */
                         validate_log_size(logblocks, blocklog, min_logblocks);
author	Darrick J. Wong <darrick.wong@oracle.com>
	Wed, 10 Aug 2016 04:35:53 +0000 (14:35 +1000)
committer	Dave Chinner <david@fromorbit.com>
	Wed, 10 Aug 2016 04:35:53 +0000 (14:35 +1000)
include/xfs_mount.h		patch \| blob \| blame \| history
libxfs/xfs_alloc.c		patch \| blob \| blame \| history
libxfs/xfs_alloc.h		patch \| blob \| blame \| history
libxfs/xfs_bmap.c		patch \| blob \| blame \| history
libxfs/xfs_sb.c		patch \| blob \| blame \| history
mkfs/xfs_mkfs.c		patch \| blob \| blame \| history