git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs: improve handling of busy extents in the low-level allocator
author Christoph Hellwig <hch@lst.de>
Tue, 4 Apr 2017 20:37:44 +0000 (15:37 -0500)
committer Eric Sandeen <sandeen@redhat.com>
Tue, 4 Apr 2017 20:37:44 +0000 (15:37 -0500)
Source kernel commit: ebf55872616c7d4754db5a318591a72a8d5e6896

Currently we force the log and simply try again if we hit a busy extent,
but especially with online discard enabled it might take a while after
the log force for the busy extents to disappear, and we might have
already completed our second pass.

So instead we add a new waitqueue and a generation counter to the pag
structure so that we can do wakeups once we've removed busy extents,
and we replace the single retry with an unconditional one - after
all we hold the AGF buffer lock, so no other allocations or frees
can be racing with us in this AG.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
libxfs/libxfs_priv.h
libxfs/xfs_alloc.c

index 14ae78897d46708ce61839455a4ce74a9bd1129c..839150088098d39649c9dd3680e6635403767da3 100644 (file)
@@ -411,11 +411,12 @@ roundup_64(__uint64_t x, __uint32_t y)
        xfs_agnumber_t __foo = ag;                      \
        __foo = __foo; /* no set-but-unused warning */  \
 })
-#define xfs_extent_busy_trim(args,fbno,flen,bno,len) \
-do { \
-       *(bno) = (fbno); \
-       *(len) = (flen); \
-} while (0)
+#define xfs_extent_busy_trim(args,bno,len,busy_gen)    ({      \
+       unsigned __foo = *(busy_gen);                           \
+       *(busy_gen) = __foo;                                    \
+       false;                                                  \
+})
+#define xfs_extent_busy_flush(mp,pag,busy_gen)         (0)
 
 /* avoid unused variable warning */
 #define xfs_alloc_busy_insert(tp,ag,b,len)     ({      \
index c8797571286abcab3da626849a35f5650f72fe32..81695bfb18c6cd21e368c8ddf8bd3f016eff83f1 100644 (file)
@@ -217,20 +217,22 @@ xfs_alloc_get_rec(
  * Compute aligned version of the found extent.
  * Takes alignment and min length into account.
  */
-STATIC void
+STATIC bool
 xfs_alloc_compute_aligned(
        xfs_alloc_arg_t *args,          /* allocation argument structure */
        xfs_agblock_t   foundbno,       /* starting block in found extent */
        xfs_extlen_t    foundlen,       /* length in found extent */
        xfs_agblock_t   *resbno,        /* result block number */
-       xfs_extlen_t    *reslen)        /* result length */
+       xfs_extlen_t    *reslen,        /* result length */
+       unsigned        *busy_gen)
 {
-       xfs_agblock_t   bno;
-       xfs_extlen_t    len;
+       xfs_agblock_t   bno = foundbno;
+       xfs_extlen_t    len = foundlen;
        xfs_extlen_t    diff;
+       bool            busy;
 
        /* Trim busy sections out of found extent */
-       xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+       busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
 
        /*
         * If we have a largish extent that happens to start before min_agbno,
@@ -255,6 +257,8 @@ xfs_alloc_compute_aligned(
                *resbno = bno;
                *reslen = len;
        }
+
+       return busy;
 }
 
 /*
@@ -733,10 +737,11 @@ xfs_alloc_ag_vextent_exact(
        int             error;
        xfs_agblock_t   fbno;   /* start block of found extent */
        xfs_extlen_t    flen;   /* length of found extent */
-       xfs_agblock_t   tbno;   /* start block of trimmed extent */
-       xfs_extlen_t    tlen;   /* length of trimmed extent */
-       xfs_agblock_t   tend;   /* end block of trimmed extent */
+       xfs_agblock_t   tbno;   /* start block of busy extent */
+       xfs_extlen_t    tlen;   /* length of busy extent */
+       xfs_agblock_t   tend;   /* end block of busy extent */
        int             i;      /* success/failure of operation */
+       unsigned        busy_gen;
 
        ASSERT(args->alignment == 1);
 
@@ -769,7 +774,9 @@ xfs_alloc_ag_vextent_exact(
        /*
         * Check for overlapping busy extents.
         */
-       xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+       tbno = fbno;
+       tlen = flen;
+       xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
 
        /*
         * Give up if the start of the extent is busy, or the freespace isn't
@@ -849,6 +856,7 @@ xfs_alloc_find_best_extent(
        xfs_agblock_t           sdiff;
        int                     error;
        int                     i;
+       unsigned                busy_gen;
 
        /* The good extent is perfect, no need to  search. */
        if (!gdiff)
@@ -862,7 +870,8 @@ xfs_alloc_find_best_extent(
                if (error)
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-               xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
+               xfs_alloc_compute_aligned(args, *sbno, *slen,
+                               sbnoa, slena, &busy_gen);
 
                /*
                 * The good extent is closer than this one.
@@ -951,7 +960,8 @@ xfs_alloc_ag_vextent_near(
        xfs_extlen_t    ltlena;         /* aligned ... */
        xfs_agblock_t   ltnew;          /* useful start bno of left side */
        xfs_extlen_t    rlen;           /* length of returned extent */
-       int             forced = 0;
+       bool            busy;
+       unsigned        busy_gen;
 #ifdef DEBUG
        /*
         * Randomly don't execute the first algorithm.
@@ -978,6 +988,7 @@ restart:
        ltlen = 0;
        gtlena = 0;
        ltlena = 0;
+       busy = false;
 
        /*
         * Get a cursor for the by-size btree.
@@ -1060,8 +1071,8 @@ restart:
                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
-                                                 &ltbnoa, &ltlena);
+                       busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
+                                       &ltbnoa, &ltlena, &busy_gen);
                        if (ltlena < args->minlen)
                                continue;
                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
@@ -1179,8 +1190,8 @@ restart:
                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
-                                                 &ltbnoa, &ltlena);
+                       busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
+                                       &ltbnoa, &ltlena, &busy_gen);
                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
                                break;
                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1195,8 +1206,8 @@ restart:
                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-                       xfs_alloc_compute_aligned(args, gtbno, gtlen,
-                                                 &gtbnoa, &gtlena);
+                       busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
+                                       &gtbnoa, &gtlena, &busy_gen);
                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
                                break;
                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1257,9 +1268,9 @@ restart:
        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 
-               if (!forced++) {
+               if (busy) {
                        trace_xfs_alloc_near_busy(args);
-                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                       xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
                        goto restart;
                }
                trace_xfs_alloc_size_neither(args);
@@ -1340,7 +1351,8 @@ xfs_alloc_ag_vextent_size(
        int             i;              /* temp status variable */
        xfs_agblock_t   rbno;           /* returned block number */
        xfs_extlen_t    rlen;           /* length of returned extent */
-       int             forced = 0;
+       bool            busy;
+       unsigned        busy_gen;
 
 restart:
        /*
@@ -1349,6 +1361,7 @@ restart:
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);
        bno_cur = NULL;
+       busy = false;
 
        /*
         * Look for an entry >= maxlen+alignment-1 blocks.
@@ -1358,14 +1371,13 @@ restart:
                goto error0;
 
        /*
-        * If none or we have busy extents that we cannot allocate from, then
-        * we have to settle for a smaller extent. In the case that there are
-        * no large extents, this will return the last entry in the tree unless
-        * the tree is empty. In the case that there are only busy large
-        * extents, this will return the largest small extent unless there
+        * If none then we have to settle for a smaller extent. In the case that
+        * there are no large extents, this will return the last entry in the
+        * tree unless the tree is empty. In the case that there are only busy
+        * large extents, this will return the largest small extent unless there
         * are no smaller extents available.
         */
-       if (!i || forced > 1) {
+       if (!i) {
                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
                                                   &fbno, &flen, &i);
                if (error)
@@ -1376,13 +1388,11 @@ restart:
                        return 0;
                }
                ASSERT(i == 1);
-               xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+               busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
+                               &rlen, &busy_gen);
        } else {
                /*
                 * Search for a non-busy extent that is large enough.
-                * If we are at low space, don't check, or if we fall of
-                * the end of the btree, turn off the busy check and
-                * restart.
                 */
                for (;;) {
                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
@@ -1390,8 +1400,8 @@ restart:
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 
-                       xfs_alloc_compute_aligned(args, fbno, flen,
-                                                 &rbno, &rlen);
+                       busy = xfs_alloc_compute_aligned(args, fbno, flen,
+                                       &rbno, &rlen, &busy_gen);
 
                        if (rlen >= args->maxlen)
                                break;
@@ -1403,18 +1413,13 @@ restart:
                                /*
                                 * Our only valid extents must have been busy.
                                 * Make it unbusy by forcing the log out and
-                                * retrying. If we've been here before, forcing
-                                * the log isn't making the extents available,
-                                * which means they have probably been freed in
-                                * this transaction.  In that case, we have to
-                                * give up on them and we'll attempt a minlen
-                                * allocation the next time around.
+                                * retrying.
                                 */
                                xfs_btree_del_cursor(cnt_cur,
                                                     XFS_BTREE_NOERROR);
                                trace_xfs_alloc_size_busy(args);
-                               if (!forced++)
-                                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                               xfs_extent_busy_flush(args->mp,
+                                                       args->pag, busy_gen);
                                goto restart;
                        }
                }
@@ -1450,8 +1455,8 @@ restart:
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        if (flen < bestrlen)
                                break;
-                       xfs_alloc_compute_aligned(args, fbno, flen,
-                                                 &rbno, &rlen);
+                       busy = xfs_alloc_compute_aligned(args, fbno, flen,
+                                       &rbno, &rlen, &busy_gen);
                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
                                (rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1480,10 +1485,10 @@ restart:
         */
        args->len = rlen;
        if (rlen < args->minlen) {
-               if (!forced++) {
+               if (busy) {
                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                        trace_xfs_alloc_size_busy(args);
-                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                       xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
                        goto restart;
                }
                goto out_nominleft;