blk-wbt: don't throttle swap writes in direct reclaim
author Baokun Li <libaokun1@huawei.com>
Tue, 4 Jun 2024 03:05:22 +0000 (11:05 +0800)
committer Jens Axboe <axboe@kernel.dk>
Mon, 1 Jul 2024 12:51:53 +0000 (06:51 -0600)
Currently we avoid throttling swap writes by checking whether the
current process is kswapd (i.e. current_is_kswapd()), but swap writes
can come from either kswapd or direct reclaim, so swap writes issued
from direct reclaim are still throttled.
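
For reference, here is a minimal standalone C model of the two policies
(the flag value and helper names are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's REQ_SWAP request flag. */
#define REQ_SWAP	(1u << 0)

/* Old policy: exempt a write from throttling only when the issuer is
 * kswapd, so swap writes from direct reclaim are still throttled.
 */
static bool old_exempt(unsigned int opf, bool is_kswapd)
{
	(void)opf;
	return is_kswapd;
}

/* New policy: exempt any bio tagged REQ_SWAP, which covers swap
 * writes from both kswapd and direct reclaim.
 */
static bool new_exempt(unsigned int opf, bool is_kswapd)
{
	(void)is_kswapd;
	return opf & REQ_SWAP;
}

int main(void)
{
	/* A swap write from direct reclaim: REQ_SWAP is set, but the
	 * current task is not kswapd.
	 */
	unsigned int opf = REQ_SWAP;

	printf("old policy exempts: %d\n", old_exempt(opf, false)); /* 0 */
	printf("new policy exempts: %d\n", new_exempt(opf, false)); /* 1 */
	return 0;
}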

When a process holds a lock while allocating a free page and enters
direct reclaim because no free memory is available, wbt throttling of
its swap writes can stall the reclaim, leaving other processes unable
to acquire the lock and triggering hung tasks.
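
A rough pthread sketch of that ordering (everything here is
illustrative; the sleep stands in for the wbt throttling delay):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Task A: takes the lock, then "allocates" and falls into direct
 * reclaim, where its swap write used to be throttled by wbt.
 */
static void *task_a(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	sleep(5);	/* stand-in for the throttled swap write */
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Task B: blocks on the same lock for as long as A's swap write is
 * throttled; with long enough delays this is reported as a hung task.
 */
static void *task_b(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	puts("task B finally got the lock");
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, task_a, NULL);
	sleep(1);	/* let task A take the lock first */
	pthread_create(&b, NULL, task_b, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}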

Both kswapd and direct reclaim set the REQ_SWAP flag, so use REQ_SWAP
instead of current_is_kswapd() to avoid throttling swap writes. Also
rename WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to WBT_RWQ_SWAP
accordingly.

Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20240604030522.3686177-1-libaokun@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-wbt.c

index 1a5e4b049ecd1dcfee6c5a033bfd32947acd5753..6dfc659d22e2b744bde01a7e24426e291d6f7b53 100644 (file)
@@ -37,7 +37,7 @@
 enum wbt_flags {
        WBT_TRACKED             = 1,    /* write, tracked for throttling */
        WBT_READ                = 2,    /* read */
-       WBT_KSWAPD              = 4,    /* write, from kswapd */
+       WBT_SWAP                = 4,    /* write, from swap_writepage() */
        WBT_DISCARD             = 8,    /* discard */
 
        WBT_NR_BITS             = 4,    /* number of bits */
@@ -45,7 +45,7 @@ enum wbt_flags {
 
 enum {
        WBT_RWQ_BG              = 0,
-       WBT_RWQ_KSWAPD,
+       WBT_RWQ_SWAP,
        WBT_RWQ_DISCARD,
        WBT_NUM_RWQ,
 };
@@ -172,8 +172,8 @@ static bool wb_recent_wait(struct rq_wb *rwb)
 static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
                                          enum wbt_flags wb_acct)
 {
-       if (wb_acct & WBT_KSWAPD)
-               return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+       if (wb_acct & WBT_SWAP)
+               return &rwb->rq_wait[WBT_RWQ_SWAP];
        else if (wb_acct & WBT_DISCARD)
                return &rwb->rq_wait[WBT_RWQ_DISCARD];
 
@@ -528,7 +528,7 @@ static bool close_io(struct rq_wb *rwb)
                time_before(now, rwb->last_comp + HZ / 10);
 }
 
-#define REQ_HIPRIO     (REQ_SYNC | REQ_META | REQ_PRIO)
+#define REQ_HIPRIO     (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)
 
 static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 {
@@ -539,13 +539,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 
        /*
         * At this point we know it's a buffered write. If this is
-        * kswapd trying to free memory, or REQ_SYNC is set, then
+        * swap trying to free memory, or REQ_SYNC is set, then
         * it's WB_SYNC_ALL writeback, and we'll use the max limit for
         * that. If the write is marked as a background write, then use
         * the idle limit, or go to normal if we haven't had competing
         * IO for a bit.
         */
-       if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+       if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb))
                limit = rwb->rq_depth.max_depth;
        else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
                /*
@@ -622,8 +622,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
        if (bio_op(bio) == REQ_OP_READ) {
                flags = WBT_READ;
        } else if (wbt_should_throttle(bio)) {
-               if (current_is_kswapd())
-                       flags |= WBT_KSWAPD;
+               if (bio->bi_opf & REQ_SWAP)
+                       flags |= WBT_SWAP;
                if (bio_op(bio) == REQ_OP_DISCARD)
                        flags |= WBT_DISCARD;
                flags |= WBT_TRACKED;
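
Taken together with the REQ_HIPRIO change above, a swap write now gets
the maximum queue depth from its bio flags alone. A rough standalone
model of that check (the flag bits and depths are illustrative, not
the kernel's values):

#include <stdio.h>

/* Illustrative flag bits; the kernel's actual values differ. */
#define REQ_SYNC	(1u << 0)
#define REQ_META	(1u << 1)
#define REQ_PRIO	(1u << 2)
#define REQ_SWAP	(1u << 3)

/* With REQ_SWAP in the high-priority mask, swap writes take the max
 * limit without any current_is_kswapd() check.
 */
#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)

static unsigned int get_limit(unsigned int opf, unsigned int max_depth)
{
	if (opf & REQ_HIPRIO)
		return max_depth;
	return max_depth / 2;	/* stand-in for the background paths */
}

int main(void)
{
	printf("swap write limit:  %u\n", get_limit(REQ_SWAP, 64)); /* 64 */
	printf("plain write limit: %u\n", get_limit(0, 64));        /* 32 */
	return 0;
}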