iomap: optimize pending async writeback accounting
author Joanne Koong <joannelkoong@gmail.com>
Tue, 11 Nov 2025 19:36:53 +0000 (11:36 -0800)
committer Christian Brauner <brauner@kernel.org>
Wed, 12 Nov 2025 09:50:32 +0000 (10:50 +0100)
Pending writebacks must be accounted for to determine when all requests
have completed and writeback on the folio should be ended. Currently
this is done by atomically incrementing ifs->write_bytes_pending for
every range to be written back.

Instead, the number of atomic operations can be minimized by setting
ifs->write_bytes_pending to the folio size, internally tracking how many
bytes are written back asynchronously, and then after sending off all
the requests, decrementing ifs->write_bytes_pending by the number of
bytes not written back asynchronously. Now, for N ranges written back,
only N + 2 atomic operations are required instead of 2N + 2.
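
To make the arithmetic concrete: with N = 4 dirty ranges, the old scheme
costs 2*4 + 2 = 10 atomic operations (one bias increment, one add and one
sub per range, one final decrement), while the new scheme costs 4 + 2 = 6
(one initial set, one sub per completed range, one final sub for the
unsubmitted remainder). A minimal sketch of the pattern follows; the helper
names are illustrative stand-ins, not the exact kernel code:

	/* Set up front: assume every byte of the folio will be written back.
	 * This is the single atomic operation at submission start. */
	static void writeback_init(struct iomap_folio_state *ifs,
				   struct folio *folio)
	{
		atomic_set(&ifs->write_bytes_pending, folio_size(folio));
	}

	/* One atomic sub per completed range; the last one ends writeback. */
	static void writeback_range_done(struct iomap_folio_state *ifs,
					 struct folio *folio, size_t len)
	{
		if (atomic_sub_and_test(len, &ifs->write_bytes_pending))
			folio_end_writeback(folio);
	}

	/* After submitting all ranges, drop the bytes that were never
	 * submitted (holes, clean blocks) in a single atomic operation. */
	static void writeback_submitted(struct iomap_folio_state *ifs,
					struct folio *folio, size_t submitted)
	{
		size_t not_submitted = folio_size(folio) - submitted;

		if (not_submitted)
			writeback_range_done(ifs, folio, not_submitted);
	}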

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://patch.msgid.link/20251111193658.3495942-5-joannelkoong@gmail.com
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/fuse/file.c
fs/iomap/buffered-io.c
fs/iomap/ioend.c
include/linux/iomap.h

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8275b6681b9baf404c8ebf2d781e0ae5851c6644..b343a6f37563af68215d5e737b076234b4e7ff96 100644
@@ -1885,7 +1885,8 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
                 * scope of the fi->lock alleviates xarray lock
                 * contention and noticeably improves performance.
                 */
-               iomap_finish_folio_write(inode, ap->folios[i], 1);
+               iomap_finish_folio_write(inode, ap->folios[i],
+                                        ap->descs[i].length);
 
        wake_up(&fi->page_waitq);
 }
@@ -2221,7 +2222,6 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
                ap = &wpa->ia.ap;
        }
 
-       iomap_start_folio_write(inode, folio, 1);
        fuse_writepage_args_page_fill(wpa, folio, ap->num_folios,
                                      offset, len);
        data->nr_bytes += len;
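
Since ifs->write_bytes_pending now starts at folio_size() rather than
counting started ranges, fuse's completion path must subtract the real byte
length of each written-back folio range instead of a token count of 1 (and
the per-range iomap_start_folio_write() call goes away entirely). Roughly,
the completion loop now reads as follows (abbreviated from
fuse_writepage_finish(); surrounding locking omitted):

	for (i = 0; i < ap->num_folios; i++)
		/* Subtract this range's actual length so the counter that
		 * was set to folio_size() drains to zero exactly when all
		 * submitted bytes have completed. */
		iomap_finish_folio_write(inode, ap->folios[i],
					 ap->descs[i].length);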
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0eb439b523b14c6b829cb19db6dfba9bfff7c5b3..1873a2f74883e2ce5103c9d05beb6c1884d1c6d6 100644
@@ -1641,16 +1641,25 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
 
-void iomap_start_folio_write(struct inode *inode, struct folio *folio,
-               size_t len)
+static void iomap_writeback_init(struct inode *inode, struct folio *folio)
 {
        struct iomap_folio_state *ifs = folio->private;
 
        WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
-       if (ifs)
-               atomic_add(len, &ifs->write_bytes_pending);
+       if (ifs) {
+               WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
+               /*
+                * Set this to the folio size. After processing the folio for
+                * writeback in iomap_writeback_folio(), we'll subtract any
+                * ranges not written back.
+                *
+                * We do this because otherwise, we would have to atomically
+                * increment ifs->write_bytes_pending every time a range in the
+                * folio needs to be written back.
+                */
+               atomic_set(&ifs->write_bytes_pending, folio_size(folio));
+       }
 }
-EXPORT_SYMBOL_GPL(iomap_start_folio_write);
 
 void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
                size_t len)
@@ -1667,7 +1676,7 @@ EXPORT_SYMBOL_GPL(iomap_finish_folio_write);
 
 static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
                struct folio *folio, u64 pos, u32 rlen, u64 end_pos,
-               bool *wb_pending)
+               size_t *bytes_submitted)
 {
        do {
                ssize_t ret;
@@ -1681,11 +1690,11 @@ static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
                pos += ret;
 
                /*
-                * Holes are not be written back by ->writeback_range, so track
+                * Holes are not written back by ->writeback_range, so track
                 * if we did handle anything that is not a hole here.
                 */
                if (wpc->iomap.type != IOMAP_HOLE)
-                       *wb_pending = true;
+                       *bytes_submitted += ret;
        } while (rlen);
 
        return 0;
@@ -1756,7 +1765,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
        u64 pos = folio_pos(folio);
        u64 end_pos = pos + folio_size(folio);
        u64 end_aligned = 0;
-       bool wb_pending = false;
+       size_t bytes_submitted = 0;
        int error = 0;
        u32 rlen;
 
@@ -1776,14 +1785,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
                        iomap_set_range_dirty(folio, 0, end_pos - pos);
                }
 
-               /*
-                * Keep the I/O completion handler from clearing the writeback
-                * bit until we have submitted all blocks by adding a bias to
-                * ifs->write_bytes_pending, which is dropped after submitting
-                * all blocks.
-                */
-               WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
-               iomap_start_folio_write(inode, folio, 1);
+               iomap_writeback_init(inode, folio);
        }
 
        /*
@@ -1798,13 +1800,13 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
        end_aligned = round_up(end_pos, i_blocksize(inode));
        while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
                error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos,
-                               &wb_pending);
+                               &bytes_submitted);
                if (error)
                        break;
                pos += rlen;
        }
 
-       if (wb_pending)
+       if (bytes_submitted)
                wpc->nr_folios++;
 
        /*
@@ -1822,12 +1824,20 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
         * bit ourselves right after unlocking the page.
         */
        if (ifs) {
-               if (atomic_dec_and_test(&ifs->write_bytes_pending))
-                       folio_end_writeback(folio);
-       } else {
-               if (!wb_pending)
-                       folio_end_writeback(folio);
+               /*
+                * Subtract any bytes that were initially accounted to
+                * write_bytes_pending but skipped for writeback.
+                */
+               size_t bytes_not_submitted = folio_size(folio) -
+                               bytes_submitted;
+
+               if (bytes_not_submitted)
+                       iomap_finish_folio_write(inode, folio,
+                                       bytes_not_submitted);
+       } else if (!bytes_submitted) {
+               folio_end_writeback(folio);
        }
+
        mapping_set_error(inode->i_mapping, error);
        return error;
 }
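
For reference, the completion helper this all pairs with keeps its shape; a
sketch of iomap_finish_folio_write() consistent with the accounting above
(not verbatim kernel source):

	void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
			size_t len)
	{
		struct iomap_folio_state *ifs = folio->private;

		WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
		WARN_ON_ONCE(ifs &&
			     atomic_read(&ifs->write_bytes_pending) <= 0);

		/* End writeback once every byte accounted in
		 * iomap_writeback_init() has been subtracted back out. */
		if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending))
			folio_end_writeback(folio);
	}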
diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
index b49fa75eab260a2e046996c468a91e09628a2687..86f44922ed3b6a52ad1529bd69c9160ff52e8975 100644
@@ -194,8 +194,6 @@ new_ioend:
        if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff))
                goto new_ioend;
 
-       iomap_start_folio_write(wpc->inode, folio, map_len);
-
        /*
         * Clamp io_offset and io_size to the incore EOF so that ondisk
         * file size updates in the ioend completion are byte-accurate.
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a5032e45607986ae51999794820eed96a8e8c837..b49e47f069dbd1fbe3af3df5d5b0c067df11b454 100644
@@ -478,8 +478,6 @@ int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error);
 
 void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
                int error);
-void iomap_start_folio_write(struct inode *inode, struct folio *folio,
-               size_t len);
 void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
                size_t len);