iomap: track pending read bytes more optimally
Author:     Joanne Koong <joannelkoong@gmail.com>
AuthorDate: Fri, 26 Sep 2025 00:26:02 +0000 (17:26 -0700)
Commit:     Christian Brauner <brauner@kernel.org>
CommitDate: Wed, 5 Nov 2025 11:57:23 +0000 (12:57 +0100)

Instead of incrementing read_bytes_pending for every folio range read
in (which requires acquiring the spinlock each time), set
read_bytes_pending to the folio size when the first range is
asynchronously read in, keep a running total of how many bytes are
asynchronously read in, and adjust read_bytes_pending accordingly once
requests have been issued for all the necessary ranges.
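
The pattern can be sketched in plain userspace C. Everything below is
illustrative: needs_io, locked_adjust and lock_acquisitions are
made-up names, and the mutex merely stands in for ifs->state_lock. It
contrasts the per-range locking with the batched scheme; it is not the
iomap code itself:

  /*
   * A 4096-byte "folio" made of eight 512-byte ranges, six of which
   * need IO. Counts how often the state lock is taken per scheme.
   */
  #include <assert.h>
  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
  static long read_bytes_pending;  /* ifs->read_bytes_pending stand-in */
  static unsigned int lock_acquisitions;

  static void locked_adjust(long delta)
  {
          pthread_mutex_lock(&state_lock);
          lock_acquisitions++;
          read_bytes_pending += delta;
          pthread_mutex_unlock(&state_lock);
  }

  int main(void)
  {
          const long folio_size = 4096, range_len = 512;
          const int needs_io[8] = { 0, 1, 1, 1, 0, 1, 1, 1 };
          long bytes_pending = 0;
          int i;

          /* Old scheme: lock once per range submitted for IO. */
          for (i = 0; i < 8; i++)
                  if (needs_io[i])
                          locked_adjust(range_len);
          printf("old: %u lock acquisitions\n", lock_acquisitions);

          /*
           * New scheme: folio size plus a +1 bias up front, then a
           * single fixup after all ranges have been submitted.
           */
          read_bytes_pending = 0;
          lock_acquisitions = 0;
          for (i = 0; i < 8; i++) {
                  if (!needs_io[i])
                          continue;
                  if (!bytes_pending)     /* first IO range only */
                          locked_adjust(folio_size + 1);
                  bytes_pending += range_len;
          }
          /* Drop the bias and the bytes that never went to IO. */
          locked_adjust(-(folio_size + 1 - bytes_pending));
          printf("new: %u lock acquisitions\n", lock_acquisitions);
          assert(read_bytes_pending == bytes_pending);
          return 0;
  }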

iomap_read_folio_ctx->cur_folio_in_bio can be removed since a non-zero
value for pending bytes necessarily indicates the folio is in the bio.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 09e65771a9472b6665934365d10d3b012449407d..f8b17ce549eb60d67174c5cf6e1528dc7b14331f 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -362,7 +362,6 @@ static void iomap_read_end_io(struct bio *bio)
 
 struct iomap_read_folio_ctx {
        struct folio            *cur_folio;
-       bool                    cur_folio_in_bio;
        void                    *read_ctx;
        struct readahead_control *rac;
 };
@@ -380,19 +379,11 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
 {
        struct folio *folio = ctx->cur_folio;
        const struct iomap *iomap = &iter->iomap;
-       struct iomap_folio_state *ifs = folio->private;
        size_t poff = offset_in_folio(folio, pos);
        loff_t length = iomap_length(iter);
        sector_t sector;
        struct bio *bio = ctx->read_ctx;
 
-       ctx->cur_folio_in_bio = true;
-       if (ifs) {
-               spin_lock_irq(&ifs->state_lock);
-               ifs->read_bytes_pending += plen;
-               spin_unlock_irq(&ifs->state_lock);
-       }
-
        sector = iomap_sector(iomap, pos);
        if (!bio || bio_end_sector(bio) != sector ||
            !bio_add_folio(bio, folio, plen, poff)) {
@@ -422,8 +413,93 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
        }
 }
 
+static void iomap_read_init(struct folio *folio)
+{
+       struct iomap_folio_state *ifs = folio->private;
+
+       if (ifs) {
+               size_t len = folio_size(folio);
+
+               /*
+                * ifs->read_bytes_pending is used to track how many bytes are
+                * read in asynchronously by the IO helper. We need to track
+                * this so that we can know when the IO helper has finished
+                * reading in all the necessary ranges of the folio and can end
+                * the read.
+                *
+                * Increase ->read_bytes_pending by the folio size to start, and
+                * add a +1 bias. We'll subtract the bias and any uptodate /
+                * zeroed ranges that did not require IO in iomap_read_end()
+                * after we're done processing the folio.
+                *
+                * We do this because otherwise, we would have to increment
+                * ifs->read_bytes_pending every time a range in the folio needs
+                * to be read in, which can get expensive since the spinlock
+                * needs to be held whenever modifying ifs->read_bytes_pending.
+                *
+                * We add the bias to ensure the read has not been ended on the
+                * folio when iomap_read_end() is called, even if the IO helper
+                * has already finished reading in the entire folio.
+                */
+               spin_lock_irq(&ifs->state_lock);
+               ifs->read_bytes_pending += len + 1;
+               spin_unlock_irq(&ifs->state_lock);
+       }
+}
+
+/*
+ * This ends IO if no bytes were submitted to an IO helper.
+ *
+ * Otherwise, this calibrates ifs->read_bytes_pending to represent only the
+ * submitted bytes (see comment in iomap_read_init()). If all bytes submitted
+ * have already been completed by the IO helper, then this will end the read.
+ * Else the IO helper will end the read after all submitted ranges have been
+ * read.
+ */
+static void iomap_read_end(struct folio *folio, size_t bytes_pending)
+{
+       struct iomap_folio_state *ifs;
+
+       /*
+        * If there are no bytes pending, this means we are responsible for
+        * unlocking the folio here, since no IO helper has taken ownership of
+        * it.
+        */
+       if (!bytes_pending) {
+               folio_unlock(folio);
+               return;
+       }
+
+       ifs = folio->private;
+       if (ifs) {
+               bool end_read, uptodate;
+               /*
+                * Subtract any bytes that were initially accounted to
+                * read_bytes_pending but skipped for IO.
+                * The +1 accounts for the bias we added in iomap_read_init().
+                */
+               size_t bytes_accounted = folio_size(folio) + 1 -
+                               bytes_pending;
+
+               spin_lock_irq(&ifs->state_lock);
+               ifs->read_bytes_pending -= bytes_accounted;
+               /*
+                * If !ifs->read_bytes_pending, this means all pending reads
+                * by the IO helper have already completed, which means we need
+                * to end the folio read here. If ifs->read_bytes_pending != 0,
+                * the IO helper will end the folio read.
+                */
+               end_read = !ifs->read_bytes_pending;
+               if (end_read)
+                       uptodate = ifs_is_fully_uptodate(folio, ifs);
+               spin_unlock_irq(&ifs->state_lock);
+               if (end_read)
+                       folio_end_read(folio, uptodate);
+       }
+}
+
 static int iomap_read_folio_iter(struct iomap_iter *iter,
-               struct iomap_read_folio_ctx *ctx)
+               struct iomap_read_folio_ctx *ctx, size_t *bytes_pending)
 {
        const struct iomap *iomap = &iter->iomap;
        loff_t pos = iter->pos;
@@ -460,6 +536,9 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
                        folio_zero_range(folio, poff, plen);
                        iomap_set_range_uptodate(folio, poff, plen);
                } else {
+                       if (!*bytes_pending)
+                               iomap_read_init(folio);
+                       *bytes_pending += plen;
                        iomap_bio_read_folio_range(iter, ctx, pos, plen);
                }
 
@@ -482,17 +561,18 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
        struct iomap_read_folio_ctx ctx = {
                .cur_folio      = folio,
        };
+       size_t bytes_pending = 0;
        int ret;
 
        trace_iomap_readpage(iter.inode, 1);
 
        while ((ret = iomap_iter(&iter, ops)) > 0)
-               iter.status = iomap_read_folio_iter(&iter, &ctx);
+               iter.status = iomap_read_folio_iter(&iter, &ctx,
+                               &bytes_pending);
 
        iomap_bio_submit_read(&ctx);
 
-       if (!ctx.cur_folio_in_bio)
-               folio_unlock(folio);
+       iomap_read_end(folio, bytes_pending);
 
        /*
         * Just like mpage_readahead and block_read_full_folio, we always
@@ -504,24 +584,23 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
 EXPORT_SYMBOL_GPL(iomap_read_folio);
 
 static int iomap_readahead_iter(struct iomap_iter *iter,
-               struct iomap_read_folio_ctx *ctx)
+               struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_pending)
 {
        int ret;
 
        while (iomap_length(iter)) {
                if (ctx->cur_folio &&
                    offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
-                       if (!ctx->cur_folio_in_bio)
-                               folio_unlock(ctx->cur_folio);
+                       iomap_read_end(ctx->cur_folio, *cur_bytes_pending);
                        ctx->cur_folio = NULL;
                }
                if (!ctx->cur_folio) {
                        ctx->cur_folio = readahead_folio(ctx->rac);
                        if (WARN_ON_ONCE(!ctx->cur_folio))
                                return -EINVAL;
-                       ctx->cur_folio_in_bio = false;
+                       *cur_bytes_pending = 0;
                }
-               ret = iomap_read_folio_iter(iter, ctx);
+               ret = iomap_read_folio_iter(iter, ctx, cur_bytes_pending);
                if (ret)
                        return ret;
        }
@@ -554,16 +633,18 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
        struct iomap_read_folio_ctx ctx = {
                .rac    = rac,
        };
+       size_t cur_bytes_pending;
 
        trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
 
        while (iomap_iter(&iter, ops) > 0)
-               iter.status = iomap_readahead_iter(&iter, &ctx);
+               iter.status = iomap_readahead_iter(&iter, &ctx,
+                                       &cur_bytes_pending);
 
        iomap_bio_submit_read(&ctx);
 
-       if (ctx.cur_folio && !ctx.cur_folio_in_bio)
-               folio_unlock(ctx.cur_folio);
+       if (ctx.cur_folio)
+               iomap_read_end(ctx.cur_folio, cur_bytes_pending);
 }
 EXPORT_SYMBOL_GPL(iomap_readahead);
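
To see what the +1 bias added in iomap_read_init() buys, it helps to
walk the arithmetic for the worst-case ordering, where the IO helper
finishes before iomap_read_end() runs. The sketch below is plain
userspace C with made-up variables that mirror, but are not, the
kernel's, assuming a 4096-byte folio in which a single 2048-byte range
needs IO:

  #include <assert.h>
  #include <stddef.h>

  int main(void)
  {
          const size_t folio_size = 4096;
          size_t read_bytes_pending;   /* ifs->read_bytes_pending */
          size_t bytes_pending = 0;    /* bytes handed to the IO helper */

          /* iomap_read_init(): folio size plus the +1 bias, taken once. */
          read_bytes_pending = folio_size + 1;    /* 4097 */
          bytes_pending += 2048;                  /* submit the one IO range */

          /*
           * The completion path subtracts what it read. The bias keeps
           * the counter above zero, so completion cannot end the read
           * while iomap_read_end() may still be using the folio.
           */
          read_bytes_pending -= 2048;             /* 2049 */
          assert(read_bytes_pending > 0);

          /*
           * iomap_read_end(): drop the bias plus the bytes that were
           * already uptodate or zeroed and thus skipped IO.
           */
          read_bytes_pending -= folio_size + 1 - bytes_pending;  /* -= 2049 */
          assert(read_bytes_pending == 0);        /* read ends here */
          return 0;
  }

Without the bias, a folio whose every byte goes to IO could hit zero in
the completion path, which would end the read and unlock the folio
while iomap_read_end() was still about to dereference folio->private.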