6.16-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 22 Aug 2025 12:31:08 +0000 (14:31 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 22 Aug 2025 12:31:08 +0000 (14:31 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Aug 2025 12:31:08 +0000 (14:31 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Aug 2025 12:31:08 +0000 (14:31 +0200)
diff --git a/queue-6.16/ext4-correct-the-reserved-credits-for-extent-conversion.patch b/queue-6.16/ext4-correct-the-reserved-credits-for-extent-conversion.patch

new file mode 100644 (file)

index 0000000..494e6c1
--- /dev/null
+++ b/queue-6.16/ext4-correct-the-reserved-credits-for-extent-conversion.patch
@@ -0,0 +1,46 @@
+From 95ad8ee45cdbc321c135a2db895d48b374ef0f87 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:10 +0800
+Subject: ext4: correct the reserved credits for extent conversion
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 95ad8ee45cdbc321c135a2db895d48b374ef0f87 upstream.
+
+Now, we reserve journal credits for converting extents in only one page
+to written state when the I/O operation is complete. This is
+insufficient when large folios are enabled.
+
+Fix this by reserving credits for converting up to one extent per block in
+the largest 2MB folio. This calculation should only involve extent index
+and leaf blocks, so it should not estimate too many credits.
+
+Fixes: 7ac67301e82f ("ext4: enable large folio for regular file")
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Link: https://patch.msgid.link/20250707140814.542883-8-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2850,12 +2850,12 @@ static int ext4_do_writepages(struct mpa
+       mpd->journalled_more_data = 0;
+ 
+       if (ext4_should_dioread_nolock(inode)) {
++              int bpf = ext4_journal_blocks_per_folio(inode);
+               /*
+                * We may need to convert up to one extent per block in
+-               * the page and we may dirty the inode.
++               * the folio and we may dirty the inode.
+                */
+-              rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
+-                                              PAGE_SIZE >> inode->i_blkbits);
++              rsv_blocks = 1 + ext4_ext_index_trans_blocks(inode, bpf);
+       }
+ 
+       if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
diff --git a/queue-6.16/ext4-enhance-tracepoints-during-the-folios-writeback.patch b/queue-6.16/ext4-enhance-tracepoints-during-the-folios-writeback.patch

new file mode 100644 (file)

index 0000000..4a4f523
--- /dev/null
+++ b/queue-6.16/ext4-enhance-tracepoints-during-the-folios-writeback.patch
@@ -0,0 +1,121 @@
+From 6b132759b0fe78e518abafb62190c294100db6d6 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:09 +0800
+Subject: ext4: enhance tracepoints during the folios writeback
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 6b132759b0fe78e518abafb62190c294100db6d6 upstream.
+
+Now that mpage_map_and_submit_extent() supports restarting the handle
+when credits are insufficient during block allocation, it is more likely
+to exit the current mapping iteration and then continue processing the
+same partially mapped folio again. The existing tracepoints are
+not sufficient to track this situation, so enhance the tracepoints to
+track the writeback position and the return value before and after
+submitting the folios.
+
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20250707140814.542883-7-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c             |    5 ++++-
+ include/trace/events/ext4.h |   42 +++++++++++++++++++++++++++++++++++++-----
+ 2 files changed, 41 insertions(+), 6 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2936,7 +2936,8 @@ retry:
+               }
+               mpd->do_map = 1;
+ 
+-              trace_ext4_da_write_pages(inode, mpd->start_pos, wbc);
++              trace_ext4_da_write_folios_start(inode, mpd->start_pos,
++                              mpd->next_pos, wbc);
+               ret = mpage_prepare_extent_to_map(mpd);
+               if (!ret && mpd->map.m_len)
+                       ret = mpage_map_and_submit_extent(handle, mpd,
+@@ -2974,6 +2975,8 @@ retry:
+               } else
+                       ext4_put_io_end(mpd->io_submit.io_end);
+               mpd->io_submit.io_end = NULL;
++              trace_ext4_da_write_folios_end(inode, mpd->start_pos,
++                              mpd->next_pos, wbc, ret);
+ 
+               if (ret == -ENOSPC && sbi->s_journal) {
+                       /*
+--- a/include/trace/events/ext4.h
++++ b/include/trace/events/ext4.h
+@@ -482,16 +482,17 @@ TRACE_EVENT(ext4_writepages,
+                 (unsigned long) __entry->writeback_index)
+ );
+ 
+-TRACE_EVENT(ext4_da_write_pages,
+-      TP_PROTO(struct inode *inode, loff_t start_pos,
++TRACE_EVENT(ext4_da_write_folios_start,
++      TP_PROTO(struct inode *inode, loff_t start_pos, loff_t next_pos,
+                struct writeback_control *wbc),
+ 
+-      TP_ARGS(inode, start_pos, wbc),
++      TP_ARGS(inode, start_pos, next_pos, wbc),
+ 
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(       loff_t,  start_pos               )
++              __field(       loff_t,  next_pos                )
+               __field(         long,  nr_to_write             )
+               __field(          int,  sync_mode               )
+       ),
+@@ -500,16 +501,47 @@ TRACE_EVENT(ext4_da_write_pages,
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ino            = inode->i_ino;
+               __entry->start_pos      = start_pos;
++              __entry->next_pos       = next_pos;
+               __entry->nr_to_write    = wbc->nr_to_write;
+               __entry->sync_mode      = wbc->sync_mode;
+       ),
+ 
+-      TP_printk("dev %d,%d ino %lu start_pos 0x%llx nr_to_write %ld sync_mode %d",
++      TP_printk("dev %d,%d ino %lu start_pos 0x%llx next_pos 0x%llx nr_to_write %ld sync_mode %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+-                (unsigned long) __entry->ino, __entry->start_pos,
++                (unsigned long) __entry->ino, __entry->start_pos, __entry->next_pos,
+                 __entry->nr_to_write, __entry->sync_mode)
+ );
+ 
++TRACE_EVENT(ext4_da_write_folios_end,
++      TP_PROTO(struct inode *inode, loff_t start_pos, loff_t next_pos,
++               struct writeback_control *wbc, int ret),
++
++      TP_ARGS(inode, start_pos, next_pos, wbc, ret),
++
++      TP_STRUCT__entry(
++              __field(        dev_t,  dev                     )
++              __field(        ino_t,  ino                     )
++              __field(       loff_t,  start_pos               )
++              __field(       loff_t,  next_pos                )
++              __field(         long,  nr_to_write             )
++              __field(          int,  ret                     )
++      ),
++
++      TP_fast_assign(
++              __entry->dev            = inode->i_sb->s_dev;
++              __entry->ino            = inode->i_ino;
++              __entry->start_pos      = start_pos;
++              __entry->next_pos       = next_pos;
++              __entry->nr_to_write    = wbc->nr_to_write;
++              __entry->ret            = ret;
++      ),
++
++      TP_printk("dev %d,%d ino %lu start_pos 0x%llx next_pos 0x%llx nr_to_write %ld ret %d",
++                MAJOR(__entry->dev), MINOR(__entry->dev),
++                (unsigned long) __entry->ino, __entry->start_pos, __entry->next_pos,
++                __entry->nr_to_write, __entry->ret)
++);
++
+ TRACE_EVENT(ext4_da_write_pages_extent,
+       TP_PROTO(struct inode *inode, struct ext4_map_blocks *map),
+ 
diff --git a/queue-6.16/ext4-fix-stale-data-if-it-bail-out-of-the-extents-mapping-loop.patch b/queue-6.16/ext4-fix-stale-data-if-it-bail-out-of-the-extents-mapping-loop.patch

new file mode 100644 (file)

index 0000000..f9b9656
--- /dev/null
+++ b/queue-6.16/ext4-fix-stale-data-if-it-bail-out-of-the-extents-mapping-loop.patch
@@ -0,0 +1,102 @@
+From ded2d726a3041fce8afd88005cbfe15cd4737702 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:06 +0800
+Subject: ext4: fix stale data if it bail out of the extents mapping loop
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit ded2d726a3041fce8afd88005cbfe15cd4737702 upstream.
+
+During the process of writing back folios, if
+mpage_map_and_submit_extent() exits the extent mapping loop due to an
+ENOSPC or ENOMEM error, it may result in stale data or filesystem
+inconsistency in environments where the block size is smaller than the
+folio size.
+
+When mapping a discontinuous folio in mpage_map_and_submit_extent(),
+some buffers may have already been mapped. If we exit the mapping loop
+prematurely, the folio data within the mapped range will not be written
+back, and the file's disk size will not be updated. Once the transaction
+that includes this range of extents is committed, this can lead to stale
+data or filesystem inconsistency.
+
+Fix this by submitting the partially mapped folio currently being processed.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20250707140814.542883-4-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 50 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2363,6 +2363,47 @@ static int mpage_map_one_extent(handle_t
+ }
+ 
+ /*
++ * This is used to submit mapped buffers in a single folio that is not fully
++ * mapped for various reasons, such as insufficient space or journal credits.
++ */
++static int mpage_submit_partial_folio(struct mpage_da_data *mpd)
++{
++      struct inode *inode = mpd->inode;
++      struct folio *folio;
++      loff_t pos;
++      int ret;
++
++      folio = filemap_get_folio(inode->i_mapping,
++                                mpd->start_pos >> PAGE_SHIFT);
++      if (IS_ERR(folio))
++              return PTR_ERR(folio);
++      /*
++       * The mapped position should be within the current processing folio
++       * but must not be the folio start position.
++       */
++      pos = ((loff_t)mpd->map.m_lblk) << inode->i_blkbits;
++      if (WARN_ON_ONCE((folio_pos(folio) == pos) ||
++                       !folio_contains(folio, pos >> PAGE_SHIFT)))
++              return -EINVAL;
++
++      ret = mpage_submit_folio(mpd, folio);
++      if (ret)
++              goto out;
++      /*
++       * Update start_pos to prevent this folio from being released in
++       * mpage_release_unused_pages(), it will be reset to the aligned folio
++       * pos when this folio is written again in the next round. Additionally,
++       * do not update wbc->nr_to_write here, as it will be updated once the
++       * entire folio has finished processing.
++       */
++      mpd->start_pos = pos;
++out:
++      folio_unlock(folio);
++      folio_put(folio);
++      return ret;
++}
++
++/*
+  * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
+  *                             mpd->len and submit pages underlying it for IO
+  *
+@@ -2412,8 +2453,16 @@ static int mpage_map_and_submit_extent(h
+                        */
+                       if ((err == -ENOMEM) ||
+                           (err == -ENOSPC && ext4_count_free_clusters(sb))) {
+-                              if (progress)
++                              /*
++                               * We may have already allocated extents for
++                               * some bhs inside the folio, issue the
++                               * corresponding data to prevent stale data.
++                               */
++                              if (progress) {
++                                      if (mpage_submit_partial_folio(mpd))
++                                              goto invalidate_dirty_pages;
+                                       goto update_disksize;
++                              }
+                               return err;
+                       }
+                       ext4_msg(sb, KERN_CRIT,
diff --git a/queue-6.16/ext4-move-the-calculation-of-wbc-nr_to_write-to-mpage_folio_done.patch b/queue-6.16/ext4-move-the-calculation-of-wbc-nr_to_write-to-mpage_folio_done.patch

new file mode 100644 (file)

index 0000000..57d8218
--- /dev/null
+++ b/queue-6.16/ext4-move-the-calculation-of-wbc-nr_to_write-to-mpage_folio_done.patch
@@ -0,0 +1,43 @@
+From f922c8c2461b022a2efd9914484901fb358a5b2a Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:05 +0800
+Subject: ext4: move the calculation of wbc->nr_to_write to mpage_folio_done()
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit f922c8c2461b022a2efd9914484901fb358a5b2a upstream.
+
+mpage_folio_done() should be a more appropriate place than
+mpage_submit_folio() for updating the wbc->nr_to_write after we have
+submitted a fully mapped folio. This prepares mpage_submit_folio() to
+submit a partially mapped folio that is still being processed.
+
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Link: https://patch.msgid.link/20250707140814.542883-3-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2027,6 +2027,7 @@ int ext4_da_get_block_prep(struct inode
+ static void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio)
+ {
+       mpd->start_pos += folio_size(folio);
++      mpd->wbc->nr_to_write -= folio_nr_pages(folio);
+       folio_unlock(folio);
+ }
+ 
+@@ -2057,8 +2058,6 @@ static int mpage_submit_folio(struct mpa
+           !ext4_verity_in_progress(mpd->inode))
+               len = size & (len - 1);
+       err = ext4_bio_write_folio(&mpd->io_submit, folio, len);
+-      if (!err)
+-              mpd->wbc->nr_to_write -= folio_nr_pages(folio);
+ 
+       return err;
+ }
diff --git a/queue-6.16/ext4-process-folios-writeback-in-bytes.patch b/queue-6.16/ext4-process-folios-writeback-in-bytes.patch

new file mode 100644 (file)

index 0000000..f6c8159
--- /dev/null
+++ b/queue-6.16/ext4-process-folios-writeback-in-bytes.patch
@@ -0,0 +1,254 @@
+From 1bfe6354e0975fe89c3d25e81b6546d205556a4b Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:04 +0800
+Subject: ext4: process folios writeback in bytes
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 1bfe6354e0975fe89c3d25e81b6546d205556a4b upstream.
+
+Since ext4 supports large folios, processing writebacks in pages is no
+longer appropriate. Modify it to process writebacks in bytes instead.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20250707140814.542883-2-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c             |   70 ++++++++++++++++++++++----------------------
+ include/trace/events/ext4.h |   13 +++-----
+ 2 files changed, 42 insertions(+), 41 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1668,11 +1668,12 @@ struct mpage_da_data {
+       unsigned int can_map:1; /* Can writepages call map blocks? */
+ 
+       /* These are internal state of ext4_do_writepages() */
+-      pgoff_t first_page;     /* The first page to write */
+-      pgoff_t next_page;      /* Current page to examine */
+-      pgoff_t last_page;      /* Last page to examine */
++      loff_t start_pos;       /* The start pos to write */
++      loff_t next_pos;        /* Current pos to examine */
++      loff_t end_pos;         /* Last pos to examine */
++
+       /*
+-       * Extent to map - this can be after first_page because that can be
++       * Extent to map - this can be after start_pos because that can be
+        * fully mapped. We somewhat abuse m_flags to store whether the extent
+        * is delalloc or unwritten.
+        */
+@@ -1692,38 +1693,38 @@ static void mpage_release_unused_pages(s
+       struct inode *inode = mpd->inode;
+       struct address_space *mapping = inode->i_mapping;
+ 
+-      /* This is necessary when next_page == 0. */
+-      if (mpd->first_page >= mpd->next_page)
++      /* This is necessary when next_pos == 0. */
++      if (mpd->start_pos >= mpd->next_pos)
+               return;
+ 
+       mpd->scanned_until_end = 0;
+-      index = mpd->first_page;
+-      end   = mpd->next_page - 1;
+       if (invalidate) {
+               ext4_lblk_t start, last;
+-              start = index << (PAGE_SHIFT - inode->i_blkbits);
+-              last = end << (PAGE_SHIFT - inode->i_blkbits);
++              start = EXT4_B_TO_LBLK(inode, mpd->start_pos);
++              last = mpd->next_pos >> inode->i_blkbits;
+ 
+               /*
+                * avoid racing with extent status tree scans made by
+                * ext4_insert_delayed_block()
+                */
+               down_write(&EXT4_I(inode)->i_data_sem);
+-              ext4_es_remove_extent(inode, start, last - start + 1);
++              ext4_es_remove_extent(inode, start, last - start);
+               up_write(&EXT4_I(inode)->i_data_sem);
+       }
+ 
+       folio_batch_init(&fbatch);
+-      while (index <= end) {
+-              nr = filemap_get_folios(mapping, &index, end, &fbatch);
++      index = mpd->start_pos >> PAGE_SHIFT;
++      end = mpd->next_pos >> PAGE_SHIFT;
++      while (index < end) {
++              nr = filemap_get_folios(mapping, &index, end - 1, &fbatch);
+               if (nr == 0)
+                       break;
+               for (i = 0; i < nr; i++) {
+                       struct folio *folio = fbatch.folios[i];
+ 
+-                      if (folio->index < mpd->first_page)
++                      if (folio_pos(folio) < mpd->start_pos)
+                               continue;
+-                      if (folio_next_index(folio) - 1 > end)
++                      if (folio_next_index(folio) > end)
+                               continue;
+                       BUG_ON(!folio_test_locked(folio));
+                       BUG_ON(folio_test_writeback(folio));
+@@ -2025,7 +2026,7 @@ int ext4_da_get_block_prep(struct inode
+ 
+ static void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio)
+ {
+-      mpd->first_page += folio_nr_pages(folio);
++      mpd->start_pos += folio_size(folio);
+       folio_unlock(folio);
+ }
+ 
+@@ -2035,7 +2036,7 @@ static int mpage_submit_folio(struct mpa
+       loff_t size;
+       int err;
+ 
+-      BUG_ON(folio->index != mpd->first_page);
++      WARN_ON_ONCE(folio_pos(folio) != mpd->start_pos);
+       folio_clear_dirty_for_io(folio);
+       /*
+        * We have to be very careful here!  Nothing protects writeback path
+@@ -2447,7 +2448,7 @@ update_disksize:
+        * Update on-disk size after IO is submitted.  Races with
+        * truncate are avoided by checking i_size under i_data_sem.
+        */
+-      disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
++      disksize = mpd->start_pos;
+       if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
+               int err2;
+               loff_t i_size;
+@@ -2550,8 +2551,8 @@ static int mpage_prepare_extent_to_map(s
+       struct address_space *mapping = mpd->inode->i_mapping;
+       struct folio_batch fbatch;
+       unsigned int nr_folios;
+-      pgoff_t index = mpd->first_page;
+-      pgoff_t end = mpd->last_page;
++      pgoff_t index = mpd->start_pos >> PAGE_SHIFT;
++      pgoff_t end = mpd->end_pos >> PAGE_SHIFT;
+       xa_mark_t tag;
+       int i, err = 0;
+       int blkbits = mpd->inode->i_blkbits;
+@@ -2566,7 +2567,7 @@ static int mpage_prepare_extent_to_map(s
+               tag = PAGECACHE_TAG_DIRTY;
+ 
+       mpd->map.m_len = 0;
+-      mpd->next_page = index;
++      mpd->next_pos = mpd->start_pos;
+       if (ext4_should_journal_data(mpd->inode)) {
+               handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE,
+                                           bpp);
+@@ -2597,7 +2598,8 @@ static int mpage_prepare_extent_to_map(s
+                               goto out;
+ 
+                       /* If we can't merge this page, we are done. */
+-                      if (mpd->map.m_len > 0 && mpd->next_page != folio->index)
++                      if (mpd->map.m_len > 0 &&
++                          mpd->next_pos != folio_pos(folio))
+                               goto out;
+ 
+                       if (handle) {
+@@ -2643,8 +2645,8 @@ static int mpage_prepare_extent_to_map(s
+                       }
+ 
+                       if (mpd->map.m_len == 0)
+-                              mpd->first_page = folio->index;
+-                      mpd->next_page = folio_next_index(folio);
++                              mpd->start_pos = folio_pos(folio);
++                      mpd->next_pos = folio_pos(folio) + folio_size(folio);
+                       /*
+                        * Writeout when we cannot modify metadata is simple.
+                        * Just submit the page. For data=journal mode we
+@@ -2787,18 +2789,18 @@ static int ext4_do_writepages(struct mpa
+               writeback_index = mapping->writeback_index;
+               if (writeback_index)
+                       cycled = 0;
+-              mpd->first_page = writeback_index;
+-              mpd->last_page = -1;
++              mpd->start_pos = writeback_index << PAGE_SHIFT;
++              mpd->end_pos = LLONG_MAX;
+       } else {
+-              mpd->first_page = wbc->range_start >> PAGE_SHIFT;
+-              mpd->last_page = wbc->range_end >> PAGE_SHIFT;
++              mpd->start_pos = wbc->range_start;
++              mpd->end_pos = wbc->range_end;
+       }
+ 
+       ext4_io_submit_init(&mpd->io_submit, wbc);
+ retry:
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+-              tag_pages_for_writeback(mapping, mpd->first_page,
+-                                      mpd->last_page);
++              tag_pages_for_writeback(mapping, mpd->start_pos >> PAGE_SHIFT,
++                                      mpd->end_pos >> PAGE_SHIFT);
+       blk_start_plug(&plug);
+ 
+       /*
+@@ -2858,7 +2860,7 @@ retry:
+               }
+               mpd->do_map = 1;
+ 
+-              trace_ext4_da_write_pages(inode, mpd->first_page, wbc);
++              trace_ext4_da_write_pages(inode, mpd->start_pos, wbc);
+               ret = mpage_prepare_extent_to_map(mpd);
+               if (!ret && mpd->map.m_len)
+                       ret = mpage_map_and_submit_extent(handle, mpd,
+@@ -2915,8 +2917,8 @@ unplug:
+       blk_finish_plug(&plug);
+       if (!ret && !cycled && wbc->nr_to_write > 0) {
+               cycled = 1;
+-              mpd->last_page = writeback_index - 1;
+-              mpd->first_page = 0;
++              mpd->end_pos = (writeback_index << PAGE_SHIFT) - 1;
++              mpd->start_pos = 0;
+               goto retry;
+       }
+ 
+@@ -2926,7 +2928,7 @@ unplug:
+                * Set the writeback_index so that range_cyclic
+                * mode will write it back later
+                */
+-              mapping->writeback_index = mpd->first_page;
++              mapping->writeback_index = mpd->start_pos >> PAGE_SHIFT;
+ 
+ out_writepages:
+       trace_ext4_writepages_result(inode, wbc, ret,
+--- a/include/trace/events/ext4.h
++++ b/include/trace/events/ext4.h
+@@ -483,15 +483,15 @@ TRACE_EVENT(ext4_writepages,
+ );
+ 
+ TRACE_EVENT(ext4_da_write_pages,
+-      TP_PROTO(struct inode *inode, pgoff_t first_page,
++      TP_PROTO(struct inode *inode, loff_t start_pos,
+                struct writeback_control *wbc),
+ 
+-      TP_ARGS(inode, first_page, wbc),
++      TP_ARGS(inode, start_pos, wbc),
+ 
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+-              __field(      pgoff_t,  first_page              )
++              __field(       loff_t,  start_pos               )
+               __field(         long,  nr_to_write             )
+               __field(          int,  sync_mode               )
+       ),
+@@ -499,15 +499,14 @@ TRACE_EVENT(ext4_da_write_pages,
+       TP_fast_assign(
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ino            = inode->i_ino;
+-              __entry->first_page     = first_page;
++              __entry->start_pos      = start_pos;
+               __entry->nr_to_write    = wbc->nr_to_write;
+               __entry->sync_mode      = wbc->sync_mode;
+       ),
+ 
+-      TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld "
+-                "sync_mode %d",
++      TP_printk("dev %d,%d ino %lu start_pos 0x%llx nr_to_write %ld sync_mode %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+-                (unsigned long) __entry->ino, __entry->first_page,
++                (unsigned long) __entry->ino, __entry->start_pos,
+                 __entry->nr_to_write, __entry->sync_mode)
+ );
+ 
diff --git a/queue-6.16/ext4-refactor-the-block-allocation-process-of-ext4_page_mkwrite.patch b/queue-6.16/ext4-refactor-the-block-allocation-process-of-ext4_page_mkwrite.patch

new file mode 100644 (file)

index 0000000..cbef2a7
--- /dev/null
+++ b/queue-6.16/ext4-refactor-the-block-allocation-process-of-ext4_page_mkwrite.patch
@@ -0,0 +1,153 @@
+From 2bddafea3d0d85ee9ac3cf5ba9a4b2f2d2f50257 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:07 +0800
+Subject: ext4: refactor the block allocation process of ext4_page_mkwrite()
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 2bddafea3d0d85ee9ac3cf5ba9a4b2f2d2f50257 upstream.
+
+The block allocation process and error handling in ext4_page_mkwrite()
+is complex now. Refactor it by introducing a new helper function,
+ext4_block_page_mkwrite(). It will call ext4_block_write_begin() to
+allocate blocks instead of directly calling block_page_mkwrite().
+This prepares for retry logic in a subsequent patch that addresses
+situations where the reserved journal credits are insufficient.
+Additionally, this modification will help prevent potential deadlocks
+that may occur when waiting for folio writeback while holding the
+transaction handle.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20250707140814.542883-5-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |   95 +++++++++++++++++++++++++++++---------------------------
+ 1 file changed, 50 insertions(+), 45 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -6622,6 +6622,53 @@ static int ext4_bh_unmapped(handle_t *ha
+       return !buffer_mapped(bh);
+ }
+ 
++static int ext4_block_page_mkwrite(struct inode *inode, struct folio *folio,
++                                 get_block_t get_block)
++{
++      handle_t *handle;
++      loff_t size;
++      unsigned long len;
++      int ret;
++
++      handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
++                                  ext4_writepage_trans_blocks(inode));
++      if (IS_ERR(handle))
++              return PTR_ERR(handle);
++
++      folio_lock(folio);
++      size = i_size_read(inode);
++      /* Page got truncated from under us? */
++      if (folio->mapping != inode->i_mapping || folio_pos(folio) > size) {
++              ret = -EFAULT;
++              goto out_error;
++      }
++
++      len = folio_size(folio);
++      if (folio_pos(folio) + len > size)
++              len = size - folio_pos(folio);
++
++      ret = ext4_block_write_begin(handle, folio, 0, len, get_block);
++      if (ret)
++              goto out_error;
++
++      if (!ext4_should_journal_data(inode)) {
++              block_commit_write(folio, 0, len);
++              folio_mark_dirty(folio);
++      } else {
++              ret = ext4_journal_folio_buffers(handle, folio, len);
++              if (ret)
++                      goto out_error;
++      }
++      ext4_journal_stop(handle);
++      folio_wait_stable(folio);
++      return ret;
++
++out_error:
++      folio_unlock(folio);
++      ext4_journal_stop(handle);
++      return ret;
++}
++
+ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
+ {
+       struct vm_area_struct *vma = vmf->vma;
+@@ -6633,8 +6680,7 @@ vm_fault_t ext4_page_mkwrite(struct vm_f
+       struct file *file = vma->vm_file;
+       struct inode *inode = file_inode(file);
+       struct address_space *mapping = inode->i_mapping;
+-      handle_t *handle;
+-      get_block_t *get_block;
++      get_block_t *get_block = ext4_get_block;
+       int retries = 0;
+ 
+       if (unlikely(IS_IMMUTABLE(inode)))
+@@ -6702,46 +6748,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_f
+       /* OK, we need to fill the hole... */
+       if (ext4_should_dioread_nolock(inode))
+               get_block = ext4_get_block_unwritten;
+-      else
+-              get_block = ext4_get_block;
+ retry_alloc:
+-      handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+-                                  ext4_writepage_trans_blocks(inode));
+-      if (IS_ERR(handle)) {
+-              ret = VM_FAULT_SIGBUS;
+-              goto out;
+-      }
+-      /*
+-       * Data journalling can't use block_page_mkwrite() because it
+-       * will set_buffer_dirty() before do_journal_get_write_access()
+-       * thus might hit warning messages for dirty metadata buffers.
+-       */
+-      if (!ext4_should_journal_data(inode)) {
+-              err = block_page_mkwrite(vma, vmf, get_block);
+-      } else {
+-              folio_lock(folio);
+-              size = i_size_read(inode);
+-              /* Page got truncated from under us? */
+-              if (folio->mapping != mapping || folio_pos(folio) > size) {
+-                      ret = VM_FAULT_NOPAGE;
+-                      goto out_error;
+-              }
+-
+-              len = folio_size(folio);
+-              if (folio_pos(folio) + len > size)
+-                      len = size - folio_pos(folio);
+-
+-              err = ext4_block_write_begin(handle, folio, 0, len,
+-                                           ext4_get_block);
+-              if (!err) {
+-                      ret = VM_FAULT_SIGBUS;
+-                      if (ext4_journal_folio_buffers(handle, folio, len))
+-                              goto out_error;
+-              } else {
+-                      folio_unlock(folio);
+-              }
+-      }
+-      ext4_journal_stop(handle);
++      /* Start journal and allocate blocks */
++      err = ext4_block_page_mkwrite(inode, folio, get_block);
+       if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+               goto retry_alloc;
+ out_ret:
+@@ -6750,8 +6759,4 @@ out:
+       filemap_invalidate_unlock_shared(mapping);
+       sb_end_pagefault(inode->i_sb);
+       return ret;
+-out_error:
+-      folio_unlock(folio);
+-      ext4_journal_stop(handle);
+-      goto out;
+ }
diff --git a/queue-6.16/ext4-replace-ext4_writepage_trans_blocks.patch b/queue-6.16/ext4-replace-ext4_writepage_trans_blocks.patch

new file mode 100644 (file)

index 0000000..7108fe2
--- /dev/null
+++ b/queue-6.16/ext4-replace-ext4_writepage_trans_blocks.patch
@@ -0,0 +1,202 @@
+From 57661f28756c59510e31543520b5b8f5e591f384 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:12 +0800
+Subject: ext4: replace ext4_writepage_trans_blocks()
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 57661f28756c59510e31543520b5b8f5e591f384 upstream.
+
+After ext4 supports large folios, the semantics of reserving credits in
+pages is no longer applicable. In most scenarios, reserving credits in
+extents is sufficient. Therefore, introduce ext4_chunk_trans_extent()
+to replace ext4_writepage_trans_blocks(). move_extent_per_page() is the
+only remaining location where we are still processing extents in pages.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20250707140814.542883-10-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ext4.h        |    2 +-
+ fs/ext4/extents.c     |    6 +++---
+ fs/ext4/inline.c      |    6 +++---
+ fs/ext4/inode.c       |   33 +++++++++++++++------------------
+ fs/ext4/move_extent.c |    3 ++-
+ fs/ext4/xattr.c       |    2 +-
+ 6 files changed, 25 insertions(+), 27 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -3064,9 +3064,9 @@ extern int ext4_punch_hole(struct file *
+ extern void ext4_set_inode_flags(struct inode *, bool init);
+ extern int ext4_alloc_da_blocks(struct inode *inode);
+ extern void ext4_set_aops(struct inode *inode);
+-extern int ext4_writepage_trans_blocks(struct inode *);
+ extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode);
+ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
++extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks);
+ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
+                                 int pextents);
+ extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -5171,7 +5171,7 @@ ext4_ext_shift_path_extents(struct ext4_
+                               credits = depth + 2;
+                       }
+ 
+-                      restart_credits = ext4_writepage_trans_blocks(inode);
++                      restart_credits = ext4_chunk_trans_extent(inode, 0);
+                       err = ext4_datasem_ensure_credits(handle, inode, credits,
+                                       restart_credits, 0);
+                       if (err) {
+@@ -5431,7 +5431,7 @@ static int ext4_collapse_range(struct fi
+ 
+       truncate_pagecache(inode, start);
+ 
+-      credits = ext4_writepage_trans_blocks(inode);
++      credits = ext4_chunk_trans_extent(inode, 0);
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -5527,7 +5527,7 @@ static int ext4_insert_range(struct file
+ 
+       truncate_pagecache(inode, start);
+ 
+-      credits = ext4_writepage_trans_blocks(inode);
++      credits = ext4_chunk_trans_extent(inode, 0);
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -570,7 +570,7 @@ static int ext4_convert_inline_data_to_e
+               return 0;
+       }
+ 
+-      needed_blocks = ext4_writepage_trans_blocks(inode);
++      needed_blocks = ext4_chunk_trans_extent(inode, 1);
+ 
+       ret = ext4_get_inode_loc(inode, &iloc);
+       if (ret)
+@@ -1874,7 +1874,7 @@ int ext4_inline_data_truncate(struct ino
+       };
+ 
+ 
+-      needed_blocks = ext4_writepage_trans_blocks(inode);
++      needed_blocks = ext4_chunk_trans_extent(inode, 1);
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1994,7 +1994,7 @@ int ext4_convert_inline_data(struct inod
+                       return 0;
+       }
+ 
+-      needed_blocks = ext4_writepage_trans_blocks(inode);
++      needed_blocks = ext4_chunk_trans_extent(inode, 1);
+ 
+       iloc.bh = NULL;
+       error = ext4_get_inode_loc(inode, &iloc);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1296,7 +1296,8 @@ static int ext4_write_begin(struct file
+        * Reserve one block more for addition to orphan list in case
+        * we allocate blocks but write fails for some reason
+        */
+-      needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
++      needed_blocks = ext4_chunk_trans_extent(inode,
++                      ext4_journal_blocks_per_folio(inode)) + 1;
+       index = pos >> PAGE_SHIFT;
+ 
+       if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
+@@ -4464,7 +4465,7 @@ int ext4_punch_hole(struct file *file, l
+               return ret;
+ 
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+-              credits = ext4_writepage_trans_blocks(inode);
++              credits = ext4_chunk_trans_extent(inode, 2);
+       else
+               credits = ext4_blocks_for_truncate(inode);
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+@@ -4613,7 +4614,7 @@ int ext4_truncate(struct inode *inode)
+       }
+ 
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+-              credits = ext4_writepage_trans_blocks(inode);
++              credits = ext4_chunk_trans_extent(inode, 1);
+       else
+               credits = ext4_blocks_for_truncate(inode);
+ 
+@@ -6256,25 +6257,19 @@ int ext4_meta_trans_blocks(struct inode
+ }
+ 
+ /*
+- * Calculate the total number of credits to reserve to fit
+- * the modification of a single pages into a single transaction,
+- * which may include multiple chunks of block allocations.
+- *
+- * This could be called via ext4_write_begin()
+- *
+- * We need to consider the worse case, when
+- * one new block per extent.
++ * Calculate the journal credits for modifying the number of blocks
++ * in a single extent within one transaction. 'nrblocks' is used only
++ * for non-extent inodes. For extent type inodes, 'nrblocks' can be
++ * zero if the exact number of blocks is unknown.
+  */
+-int ext4_writepage_trans_blocks(struct inode *inode)
++int ext4_chunk_trans_extent(struct inode *inode, int nrblocks)
+ {
+-      int bpp = ext4_journal_blocks_per_folio(inode);
+       int ret;
+ 
+-      ret = ext4_meta_trans_blocks(inode, bpp, bpp);
+-
++      ret = ext4_meta_trans_blocks(inode, nrblocks, 1);
+       /* Account for data blocks for journalled mode */
+       if (ext4_should_journal_data(inode))
+-              ret += bpp;
++              ret += nrblocks;
+       return ret;
+ }
+ 
+@@ -6652,10 +6647,12 @@ static int ext4_block_page_mkwrite(struc
+       handle_t *handle;
+       loff_t size;
+       unsigned long len;
++      int credits;
+       int ret;
+ 
+-      handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+-                                  ext4_writepage_trans_blocks(inode));
++      credits = ext4_chunk_trans_extent(inode,
++                      ext4_journal_blocks_per_folio(inode));
++      handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, credits);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+ 
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -280,7 +280,8 @@ move_extent_per_page(struct file *o_filp
+        */
+ again:
+       *err = 0;
+-      jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
++      jblocks = ext4_meta_trans_blocks(orig_inode, block_len_in_page,
++                                       block_len_in_page) * 2;
+       handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
+       if (IS_ERR(handle)) {
+               *err = PTR_ERR(handle);
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -962,7 +962,7 @@ int __ext4_xattr_set_credits(struct supe
+        * so we need to reserve credits for this eventuality
+        */
+       if (inode && ext4_has_inline_data(inode))
+-              credits += ext4_writepage_trans_blocks(inode) + 1;
++              credits += ext4_chunk_trans_extent(inode, 1) + 1;
+ 
+       /* We are done if ea_inode feature is not enabled. */
+       if (!ext4_has_feature_ea_inode(sb))
diff --git a/queue-6.16/ext4-reserved-credits-for-one-extent-during-the-folio-writeback.patch b/queue-6.16/ext4-reserved-credits-for-one-extent-during-the-folio-writeback.patch

new file mode 100644 (file)

index 0000000..a75edd3
--- /dev/null
+++ b/queue-6.16/ext4-reserved-credits-for-one-extent-during-the-folio-writeback.patch
@@ -0,0 +1,76 @@
+From bbbf150f3f85619569ac19dc6458cca7c492e715 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:11 +0800
+Subject: ext4: reserved credits for one extent during the folio writeback
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit bbbf150f3f85619569ac19dc6458cca7c492e715 upstream.
+
+After ext4 supports large folios, reserving journal credits for one
+maximum-ordered folio based on the worst case scenario during the
+writeback process can easily exceed the maximum transaction credits.
+Additionally, reserving journal credits for one page is also no
+longer appropriate.
+
+Currently, the folio writeback process can either extend the journal
+credits or initiate a new transaction if the currently reserved journal
+credits are insufficient. Therefore, it can be modified to reserve
+credits for only one extent at the outset. In most cases involving
+continuous mapping, these credits are generally adequate, and we may
+only need to perform some basic credit expansion. However, in extreme
+cases where the block size and folio size differ significantly, or when
+the folios are sufficiently discontinuous, it may be necessary to
+restart a new transaction and resubmit the folios.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20250707140814.542883-9-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |   25 ++++++++-----------------
+ 1 file changed, 8 insertions(+), 17 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2548,21 +2548,6 @@ update_disksize:
+       return err;
+ }
+ 
+-/*
+- * Calculate the total number of credits to reserve for one writepages
+- * iteration. This is called from ext4_writepages(). We map an extent of
+- * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
+- * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
+- * bpp - 1 blocks in bpp different extents.
+- */
+-static int ext4_da_writepages_trans_blocks(struct inode *inode)
+-{
+-      int bpp = ext4_journal_blocks_per_folio(inode);
+-
+-      return ext4_meta_trans_blocks(inode,
+-                              MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
+-}
+-
+ static int ext4_journal_folio_buffers(handle_t *handle, struct folio *folio,
+                                    size_t len)
+ {
+@@ -2919,8 +2904,14 @@ retry:
+                * not supported by delalloc.
+                */
+               BUG_ON(ext4_should_journal_data(inode));
+-              needed_blocks = ext4_da_writepages_trans_blocks(inode);
+-
++              /*
++               * Calculate the number of credits needed to reserve for one
++               * extent of up to MAX_WRITEPAGES_EXTENT_LEN blocks. It will
++               * attempt to extend the transaction or start a new iteration
++               * if the reserved credits are insufficient.
++               */
++              needed_blocks = ext4_chunk_trans_blocks(inode,
++                                              MAX_WRITEPAGES_EXTENT_LEN);
+               /* start a new transaction */
+               handle = ext4_journal_start_with_reserve(inode,
+                               EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
diff --git a/queue-6.16/ext4-restart-handle-if-credits-are-insufficient-during-allocating-blocks.patch b/queue-6.16/ext4-restart-handle-if-credits-are-insufficient-during-allocating-blocks.patch

new file mode 100644 (file)

index 0000000..829691a
--- /dev/null
+++ b/queue-6.16/ext4-restart-handle-if-credits-are-insufficient-during-allocating-blocks.patch
@@ -0,0 +1,128 @@
+From e2c4c49dee64ca2f42ad2958cbe1805de96b6732 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 7 Jul 2025 22:08:08 +0800
+Subject: ext4: restart handle if credits are insufficient during allocating blocks
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit e2c4c49dee64ca2f42ad2958cbe1805de96b6732 upstream.
+
+After large folios are supported on ext4, writing back a sufficiently
+large and discontinuous folio may consume a significant number of
+journal credits, placing considerable strain on the journal. For
+example, in a 20GB filesystem with 1K block size and 1MB journal size,
+writing back a 2MB folio could require thousands of credits in the
+worst-case scenario (when each block is discontinuous and distributed
+across different block groups), potentially exceeding the journal size.
+This issue can also occur in ext4_write_begin() and ext4_page_mkwrite()
+when delalloc is not enabled.
+
+Fix this by ensuring that there are sufficient journal credits before
+allocating an extent in mpage_map_one_extent() and
+ext4_block_write_begin(). If there are not enough credits, return
+-EAGAIN, exit the current mapping loop, restart a new handle and a new
+transaction, and allocate blocks on this folio again in the next
+iteration.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20250707140814.542883-6-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |   41 ++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 36 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -877,6 +877,26 @@ static void ext4_update_bh_state(struct
+       } while (unlikely(!try_cmpxchg(&bh->b_state, &old_state, new_state)));
+ }
+ 
++/*
++ * Make sure that the current journal transaction has enough credits to map
++ * one extent. Return -EAGAIN if it cannot extend the current running
++ * transaction.
++ */
++static inline int ext4_journal_ensure_extent_credits(handle_t *handle,
++                                                   struct inode *inode)
++{
++      int credits;
++      int ret;
++
++      /* Called from ext4_da_write_begin() which has no handle started? */
++      if (!handle)
++              return 0;
++
++      credits = ext4_chunk_trans_blocks(inode, 1);
++      ret = __ext4_journal_ensure_credits(handle, credits, credits, 0);
++      return ret <= 0 ? ret : -EAGAIN;
++}
++
+ static int _ext4_get_block(struct inode *inode, sector_t iblock,
+                          struct buffer_head *bh, int flags)
+ {
+@@ -1175,7 +1195,9 @@ int ext4_block_write_begin(handle_t *han
+                       clear_buffer_new(bh);
+               if (!buffer_mapped(bh)) {
+                       WARN_ON(bh->b_size != blocksize);
+-                      err = get_block(inode, block, bh, 1);
++                      err = ext4_journal_ensure_extent_credits(handle, inode);
++                      if (!err)
++                              err = get_block(inode, block, bh, 1);
+                       if (err)
+                               break;
+                       if (buffer_new(bh)) {
+@@ -1374,8 +1396,9 @@ retry_journal:
+                               ext4_orphan_del(NULL, inode);
+               }
+ 
+-              if (ret == -ENOSPC &&
+-                  ext4_should_retry_alloc(inode->i_sb, &retries))
++              if (ret == -EAGAIN ||
++                  (ret == -ENOSPC &&
++                   ext4_should_retry_alloc(inode->i_sb, &retries)))
+                       goto retry_journal;
+               folio_put(folio);
+               return ret;
+@@ -2324,6 +2347,11 @@ static int mpage_map_one_extent(handle_t
+       int get_blocks_flags;
+       int err, dioread_nolock;
+ 
++      /* Make sure transaction has enough credits for this extent */
++      err = ext4_journal_ensure_extent_credits(handle, inode);
++      if (err < 0)
++              return err;
++
+       trace_ext4_da_write_pages_extent(inode, map);
+       /*
+        * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
+@@ -2451,7 +2479,7 @@ static int mpage_map_and_submit_extent(h
+                        * In the case of ENOSPC, if ext4_count_free_blocks()
+                        * is non-zero, a commit should free up blocks.
+                        */
+-                      if ((err == -ENOMEM) ||
++                      if ((err == -ENOMEM) || (err == -EAGAIN) ||
+                           (err == -ENOSPC && ext4_count_free_clusters(sb))) {
+                               /*
+                                * We may have already allocated extents for
+@@ -2957,6 +2985,8 @@ retry:
+                       ret = 0;
+                       continue;
+               }
++              if (ret == -EAGAIN)
++                      ret = 0;
+               /* Fatal error - ENOMEM, EIO... */
+               if (ret)
+                       break;
+@@ -6751,7 +6781,8 @@ vm_fault_t ext4_page_mkwrite(struct vm_f
+ retry_alloc:
+       /* Start journal and allocate blocks */
+       err = ext4_block_page_mkwrite(inode, folio, get_block);
+-      if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
++      if (err == -EAGAIN ||
++          (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)))
+               goto retry_alloc;
+ out_ret:
+       ret = vmf_fs_error(err);
diff --git a/queue-6.16/series b/queue-6.16/series

new file mode 100644 (file)

index 0000000..19be47c
--- /dev/null
+++ b/queue-6.16/series
@@ -0,0 +1,9 @@
+ext4-process-folios-writeback-in-bytes.patch
+ext4-move-the-calculation-of-wbc-nr_to_write-to-mpage_folio_done.patch
+ext4-fix-stale-data-if-it-bail-out-of-the-extents-mapping-loop.patch
+ext4-refactor-the-block-allocation-process-of-ext4_page_mkwrite.patch
+ext4-restart-handle-if-credits-are-insufficient-during-allocating-blocks.patch
+ext4-enhance-tracepoints-during-the-folios-writeback.patch
+ext4-correct-the-reserved-credits-for-extent-conversion.patch
+ext4-reserved-credits-for-one-extent-during-the-folio-writeback.patch
+ext4-replace-ext4_writepage_trans_blocks.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 22 Aug 2025 12:31:08 +0000 (14:31 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 22 Aug 2025 12:31:08 +0000 (14:31 +0200)
queue-6.16/ext4-correct-the-reserved-credits-for-extent-conversion.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-enhance-tracepoints-during-the-folios-writeback.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-fix-stale-data-if-it-bail-out-of-the-extents-mapping-loop.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-move-the-calculation-of-wbc-nr_to_write-to-mpage_folio_done.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-process-folios-writeback-in-bytes.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-refactor-the-block-allocation-process-of-ext4_page_mkwrite.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-replace-ext4_writepage_trans_blocks.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-reserved-credits-for-one-extent-during-the-folio-writeback.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/ext4-restart-handle-if-credits-are-insufficient-during-allocating-blocks.patch	[new file with mode: 0644]	patch \| blob
queue-6.16/series	[new file with mode: 0644]	patch \| blob