git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 16 Oct 2025 14:19:26 +0000 (16:19 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 16 Oct 2025 14:19:26 +0000 (16:19 +0200)
added patches:
ext4-correctly-handle-queries-for-metadata-mappings.patch
ext4-guard-against-ea-inode-refcount-underflow-in-xattr-update.patch
ext4-increase-i_disksize-to-offset-len-in-ext4_update_disksize_before_punch.patch
ext4-verify-orphan-file-size-is-not-too-big.patch

queue-5.15/ext4-correctly-handle-queries-for-metadata-mappings.patch [new file with mode: 0644]
queue-5.15/ext4-guard-against-ea-inode-refcount-underflow-in-xattr-update.patch [new file with mode: 0644]
queue-5.15/ext4-increase-i_disksize-to-offset-len-in-ext4_update_disksize_before_punch.patch [new file with mode: 0644]
queue-5.15/ext4-verify-orphan-file-size-is-not-too-big.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/ext4-correctly-handle-queries-for-metadata-mappings.patch b/queue-5.15/ext4-correctly-handle-queries-for-metadata-mappings.patch
new file mode 100644 (file)
index 0000000..8279eeb
--- /dev/null
@@ -0,0 +1,98 @@
+From 46c22a8bb4cb03211da1100d7ee4a2005bf77c70 Mon Sep 17 00:00:00 2001
+From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Date: Fri, 5 Sep 2025 13:44:46 +0530
+Subject: ext4: correctly handle queries for metadata mappings
+
+From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+
+commit 46c22a8bb4cb03211da1100d7ee4a2005bf77c70 upstream.
+
+Currently, our handling of metadata is _ambiguous_ in some scenarios,
+that is, we end up returning unknown if the range only covers the
+mapping partially.
+
+For example, in the following case:
+
+$ xfs_io -c fsmap -d
+
+  0: 254:16 [0..7]: static fs metadata 8
+  1: 254:16 [8..15]: special 102:1 8
+  2: 254:16 [16..5127]: special 102:2 5112
+  3: 254:16 [5128..5255]: special 102:3 128
+  4: 254:16 [5256..5383]: special 102:4 128
+  5: 254:16 [5384..70919]: inodes 65536
+  6: 254:16 [70920..70967]: unknown 48
+  ...
+
+$ xfs_io -c fsmap -d 24 33
+
+  0: 254:16 [24..39]: unknown 16  <--- incomplete reporting
+
+$ xfs_io -c fsmap -d 24 33  (With patch)
+
+    0: 254:16 [16..5127]: special 102:2 5112
+
+This is because earlier in ext4_getfsmap_meta_helper, we end up ignoring
+any extent that starts before our queried range, but overlaps it. While
+the man page [1] is a bit ambiguous on this, this fix makes the output
+make more sense since we are anyway returning an "unknown" extent. This
+is also consistent with how XFS does it:
+
+$ xfs_io -c fsmap -d
+
+  ...
+  6: 254:16 [104..127]: free space 24
+  7: 254:16 [128..191]: inodes 64
+  ...
+
+$ xfs_io -c fsmap -d 137 150
+
+  0: 254:16 [128..191]: inodes 64   <-- full extent returned
+
+ [1] https://man7.org/linux/man-pages/man2/ioctl_getfsmap.2.html
+
+Reported-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Cc: stable@kernel.org
+Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Message-ID: <023f37e35ee280cd9baac0296cbadcbe10995cab.1757058211.git.ojaswin@linux.ibm.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/fsmap.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/fsmap.c
++++ b/fs/ext4/fsmap.c
+@@ -74,7 +74,8 @@ static int ext4_getfsmap_dev_compare(con
+ static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info,
+                                            struct ext4_fsmap *rec)
+ {
+-      return rec->fmr_physical < info->gfi_low.fmr_physical;
++      return rec->fmr_physical + rec->fmr_length <=
++             info->gfi_low.fmr_physical;
+ }
+ /*
+@@ -200,15 +201,18 @@ static int ext4_getfsmap_meta_helper(str
+                         ext4_group_first_block_no(sb, agno));
+       fs_end = fs_start + EXT4_C2B(sbi, len);
+-      /* Return relevant extents from the meta_list */
++      /*
++       * Return relevant extents from the meta_list. We emit all extents that
++       * partially/fully overlap with the query range
++       */
+       list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) {
+-              if (p->fmr_physical < info->gfi_next_fsblk) {
++              if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) {
+                       list_del(&p->fmr_list);
+                       kfree(p);
+                       continue;
+               }
+-              if (p->fmr_physical <= fs_start ||
+-                  p->fmr_physical + p->fmr_length <= fs_end) {
++              if (p->fmr_physical <= fs_end &&
++                  p->fmr_physical + p->fmr_length > fs_start) {
+                       /* Emit the retained free extent record if present */
+                       if (info->gfi_lastfree.fmr_owner) {
+                               error = ext4_getfsmap_helper(sb, info,
diff --git a/queue-5.15/ext4-guard-against-ea-inode-refcount-underflow-in-xattr-update.patch b/queue-5.15/ext4-guard-against-ea-inode-refcount-underflow-in-xattr-update.patch
new file mode 100644 (file)
index 0000000..6659973
--- /dev/null
@@ -0,0 +1,80 @@
+From 57295e835408d8d425bef58da5253465db3d6888 Mon Sep 17 00:00:00 2001
+From: Ahmet Eray Karadag <eraykrdg1@gmail.com>
+Date: Sat, 20 Sep 2025 05:13:43 +0300
+Subject: ext4: guard against EA inode refcount underflow in xattr update
+
+From: Ahmet Eray Karadag <eraykrdg1@gmail.com>
+
+commit 57295e835408d8d425bef58da5253465db3d6888 upstream.
+
+syzkaller found a path where ext4_xattr_inode_update_ref() reads an EA
+inode refcount that is already <= 0 and then applies ref_change (often
+-1). That lets the refcount underflow and we proceed with a bogus value,
+triggering errors like:
+
+  EXT4-fs error: EA inode <n> ref underflow: ref_count=-1 ref_change=-1
+  EXT4-fs warning: ea_inode dec ref err=-117
+
+Make the invariant explicit: if the current refcount is non-positive,
+treat this as on-disk corruption, emit ext4_error_inode(), and fail the
+operation with -EFSCORRUPTED instead of updating the refcount. Delete the
+WARN_ONCE() as negative refcounts are now impossible; keep error reporting
+in ext4_error_inode().
+
+This prevents the underflow and the follow-on orphan/cleanup churn.
+
+Reported-by: syzbot+0be4f339a8218d2a5bb1@syzkaller.appspotmail.com
+Fixes: https://syzbot.org/bug?extid=0be4f339a8218d2a5bb1
+Cc: stable@kernel.org
+Co-developed-by: Albin Babu Varghese <albinbabuvarghese20@gmail.com>
+Signed-off-by: Albin Babu Varghese <albinbabuvarghese20@gmail.com>
+Signed-off-by: Ahmet Eray Karadag <eraykrdg1@gmail.com>
+Message-ID: <20250920021342.45575-1-eraykrdg1@gmail.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/xattr.c |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -987,7 +987,7 @@ static int ext4_xattr_inode_update_ref(h
+                                      int ref_change)
+ {
+       struct ext4_iloc iloc;
+-      s64 ref_count;
++      u64 ref_count;
+       int ret;
+       inode_lock_nested(ea_inode, I_MUTEX_XATTR);
+@@ -997,13 +997,17 @@ static int ext4_xattr_inode_update_ref(h
+               goto out;
+       ref_count = ext4_xattr_inode_get_ref(ea_inode);
++      if ((ref_count == 0 && ref_change < 0) || (ref_count == U64_MAX && ref_change > 0)) {
++              ext4_error_inode(ea_inode, __func__, __LINE__, 0,
++                      "EA inode %lu ref wraparound: ref_count=%lld ref_change=%d",
++                      ea_inode->i_ino, ref_count, ref_change);
++              ret = -EFSCORRUPTED;
++              goto out;
++      }
+       ref_count += ref_change;
+       ext4_xattr_inode_set_ref(ea_inode, ref_count);
+       if (ref_change > 0) {
+-              WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
+-                        ea_inode->i_ino, ref_count);
+-
+               if (ref_count == 1) {
+                       WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
+                                 ea_inode->i_ino, ea_inode->i_nlink);
+@@ -1012,9 +1016,6 @@ static int ext4_xattr_inode_update_ref(h
+                       ext4_orphan_del(handle, ea_inode);
+               }
+       } else {
+-              WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+-                        ea_inode->i_ino, ref_count);
+-
+               if (ref_count == 0) {
+                       WARN_ONCE(ea_inode->i_nlink != 1,
+                                 "EA inode %lu i_nlink=%u",
diff --git a/queue-5.15/ext4-increase-i_disksize-to-offset-len-in-ext4_update_disksize_before_punch.patch b/queue-5.15/ext4-increase-i_disksize-to-offset-len-in-ext4_update_disksize_before_punch.patch
new file mode 100644 (file)
index 0000000..d574f58
--- /dev/null
@@ -0,0 +1,91 @@
+From 9d80eaa1a1d37539224982b76c9ceeee736510b9 Mon Sep 17 00:00:00 2001
+From: Yongjian Sun <sunyongjian1@huawei.com>
+Date: Thu, 11 Sep 2025 21:30:24 +0800
+Subject: ext4: increase i_disksize to offset + len in ext4_update_disksize_before_punch()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yongjian Sun <sunyongjian1@huawei.com>
+
+commit 9d80eaa1a1d37539224982b76c9ceeee736510b9 upstream.
+
+After running a stress test combined with fault injection,
+we performed fsck -a followed by fsck -fn on the filesystem
+image. During the second pass, fsck -fn reported:
+
+Inode 131512, end of extent exceeds allowed value
+       (logical block 405, physical block 1180540, len 2)
+
+This inode was not in the orphan list. Analysis revealed the
+following call chain that leads to the inconsistency:
+
+                             ext4_da_write_end()
+                              //does not update i_disksize
+                             ext4_punch_hole()
+                              //truncate folio, keep size
+ext4_page_mkwrite()
+ ext4_block_page_mkwrite()
+  ext4_block_write_begin()
+    ext4_get_block()
+     //insert written extent without updating i_disksize
+journal commit
+echo 1 > /sys/block/xxx/device/delete
+
+da-write path updates i_size but does not update i_disksize. Then
+ext4_punch_hole truncates the da-folio yet still leaves i_disksize
+unchanged (in the ext4_update_disksize_before_punch function, the
+condition offset + len < size is met). Then ext4_page_mkwrite sees
+ext4_nonda_switch return 1 and takes the nodioread_nolock path, the
+folio about to be written has just been punched out, and its offset
+sits beyond the current i_disksize. This may result in a written
+extent being inserted, but again does not update i_disksize. If the
+journal gets committed and then the block device is yanked, we might
+run into this. It should be noted that replacing ext4_punch_hole with
+ext4_zero_range in the call sequence may also trigger this issue, as
+neither will update i_disksize under these circumstances.
+
+To fix this, we can modify ext4_update_disksize_before_punch to
+increase i_disksize to min(i_size, offset + len) when both i_size and
+(offset + len) are greater than i_disksize.
+
+Cc: stable@kernel.org
+Signed-off-by: Yongjian Sun <sunyongjian1@huawei.com>
+Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Message-ID: <20250911133024.1841027-1-sunyongjian@huaweicloud.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3906,7 +3906,11 @@ int ext4_can_truncate(struct inode *inod
+  * We have to make sure i_disksize gets properly updated before we truncate
+  * page cache due to hole punching or zero range. Otherwise i_disksize update
+  * can get lost as it may have been postponed to submission of writeback but
+- * that will never happen after we truncate page cache.
++ * that will never happen if we remove the folio containing i_size from the
++ * page cache. Also if we punch hole within i_size but above i_disksize,
++ * following ext4_page_mkwrite() may mistakenly allocate written blocks over
++ * the hole and thus introduce allocated blocks beyond i_disksize which is
++ * not allowed (e2fsck would complain in case of crash).
+  */
+ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len)
+@@ -3917,9 +3921,11 @@ int ext4_update_disksize_before_punch(st
+       loff_t size = i_size_read(inode);
+       WARN_ON(!inode_is_locked(inode));
+-      if (offset > size || offset + len < size)
++      if (offset > size)
+               return 0;
++      if (offset + len < size)
++              size = offset + len;
+       if (EXT4_I(inode)->i_disksize >= size)
+               return 0;
diff --git a/queue-5.15/ext4-verify-orphan-file-size-is-not-too-big.patch b/queue-5.15/ext4-verify-orphan-file-size-is-not-too-big.patch
new file mode 100644 (file)
index 0000000..c7176a5
--- /dev/null
@@ -0,0 +1,51 @@
+From 0a6ce20c156442a4ce2a404747bb0fb05d54eeb3 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 9 Sep 2025 13:22:07 +0200
+Subject: ext4: verify orphan file size is not too big
+
+From: Jan Kara <jack@suse.cz>
+
+commit 0a6ce20c156442a4ce2a404747bb0fb05d54eeb3 upstream.
+
+In principle orphan file can be arbitrarily large. However orphan replay
+needs to traverse it all and we also pin all its buffers in memory. Thus
+filesystems with absurdly large orphan files can lead to big amounts of
+memory consumed. Limit orphan file size to a sane value and also use
+kvmalloc() for allocating array of block descriptor structures to avoid
+large order allocations for sane but large orphan files.
+
+Reported-by: syzbot+0b92850d68d9b12934f5@syzkaller.appspotmail.com
+Fixes: 02f310fcf47f ("ext4: Speedup ext4 orphan inode handling")
+Cc: stable@kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Message-ID: <20250909112206.10459-2-jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/orphan.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/orphan.c
++++ b/fs/ext4/orphan.c
+@@ -584,9 +584,20 @@ int ext4_init_orphan_info(struct super_b
+               ext4_msg(sb, KERN_ERR, "get orphan inode failed");
+               return PTR_ERR(inode);
+       }
++      /*
++       * This is just an artificial limit to prevent corrupted fs from
++       * consuming absurd amounts of memory when pinning blocks of orphan
++       * file in memory.
++       */
++      if (inode->i_size > 8 << 20) {
++              ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
++                       (unsigned long long)inode->i_size);
++              ret = -EFSCORRUPTED;
++              goto out_put;
++      }
+       oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
+       oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
+-      oi->of_binfo = kmalloc_array(oi->of_blocks,
++      oi->of_binfo = kvmalloc_array(oi->of_blocks,
+                                    sizeof(struct ext4_orphan_block),
+                                    GFP_KERNEL);
+       if (!oi->of_binfo) {
index c85f8e06455539ac8f29dfc82680441c05e6f14a..8623ce03a9c068eaca9f68b4d81fabaabc0d3276 100644 (file)
@@ -217,3 +217,7 @@ selftests-mptcp-join-validate-c-flag-def-limit.patch
 mm-page_alloc-only-set-alloc_highatomic-for-__gpf_high-allocations.patch
 nfsd-fix-destination-buffer-size-in-nfsd4_ssc_setup_dul.patch
 nfsd-nfserr_jukebox-in-nlm_fopen-should-lead-to-a-retry.patch
+ext4-verify-orphan-file-size-is-not-too-big.patch
+ext4-increase-i_disksize-to-offset-len-in-ext4_update_disksize_before_punch.patch
+ext4-correctly-handle-queries-for-metadata-mappings.patch
+ext4-guard-against-ea-inode-refcount-underflow-in-xattr-update.patch