From: Greg Kroah-Hartman Date: Sun, 16 Oct 2022 13:24:15 +0000 (+0200) Subject: 5.19-stable patches X-Git-Tag: v5.4.219~97 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e0cd5adf518920c5b5683de696943dd84eb8a6ed;p=thirdparty%2Fkernel%2Fstable-queue.git 5.19-stable patches added patches: ext4-don-t-increase-iversion-counter-for-ea_inodes.patch ext4-ext4_read_bh_lock-should-submit-io-if-the-buffer-isn-t-uptodate.patch ext4-fix-check-for-block-being-out-of-directory-size.patch ext4-fix-dir-corruption-when-ext4_dx_add_entry-fails.patch ext4-fix-i_version-handling-in-ext4.patch ext4-fix-miss-release-buffer-head-in-ext4_fc_write_inode.patch ext4-fix-null-ptr-deref-in-ext4_write_info.patch ext4-fix-potential-memory-leak-in-ext4_fc_record_modified_inode.patch ext4-fix-potential-memory-leak-in-ext4_fc_record_regions.patch ext4-make-ext4_lazyinit_thread-freezable.patch ext4-place-buffer-head-allocation-before-handle-start.patch ext4-unconditionally-enable-the-i_version-counter.patch ext4-update-state-fc_regions_size-after-successful-memory-allocation.patch --- diff --git a/queue-5.19/ext4-don-t-increase-iversion-counter-for-ea_inodes.patch b/queue-5.19/ext4-don-t-increase-iversion-counter-for-ea_inodes.patch new file mode 100644 index 00000000000..bffdf884806 --- /dev/null +++ b/queue-5.19/ext4-don-t-increase-iversion-counter-for-ea_inodes.patch @@ -0,0 +1,44 @@ +From 50f094a5580e6297bf10a807d16f0ee23fa576cf Mon Sep 17 00:00:00 2001 +From: Lukas Czerner +Date: Wed, 24 Aug 2022 18:03:47 +0200 +Subject: ext4: don't increase iversion counter for ea_inodes + +From: Lukas Czerner + +commit 50f094a5580e6297bf10a807d16f0ee23fa576cf upstream. + +ea_inodes are using i_version for storing part of the reference count so +we really need to leave it alone. + +The problem can be reproduced by xfstest ext4/026 when iversion is +enabled. Fix it by not calling inode_inc_iversion() for EXT4_EA_INODE_FL +inodes in ext4_mark_iloc_dirty(). 
+ +Cc: stable@kernel.org +Signed-off-by: Lukas Czerner +Reviewed-by: Jan Kara +Reviewed-by: Jeff Layton +Reviewed-by: Christian Brauner (Microsoft) +Link: https://lore.kernel.org/r/20220824160349.39664-1-lczerner@redhat.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5731,7 +5731,12 @@ int ext4_mark_iloc_dirty(handle_t *handl + } + ext4_fc_track_inode(handle, inode); + +- if (IS_I_VERSION(inode)) ++ /* ++ * ea_inodes are using i_version for storing reference count, don't ++ * mess with it ++ */ ++ if (IS_I_VERSION(inode) && ++ !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) + inode_inc_iversion(inode); + + /* the do_update_inode consumes one bh->b_count */ diff --git a/queue-5.19/ext4-ext4_read_bh_lock-should-submit-io-if-the-buffer-isn-t-uptodate.patch b/queue-5.19/ext4-ext4_read_bh_lock-should-submit-io-if-the-buffer-isn-t-uptodate.patch new file mode 100644 index 00000000000..562ecbfbc9e --- /dev/null +++ b/queue-5.19/ext4-ext4_read_bh_lock-should-submit-io-if-the-buffer-isn-t-uptodate.patch @@ -0,0 +1,80 @@ +From 0b73284c564d3ae4feef4bc920292f004acf4980 Mon Sep 17 00:00:00 2001 +From: Zhang Yi +Date: Wed, 31 Aug 2022 15:46:29 +0800 +Subject: ext4: ext4_read_bh_lock() should submit IO if the buffer isn't uptodate + +From: Zhang Yi + +commit 0b73284c564d3ae4feef4bc920292f004acf4980 upstream. + +Recently we notice that ext4 filesystem would occasionally fail to read +metadata from disk and report error message, but the disk and block +layer looks fine. After analyse, we lockon commit 88dbcbb3a484 +("blkdev: avoid migration stalls for blkdev pages"). 
It provides a +migration method for the bdev, so we can now move a page that has buffers +without extra users, but it locks the buffers on the page, which +breaks the fragile metadata read operation on ext4 filesystem. +ext4_read_bh_lock() was copied from ll_rw_block(); it depends on the +assumption that a locked buffer means it is under IO. So it just +trylocks the buffer and skips submitting IO if the lock failed; after +wait_on_buffer() we conclude IO error because the buffer is not +uptodate. + +This issue can be easily reproduced by adding some delay just after +buffer_migrate_lock_buffers() in __buffer_migrate_folio() and running +fsstress on an ext4 filesystem. + + EXT4-fs error (device pmem1): __ext4_find_entry:1658: inode #73193: + comm fsstress: reading directory lblock 0 + EXT4-fs error (device pmem1): __ext4_find_entry:1658: inode #75334: + comm fsstress: reading directory lblock 0 + +Fix it by removing the trylock logic in ext4_read_bh_lock(), just lock +the buffer and submit IO if it's not uptodate, and keep the trylock only +in the readahead helper. 
+ +Cc: stable@kernel.org +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220831074629.3755110-1-yi.zhang@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -205,19 +205,12 @@ int ext4_read_bh(struct buffer_head *bh, + + int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait) + { +- if (trylock_buffer(bh)) { +- if (wait) +- return ext4_read_bh(bh, op_flags, NULL); ++ lock_buffer(bh); ++ if (!wait) { + ext4_read_bh_nowait(bh, op_flags, NULL); + return 0; + } +- if (wait) { +- wait_on_buffer(bh); +- if (buffer_uptodate(bh)) +- return 0; +- return -EIO; +- } +- return 0; ++ return ext4_read_bh(bh, op_flags, NULL); + } + + /* +@@ -264,7 +257,8 @@ void ext4_sb_breadahead_unmovable(struct + struct buffer_head *bh = sb_getblk_gfp(sb, block, 0); + + if (likely(bh)) { +- ext4_read_bh_lock(bh, REQ_RAHEAD, false); ++ if (trylock_buffer(bh)) ++ ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL); + brelse(bh); + } + } diff --git a/queue-5.19/ext4-fix-check-for-block-being-out-of-directory-size.patch b/queue-5.19/ext4-fix-check-for-block-being-out-of-directory-size.patch new file mode 100644 index 00000000000..89defcbc12d --- /dev/null +++ b/queue-5.19/ext4-fix-check-for-block-being-out-of-directory-size.patch @@ -0,0 +1,36 @@ +From 61a1d87a324ad5e3ed27c6699dfc93218fcf3201 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 22 Aug 2022 13:48:32 +0200 +Subject: ext4: fix check for block being out of directory size + +From: Jan Kara + +commit 61a1d87a324ad5e3ed27c6699dfc93218fcf3201 upstream. + +The check in __ext4_read_dirblock() for block being outside of directory +size was wrong because it compared block number against directory size +in bytes. Fix it. 
+ +Fixes: 65f8ea4cd57d ("ext4: check if directory block is within i_size") +CVE: CVE-2022-1184 +CC: stable@vger.kernel.org +Signed-off-by: Jan Kara +Reviewed-by: Lukas Czerner +Link: https://lore.kernel.org/r/20220822114832.1482-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -126,7 +126,7 @@ static struct buffer_head *__ext4_read_d + struct ext4_dir_entry *dirent; + int is_dx_block = 0; + +- if (block >= inode->i_size) { ++ if (block >= inode->i_size >> inode->i_blkbits) { + ext4_error_inode(inode, func, line, block, + "Attempting to read directory block (%u) that is past i_size (%llu)", + block, inode->i_size); diff --git a/queue-5.19/ext4-fix-dir-corruption-when-ext4_dx_add_entry-fails.patch b/queue-5.19/ext4-fix-dir-corruption-when-ext4_dx_add_entry-fails.patch new file mode 100644 index 00000000000..6080e865ddf --- /dev/null +++ b/queue-5.19/ext4-fix-dir-corruption-when-ext4_dx_add_entry-fails.patch @@ -0,0 +1,95 @@ +From 7177dd009c7c04290891e9a534cd47d1b620bd04 Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Sun, 11 Sep 2022 12:52:04 +0800 +Subject: ext4: fix dir corruption when ext4_dx_add_entry() fails + +From: Zhihao Cheng + +commit 7177dd009c7c04290891e9a534cd47d1b620bd04 upstream. + +Following process may lead to fs corruption: +1. ext4_create(dir/foo) + ext4_add_nondir + ext4_add_entry + ext4_dx_add_entry + a. add_dirent_to_buf + ext4_mark_inode_dirty + ext4_handle_dirty_metadata // dir inode bh is recorded into journal + b. 
ext4_append // dx_get_count(entries) == dx_get_limit(entries) + ext4_bread(EXT4_GET_BLOCKS_CREATE) + ext4_getblk + ext4_map_blocks + ext4_ext_map_blocks + ext4_mb_new_blocks + dquot_alloc_block + dquot_alloc_space_nodirty + inode_add_bytes // update dir's i_blocks + ext4_ext_insert_extent + ext4_ext_dirty // record extent bh into journal + ext4_handle_dirty_metadata(bh) + // record new block into journal + inode->i_size += inode->i_sb->s_blocksize // new size(in mem) + c. ext4_handle_dirty_dx_node(bh2) + // record dir's new block(dx_node) into journal + d. ext4_handle_dirty_dx_node((frame - 1)->bh) + e. ext4_handle_dirty_dx_node(frame->bh) + f. do_split // ret err! + g. add_dirent_to_buf + ext4_mark_inode_dirty(dir) // update raw_inode on disk(skipped) +2. fsck -a /dev/sdb + drop last block(dx_node) which beyonds dir's i_size. + /dev/sdb: recovering journal + /dev/sdb contains a file system with errors, check forced. + /dev/sdb: Inode 12, end of extent exceeds allowed value + (logical block 128, physical block 3938, len 1) +3. fsck -fn /dev/sdb + dx_node->entry[i].blk > dir->i_size + Pass 2: Checking directory structure + Problem in HTREE directory inode 12 (/dir): bad block number 128. + Clear HTree index? no + Problem in HTREE directory inode 12: block #3 has invalid depth (2) + Problem in HTREE directory inode 12: block #3 has bad max hash + Problem in HTREE directory inode 12: block #3 not referenced + +Fix it by marking inode dirty directly inside ext4_append(). +Fetch a reproducer in [Link]. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216466 +Cc: stable@vger.kernel.org +Signed-off-by: Zhihao Cheng +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220911045204.516460-1-chengzhihao1@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -85,15 +85,20 @@ static struct buffer_head *ext4_append(h + return bh; + inode->i_size += inode->i_sb->s_blocksize; + EXT4_I(inode)->i_disksize = inode->i_size; ++ err = ext4_mark_inode_dirty(handle, inode); ++ if (err) ++ goto out; + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode->i_sb, bh, + EXT4_JTR_NONE); +- if (err) { +- brelse(bh); +- ext4_std_error(inode->i_sb, err); +- return ERR_PTR(err); +- } ++ if (err) ++ goto out; + return bh; ++ ++out: ++ brelse(bh); ++ ext4_std_error(inode->i_sb, err); ++ return ERR_PTR(err); + } + + static int ext4_dx_csum_verify(struct inode *inode, diff --git a/queue-5.19/ext4-fix-i_version-handling-in-ext4.patch b/queue-5.19/ext4-fix-i_version-handling-in-ext4.patch new file mode 100644 index 00000000000..0078b6b5de1 --- /dev/null +++ b/queue-5.19/ext4-fix-i_version-handling-in-ext4.patch @@ -0,0 +1,131 @@ +From a642c2c0827f5604a93f9fa1e5701eecdce4ae22 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Thu, 8 Sep 2022 13:24:42 -0400 +Subject: ext4: fix i_version handling in ext4 + +From: Jeff Layton + +commit a642c2c0827f5604a93f9fa1e5701eecdce4ae22 upstream. + +ext4 currently updates the i_version counter when the atime is updated +during a read. This is less than ideal as it can cause unnecessary cache +invalidations with NFSv4 and unnecessary remeasurements for IMA. + +The increment in ext4_mark_iloc_dirty is also problematic since it can +corrupt the i_version counter for ea_inodes. 
We aren't bumping the file +times in ext4_mark_iloc_dirty, so changing the i_version there seems +wrong, and is the cause of both problems. + +Remove that callsite and add increments to the setattr, setxattr and +ioctl codepaths, at the same times that we update the ctime. The +i_version bump that already happens during timestamp updates should take +care of the rest. + +In ext4_move_extents, increment the i_version on both inodes, and also +add in missing ctime updates. + +[ Some minor updates since we've already enabled the i_version counter + unconditionally already via another patch series. -- TYT ] + +Cc: stable@kernel.org +Cc: Lukas Czerner +Reviewed-by: Jan Kara +Reviewed-by: Christian Brauner (Microsoft) +Signed-off-by: Jeff Layton +Link: https://lore.kernel.org/r/20220908172448.208585-3-jlayton@kernel.org +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 14 +++++--------- + fs/ext4/ioctl.c | 4 ++++ + fs/ext4/xattr.c | 1 + + 3 files changed, 10 insertions(+), 9 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5347,6 +5347,7 @@ int ext4_setattr(struct user_namespace * + int error, rc = 0; + int orphan = 0; + const unsigned int ia_valid = attr->ia_valid; ++ bool inc_ivers = true; + + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; +@@ -5432,8 +5433,8 @@ int ext4_setattr(struct user_namespace * + return -EINVAL; + } + +- if (attr->ia_size != inode->i_size) +- inode_inc_iversion(inode); ++ if (attr->ia_size == inode->i_size) ++ inc_ivers = false; + + if (shrink) { + if (ext4_should_order_data(inode)) { +@@ -5535,6 +5536,8 @@ out_mmap_sem: + } + + if (!error) { ++ if (inc_ivers) ++ inode_inc_iversion(inode); + setattr_copy(mnt_userns, inode, attr); + mark_inode_dirty(inode); + } +@@ -5738,13 +5741,6 @@ int ext4_mark_iloc_dirty(handle_t *handl + } + ext4_fc_track_inode(handle, inode); + +- /* +- * ea_inodes are using i_version for storing reference count, don't +- * mess with it +- 
*/ +- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) +- inode_inc_iversion(inode); +- + /* the do_update_inode consumes one bh->b_count */ + get_bh(iloc->bh); + +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -442,6 +442,7 @@ static long swap_inode_boot_loader(struc + swap_inode_data(inode, inode_bl); + + inode->i_ctime = inode_bl->i_ctime = current_time(inode); ++ inode_inc_iversion(inode); + + inode->i_generation = prandom_u32(); + inode_bl->i_generation = prandom_u32(); +@@ -655,6 +656,7 @@ static int ext4_ioctl_setflags(struct in + ext4_set_inode_flags(inode, false); + + inode->i_ctime = current_time(inode); ++ inode_inc_iversion(inode); + + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + flags_err: +@@ -765,6 +767,7 @@ static int ext4_ioctl_setproject(struct + + EXT4_I(inode)->i_projid = kprojid; + inode->i_ctime = current_time(inode); ++ inode_inc_iversion(inode); + out_dirty: + rc = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (!err) +@@ -1178,6 +1181,7 @@ static long __ext4_ioctl(struct file *fi + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err == 0) { + inode->i_ctime = current_time(inode); ++ inode_inc_iversion(inode); + inode->i_generation = generation; + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + } +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -2412,6 +2412,7 @@ retry_inode: + if (!error) { + ext4_xattr_update_super_block(handle, inode->i_sb); + inode->i_ctime = current_time(inode); ++ inode_inc_iversion(inode); + if (!value) + no_expand = 0; + error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); diff --git a/queue-5.19/ext4-fix-miss-release-buffer-head-in-ext4_fc_write_inode.patch b/queue-5.19/ext4-fix-miss-release-buffer-head-in-ext4_fc_write_inode.patch new file mode 100644 index 00000000000..21bcdf24557 --- /dev/null +++ b/queue-5.19/ext4-fix-miss-release-buffer-head-in-ext4_fc_write_inode.patch @@ -0,0 +1,57 @@ +From ccbf8eeb39f2ff00b54726a2b20b35d788c4ecb5 Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: 
Wed, 14 Sep 2022 18:08:59 +0800 +Subject: ext4: fix miss release buffer head in ext4_fc_write_inode + +From: Ye Bin + +commit ccbf8eeb39f2ff00b54726a2b20b35d788c4ecb5 upstream. + +In 'ext4_fc_write_inode' function first call 'ext4_get_inode_loc' get 'iloc', +after use it miss release 'iloc.bh'. +So just release 'iloc.bh' before 'ext4_fc_write_inode' return. + +Cc: stable@kernel.org +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220914100859.1415196-1-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/fast_commit.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -874,22 +874,25 @@ static int ext4_fc_write_inode(struct in + tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); + tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); + ++ ret = -ECANCELED; + dst = ext4_fc_reserve_space(inode->i_sb, + sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); + if (!dst) +- return -ECANCELED; ++ goto err; + + if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) +- return -ECANCELED; ++ goto err; + dst += sizeof(tl); + if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) +- return -ECANCELED; ++ goto err; + dst += sizeof(fc_inode); + if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc), + inode_len, crc)) +- return -ECANCELED; +- +- return 0; ++ goto err; ++ ret = 0; ++err: ++ brelse(iloc.bh); ++ return ret; + } + + /* diff --git a/queue-5.19/ext4-fix-null-ptr-deref-in-ext4_write_info.patch b/queue-5.19/ext4-fix-null-ptr-deref-in-ext4_write_info.patch new file mode 100644 index 00000000000..444f32ede95 --- /dev/null +++ b/queue-5.19/ext4-fix-null-ptr-deref-in-ext4_write_info.patch @@ -0,0 +1,79 @@ +From f9c1f248607d5546075d3f731e7607d5571f2b60 Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Fri, 5 Aug 2022 20:39:47 +0800 +Subject: ext4: fix null-ptr-deref in 
ext4_write_info + +From: Baokun Li + +commit f9c1f248607d5546075d3f731e7607d5571f2b60 upstream. + +I caught a null-ptr-deref bug as follows: +================================================================== +KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] +CPU: 1 PID: 1589 Comm: umount Not tainted 5.10.0-02219-dirty #339 +RIP: 0010:ext4_write_info+0x53/0x1b0 +[...] +Call Trace: + dquot_writeback_dquots+0x341/0x9a0 + ext4_sync_fs+0x19e/0x800 + __sync_filesystem+0x83/0x100 + sync_filesystem+0x89/0xf0 + generic_shutdown_super+0x79/0x3e0 + kill_block_super+0xa1/0x110 + deactivate_locked_super+0xac/0x130 + deactivate_super+0xb6/0xd0 + cleanup_mnt+0x289/0x400 + __cleanup_mnt+0x16/0x20 + task_work_run+0x11c/0x1c0 + exit_to_user_mode_prepare+0x203/0x210 + syscall_exit_to_user_mode+0x5b/0x3a0 + do_syscall_64+0x59/0x70 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + ================================================================== + +Above issue may happen as follows: +------------------------------------- +exit_to_user_mode_prepare + task_work_run + __cleanup_mnt + cleanup_mnt + deactivate_super + deactivate_locked_super + kill_block_super + generic_shutdown_super + shrink_dcache_for_umount + dentry = sb->s_root + sb->s_root = NULL <--- Here set NULL + sync_filesystem + __sync_filesystem + sb->s_op->sync_fs > ext4_sync_fs + dquot_writeback_dquots + sb->dq_op->write_info > ext4_write_info + ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2) + d_inode(sb->s_root) + s_root->d_inode <--- Null pointer dereference + +To solve this problem, we use ext4_journal_start_sb directly +to avoid s_root being used. 
+ +Cc: stable@kernel.org +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220805123947.565152-1-libaokun1@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -6643,7 +6643,7 @@ static int ext4_write_info(struct super_ + handle_t *handle; + + /* Data block + inode block */ +- handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2); ++ handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_commit_info(sb, type); diff --git a/queue-5.19/ext4-fix-potential-memory-leak-in-ext4_fc_record_modified_inode.patch b/queue-5.19/ext4-fix-potential-memory-leak-in-ext4_fc_record_modified_inode.patch new file mode 100644 index 00000000000..e4a308ba81b --- /dev/null +++ b/queue-5.19/ext4-fix-potential-memory-leak-in-ext4_fc_record_modified_inode.patch @@ -0,0 +1,44 @@ +From 9305721a309fa1bd7c194e0d4a2335bf3b29dca4 Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Wed, 21 Sep 2022 14:40:38 +0800 +Subject: ext4: fix potential memory leak in ext4_fc_record_modified_inode() + +From: Ye Bin + +commit 9305721a309fa1bd7c194e0d4a2335bf3b29dca4 upstream. + +As krealloc may return NULL, in this case 'state->fc_modified_inodes' +may not be freed by krealloc, but 'state->fc_modified_inodes' already +set NULL. Then will lead to 'state->fc_modified_inodes' memory leak. 
+ +Cc: stable@kernel.org +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220921064040.3693255-2-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/fast_commit.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -1494,13 +1494,15 @@ static int ext4_fc_record_modified_inode + if (state->fc_modified_inodes[i] == ino) + return 0; + if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { +- state->fc_modified_inodes = krealloc( +- state->fc_modified_inodes, ++ int *fc_modified_inodes; ++ ++ fc_modified_inodes = krealloc(state->fc_modified_inodes, + sizeof(int) * (state->fc_modified_inodes_size + + EXT4_FC_REPLAY_REALLOC_INCREMENT), + GFP_KERNEL); +- if (!state->fc_modified_inodes) ++ if (!fc_modified_inodes) + return -ENOMEM; ++ state->fc_modified_inodes = fc_modified_inodes; + state->fc_modified_inodes_size += + EXT4_FC_REPLAY_REALLOC_INCREMENT; + } diff --git a/queue-5.19/ext4-fix-potential-memory-leak-in-ext4_fc_record_regions.patch b/queue-5.19/ext4-fix-potential-memory-leak-in-ext4_fc_record_regions.patch new file mode 100644 index 00000000000..02ac8668918 --- /dev/null +++ b/queue-5.19/ext4-fix-potential-memory-leak-in-ext4_fc_record_regions.patch @@ -0,0 +1,49 @@ +From 7069d105c1f15c442b68af43f7fde784f3126739 Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Wed, 21 Sep 2022 14:40:39 +0800 +Subject: ext4: fix potential memory leak in ext4_fc_record_regions() + +From: Ye Bin + +commit 7069d105c1f15c442b68af43f7fde784f3126739 upstream. + +As krealloc may return NULL, in this case 'state->fc_regions' may not be +freed by krealloc, but 'state->fc_regions' already set NULL. Then will +lead to 'state->fc_regions' memory leak. 
+ +Cc: stable@kernel.org +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220921064040.3693255-3-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/fast_commit.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -1687,15 +1687,17 @@ int ext4_fc_record_regions(struct super_ + if (replay && state->fc_regions_used != state->fc_regions_valid) + state->fc_regions_used = state->fc_regions_valid; + if (state->fc_regions_used == state->fc_regions_size) { ++ struct ext4_fc_alloc_region *fc_regions; ++ + state->fc_regions_size += + EXT4_FC_REPLAY_REALLOC_INCREMENT; +- state->fc_regions = krealloc( +- state->fc_regions, +- state->fc_regions_size * +- sizeof(struct ext4_fc_alloc_region), +- GFP_KERNEL); +- if (!state->fc_regions) ++ fc_regions = krealloc(state->fc_regions, ++ state->fc_regions_size * ++ sizeof(struct ext4_fc_alloc_region), ++ GFP_KERNEL); ++ if (!fc_regions) + return -ENOMEM; ++ state->fc_regions = fc_regions; + } + region = &state->fc_regions[state->fc_regions_used++]; + region->ino = ino; diff --git a/queue-5.19/ext4-make-ext4_lazyinit_thread-freezable.patch b/queue-5.19/ext4-make-ext4_lazyinit_thread-freezable.patch new file mode 100644 index 00000000000..e51a4aad31f --- /dev/null +++ b/queue-5.19/ext4-make-ext4_lazyinit_thread-freezable.patch @@ -0,0 +1,32 @@ +From 3b575495ab8dbb4dbe85b4ac7f991693c3668ff5 Mon Sep 17 00:00:00 2001 +From: Lalith Rajendran +Date: Thu, 18 Aug 2022 21:40:49 +0000 +Subject: ext4: make ext4_lazyinit_thread freezable + +From: Lalith Rajendran + +commit 3b575495ab8dbb4dbe85b4ac7f991693c3668ff5 upstream. + +ext4_lazyinit_thread is not set freezable. Hence when the thread calls +try_to_freeze it doesn't freeze during suspend and continues to send +requests to the storage during suspend, resulting in suspend failures. 
+ +Cc: stable@kernel.org +Signed-off-by: Lalith Rajendran +Link: https://lore.kernel.org/r/20220818214049.1519544-1-lalithkraj@google.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -3758,6 +3758,7 @@ static int ext4_lazyinit_thread(void *ar + unsigned long next_wakeup, cur; + + BUG_ON(NULL == eli); ++ set_freezable(); + + cont_thread: + while (true) { diff --git a/queue-5.19/ext4-place-buffer-head-allocation-before-handle-start.patch b/queue-5.19/ext4-place-buffer-head-allocation-before-handle-start.patch new file mode 100644 index 00000000000..bde270737f8 --- /dev/null +++ b/queue-5.19/ext4-place-buffer-head-allocation-before-handle-start.patch @@ -0,0 +1,49 @@ +From d1052d236eddf6aa851434db1897b942e8db9921 Mon Sep 17 00:00:00 2001 +From: Jinke Han +Date: Sat, 3 Sep 2022 09:24:29 +0800 +Subject: ext4: place buffer head allocation before handle start + +From: Jinke Han + +commit d1052d236eddf6aa851434db1897b942e8db9921 upstream. + +In our product environment, we encounter some jbd hung waiting handles to +stop while several writers were doing memory reclaim for buffer head +allocation in delay alloc write path. Ext4 does buffer head allocation while +holding the transaction handle, which may be blocked too long if the reclaim +does not go smoothly. According to our bcc trace, the reclaim time in +buffer head allocation can reach 258s and the jbd transaction commit also +takes almost the same time meanwhile. Except for these extreme cases, +we often see several seconds delays for cgroup memory reclaim on our +servers. This is more likely to happen considering docker environment. + +One thing to note, the allocation of buffer heads is as often as page +allocation or more often when blocksize is less than page size. Just like +page cache allocation, we should also place the buffer head allocation +before starting the handle. 
+ +Cc: stable@kernel.org +Signed-off-by: Jinke Han +Link: https://lore.kernel.org/r/20220903012429.22555-1-hanjinke.666@bytedance.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1188,6 +1188,13 @@ retry_grab: + page = grab_cache_page_write_begin(mapping, index); + if (!page) + return -ENOMEM; ++ /* ++ * The same as page allocation, we prealloc buffer heads before ++ * starting the handle. ++ */ ++ if (!page_has_buffers(page)) ++ create_empty_buffers(page, inode->i_sb->s_blocksize, 0); ++ + unlock_page(page); + + retry_journal: diff --git a/queue-5.19/ext4-unconditionally-enable-the-i_version-counter.patch b/queue-5.19/ext4-unconditionally-enable-the-i_version-counter.patch new file mode 100644 index 00000000000..03e77afa2f9 --- /dev/null +++ b/queue-5.19/ext4-unconditionally-enable-the-i_version-counter.patch @@ -0,0 +1,129 @@ +From 1ff20307393e17dc57fde62226df625a3a3c36e9 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Wed, 24 Aug 2022 18:03:49 +0200 +Subject: ext4: unconditionally enable the i_version counter + +From: Jeff Layton + +commit 1ff20307393e17dc57fde62226df625a3a3c36e9 upstream. + +The original i_version implementation was pretty expensive, requiring a +log flush on every change. Because of this, it was gated behind a mount +option (implemented via the MS_I_VERSION mountoption flag). + +Commit ae5e165d855d (fs: new API for handling inode->i_version) made the +i_version flag much less expensive, so there is no longer a performance +penalty from enabling it. xfs and btrfs already enable it +unconditionally when the on-disk format can support it. + +Have ext4 ignore the SB_I_VERSION flag, and just enable it +unconditionally. While we're in here, mark the i_version mount +option Opt_removed. 
+ +[ Removed leftover bits of i_version from ext4_apply_options() since it + now can't ever be set in ctx->mask_s_flags -- lczerner ] + +Cc: stable@kernel.org +Cc: Dave Chinner +Cc: Benjamin Coddington +Cc: Christoph Hellwig +Cc: Darrick J. Wong +Signed-off-by: Jeff Layton +Signed-off-by: Lukas Czerner +Reviewed-by: Christian Brauner (Microsoft) +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220824160349.39664-3-lczerner@redhat.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 5 ++--- + fs/ext4/super.c | 22 +++++----------------- + 2 files changed, 7 insertions(+), 20 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5425,7 +5425,7 @@ int ext4_setattr(struct user_namespace * + return -EINVAL; + } + +- if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) ++ if (attr->ia_size != inode->i_size) + inode_inc_iversion(inode); + + if (shrink) { +@@ -5735,8 +5735,7 @@ int ext4_mark_iloc_dirty(handle_t *handl + * ea_inodes are using i_version for storing reference count, don't + * mess with it + */ +- if (IS_I_VERSION(inode) && +- !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) ++ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) + inode_inc_iversion(inode); + + /* the do_update_inode consumes one bh->b_count */ +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1585,7 +1585,7 @@ enum { + Opt_inlinecrypt, + Opt_usrjquota, Opt_grpjquota, Opt_quota, + Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, +- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, ++ Opt_usrquota, Opt_grpquota, Opt_prjquota, + Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, + Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, + Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize, +@@ -1694,7 +1694,7 @@ static const struct fs_parameter_spec ex + fsparam_flag ("barrier", Opt_barrier), + fsparam_u32 ("barrier", Opt_barrier), + fsparam_flag ("nobarrier", Opt_nobarrier), +- fsparam_flag 
("i_version", Opt_i_version), ++ fsparam_flag ("i_version", Opt_removed), + fsparam_flag ("dax", Opt_dax), + fsparam_enum ("dax", Opt_dax_type, ext4_param_dax), + fsparam_u32 ("stripe", Opt_stripe), +@@ -2140,11 +2140,6 @@ static int ext4_parse_param(struct fs_co + case Opt_abort: + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); + return 0; +- case Opt_i_version: +- ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); +- ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n"); +- ctx_set_flags(ctx, SB_I_VERSION); +- return 0; + case Opt_inlinecrypt: + #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + ctx_set_flags(ctx, SB_INLINECRYPT); +@@ -2814,14 +2809,6 @@ static void ext4_apply_options(struct fs + sb->s_flags &= ~ctx->mask_s_flags; + sb->s_flags |= ctx->vals_s_flags; + +- /* +- * i_version differs from common mount option iversion so we have +- * to let vfs know that it was set, otherwise it would get cleared +- * on remount +- */ +- if (ctx->mask_s_flags & SB_I_VERSION) +- fc->sb_flags |= SB_I_VERSION; +- + #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; }) + APPLY(s_commit_interval); + APPLY(s_stripe); +@@ -2970,8 +2957,6 @@ static int _ext4_show_options(struct seq + SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); + if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) + SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); +- if (sb->s_flags & SB_I_VERSION) +- SEQ_OPTS_PUTS("i_version"); + if (nodefs || sbi->s_stripe) + SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); + if (nodefs || EXT4_MOUNT_DATA_FLAGS & +@@ -4631,6 +4616,9 @@ static int __ext4_fill_super(struct fs_c + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? 
SB_POSIXACL : 0); + ++ /* i_version is always enabled now */ ++ sb->s_flags |= SB_I_VERSION; ++ + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && + (ext4_has_compat_features(sb) || + ext4_has_ro_compat_features(sb) || diff --git a/queue-5.19/ext4-update-state-fc_regions_size-after-successful-memory-allocation.patch b/queue-5.19/ext4-update-state-fc_regions_size-after-successful-memory-allocation.patch new file mode 100644 index 00000000000..5f820931fae --- /dev/null +++ b/queue-5.19/ext4-update-state-fc_regions_size-after-successful-memory-allocation.patch @@ -0,0 +1,45 @@ +From 27cd49780381c6ccbf248798e5e8fd076200ffba Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Wed, 21 Sep 2022 14:40:40 +0800 +Subject: ext4: update 'state->fc_regions_size' after successful memory allocation + +From: Ye Bin + +commit 27cd49780381c6ccbf248798e5e8fd076200ffba upstream. + +To avoid 'state->fc_regions_size' mismatching 'state->fc_regions' +when reallocating 'fc_regions' fails, only update 'state->fc_regions_size' +after 'state->fc_regions' is allocated successfully. 
+ +Cc: stable@kernel.org +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220921064040.3693255-4-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/fast_commit.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -1689,14 +1689,15 @@ int ext4_fc_record_regions(struct super_ + if (state->fc_regions_used == state->fc_regions_size) { + struct ext4_fc_alloc_region *fc_regions; + +- state->fc_regions_size += +- EXT4_FC_REPLAY_REALLOC_INCREMENT; + fc_regions = krealloc(state->fc_regions, +- state->fc_regions_size * +- sizeof(struct ext4_fc_alloc_region), ++ sizeof(struct ext4_fc_alloc_region) * ++ (state->fc_regions_size + ++ EXT4_FC_REPLAY_REALLOC_INCREMENT), + GFP_KERNEL); + if (!fc_regions) + return -ENOMEM; ++ state->fc_regions_size += ++ EXT4_FC_REPLAY_REALLOC_INCREMENT; + state->fc_regions = fc_regions; + } + region = &state->fc_regions[state->fc_regions_used++]; diff --git a/queue-5.19/series b/queue-5.19/series index 2b5ef1dd304..a0b70cdb246 100644 --- a/queue-5.19/series +++ b/queue-5.19/series @@ -121,3 +121,16 @@ jbd2-fix-potential-use-after-free-in-jbd2_fc_wait_bufs.patch jbd2-add-miss-release-buffer-head-in-fc_do_one_pass.patch ext2-add-sanity-checks-for-group-and-filesystem-size.patch ext4-avoid-crash-when-inline-data-creation-follows-dio-write.patch +ext4-fix-null-ptr-deref-in-ext4_write_info.patch +ext4-make-ext4_lazyinit_thread-freezable.patch +ext4-fix-check-for-block-being-out-of-directory-size.patch +ext4-don-t-increase-iversion-counter-for-ea_inodes.patch +ext4-unconditionally-enable-the-i_version-counter.patch +ext4-ext4_read_bh_lock-should-submit-io-if-the-buffer-isn-t-uptodate.patch +ext4-place-buffer-head-allocation-before-handle-start.patch +ext4-fix-i_version-handling-in-ext4.patch +ext4-fix-dir-corruption-when-ext4_dx_add_entry-fails.patch 
+ext4-fix-miss-release-buffer-head-in-ext4_fc_write_inode.patch +ext4-fix-potential-memory-leak-in-ext4_fc_record_modified_inode.patch +ext4-fix-potential-memory-leak-in-ext4_fc_record_regions.patch +ext4-update-state-fc_regions_size-after-successful-memory-allocation.patch