From: Greg Kroah-Hartman Date: Mon, 6 Jun 2022 12:16:41 +0000 (+0200) Subject: 5.17-stable patches X-Git-Tag: v5.10.121~100 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f4aaac4cf74d411ed0e7bd6164fa07bc98a54bd5;p=thirdparty%2Fkernel%2Fstable-queue.git 5.17-stable patches added patches: acpi-property-release-subnode-properties-with-data-nodes.patch ext4-avoid-cycles-in-directory-h-tree.patch ext4-filter-out-ext4_fc_replay-from-on-disk-superblock-field-s_state.patch ext4-fix-bug_on-in-__es_tree_search.patch ext4-fix-bug_on-in-ext4_writepages.patch ext4-fix-journal_ioprio-mount-option-handling.patch ext4-fix-memory-leak-in-parse_apply_sb_mount_options.patch ext4-fix-race-condition-between-ext4_write-and-ext4_convert_inline_data.patch ext4-fix-use-after-free-in-ext4_rename_dir_prepare.patch ext4-fix-warning-in-ext4_handle_inode_extension.patch ext4-mark-group-as-trimmed-only-if-it-was-fully-scanned.patch ext4-verify-dir-block-before-splitting-it.patch --- diff --git a/queue-5.17/acpi-property-release-subnode-properties-with-data-nodes.patch b/queue-5.17/acpi-property-release-subnode-properties-with-data-nodes.patch new file mode 100644 index 00000000000..47ba0775d7d --- /dev/null +++ b/queue-5.17/acpi-property-release-subnode-properties-with-data-nodes.patch @@ -0,0 +1,70 @@ +From 3bd561e1572ee02a50cd1a5be339abf1a5b78d56 Mon Sep 17 00:00:00 2001 +From: Sakari Ailus +Date: Wed, 6 Apr 2022 16:12:08 +0300 +Subject: ACPI: property: Release subnode properties with data nodes + +From: Sakari Ailus + +commit 3bd561e1572ee02a50cd1a5be339abf1a5b78d56 upstream. + +struct acpi_device_properties describes one source of properties present +on either struct acpi_device or struct acpi_data_node. When properties are +parsed, both are populated but when released, only those properties that +are associated with the device node are freed. + +Fix this by also releasing memory of the data node properties. 
+ +Fixes: 5f5e4890d57a ("ACPI / property: Allow multiple property compatible _DSD entries") +Cc: 4.20+ # 4.20+ +Signed-off-by: Sakari Ailus +Reviewed-by: Andy Shevchenko +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/property.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/drivers/acpi/property.c ++++ b/drivers/acpi/property.c +@@ -433,6 +433,16 @@ void acpi_init_properties(struct acpi_de + acpi_extract_apple_properties(adev); + } + ++static void acpi_free_device_properties(struct list_head *list) ++{ ++ struct acpi_device_properties *props, *tmp; ++ ++ list_for_each_entry_safe(props, tmp, list, list) { ++ list_del(&props->list); ++ kfree(props); ++ } ++} ++ + static void acpi_destroy_nondev_subnodes(struct list_head *list) + { + struct acpi_data_node *dn, *next; +@@ -445,22 +455,18 @@ static void acpi_destroy_nondev_subnodes + wait_for_completion(&dn->kobj_done); + list_del(&dn->sibling); + ACPI_FREE((void *)dn->data.pointer); ++ acpi_free_device_properties(&dn->data.properties); + kfree(dn); + } + } + + void acpi_free_properties(struct acpi_device *adev) + { +- struct acpi_device_properties *props, *tmp; +- + acpi_destroy_nondev_subnodes(&adev->data.subnodes); + ACPI_FREE((void *)adev->data.pointer); + adev->data.of_compatible = NULL; + adev->data.pointer = NULL; +- list_for_each_entry_safe(props, tmp, &adev->data.properties, list) { +- list_del(&props->list); +- kfree(props); +- } ++ acpi_free_device_properties(&adev->data.properties); + } + + /** diff --git a/queue-5.17/ext4-avoid-cycles-in-directory-h-tree.patch b/queue-5.17/ext4-avoid-cycles-in-directory-h-tree.patch new file mode 100644 index 00000000000..284b752e665 --- /dev/null +++ b/queue-5.17/ext4-avoid-cycles-in-directory-h-tree.patch @@ -0,0 +1,81 @@ +From 3ba733f879c2a88910744647e41edeefbc0d92b2 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 18 May 2022 11:33:29 +0200 +Subject: ext4: avoid cycles in directory h-tree 
+ +From: Jan Kara + +commit 3ba733f879c2a88910744647e41edeefbc0d92b2 upstream. + +A maliciously corrupted filesystem can contain cycles in the h-tree +stored inside a directory. That can easily lead to the kernel corrupting +tree nodes that were already verified under its hands while doing a node +split and consequently accessing unallocated memory. Fix the problem by +verifying traversed block numbers are unique. + +Cc: stable@vger.kernel.org +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220518093332.13986-2-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -777,12 +777,14 @@ static struct dx_frame * + dx_probe(struct ext4_filename *fname, struct inode *dir, + struct dx_hash_info *hinfo, struct dx_frame *frame_in) + { +- unsigned count, indirect; ++ unsigned count, indirect, level, i; + struct dx_entry *at, *entries, *p, *q, *m; + struct dx_root *root; + struct dx_frame *frame = frame_in; + struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); + u32 hash; ++ ext4_lblk_t block; ++ ext4_lblk_t blocks[EXT4_HTREE_LEVEL]; + + memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); + frame->bh = ext4_read_dirblock(dir, 0, INDEX); +@@ -854,6 +856,8 @@ dx_probe(struct ext4_filename *fname, st + } + + dxtrace(printk("Look up %x", hash)); ++ level = 0; ++ blocks[0] = 0; + while (1) { + count = dx_get_count(entries); + if (!count || count > dx_get_limit(entries)) { +@@ -882,15 +886,27 @@ dx_probe(struct ext4_filename *fname, st + dx_get_block(at))); + frame->entries = entries; + frame->at = at; +- if (!indirect--) ++ ++ block = dx_get_block(at); ++ for (i = 0; i <= level; i++) { ++ if (blocks[i] == block) { ++ ext4_warning_inode(dir, ++ "dx entry: tree cycle block %u points back to block %u", ++ blocks[level], block); ++ goto fail; ++ } ++ } ++ if (++level > indirect) + return 
frame; ++ blocks[level] = block; + frame++; +- frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); ++ frame->bh = ext4_read_dirblock(dir, block, INDEX); + if (IS_ERR(frame->bh)) { + ret_err = (struct dx_frame *) frame->bh; + frame->bh = NULL; + goto fail; + } ++ + entries = ((struct dx_node *) frame->bh->b_data)->entries; + + if (dx_get_limit(entries) != dx_node_limit(dir)) { diff --git a/queue-5.17/ext4-filter-out-ext4_fc_replay-from-on-disk-superblock-field-s_state.patch b/queue-5.17/ext4-filter-out-ext4_fc_replay-from-on-disk-superblock-field-s_state.patch new file mode 100644 index 00000000000..2eca9c78142 --- /dev/null +++ b/queue-5.17/ext4-filter-out-ext4_fc_replay-from-on-disk-superblock-field-s_state.patch @@ -0,0 +1,56 @@ +From c878bea3c9d724ddfa05a813f30de3d25a0ba83f Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Tue, 17 May 2022 13:27:55 -0400 +Subject: ext4: filter out EXT4_FC_REPLAY from on-disk superblock field s_state + +From: Theodore Ts'o + +commit c878bea3c9d724ddfa05a813f30de3d25a0ba83f upstream. + +The EXT4_FC_REPLAY bit in sbi->s_mount_state is used to indicate that +we are in the middle of replaying the fast commit journal. This was +actually a mistake, since the sbi->s_mount_state is initialized from +es->s_state. Arguably s_mount_state is misleadingly named, but the +name is historical --- s_mount_state and s_state date back to ext2. + +What should have been used is the ext4_{set,clear,test}_mount_flag() +inline functions, which set EXT4_MF_* bits in sbi->s_mount_flags. + +The problem with using EXT4_FC_REPLAY is that a maliciously corrupted +superblock could result in EXT4_FC_REPLAY getting set in +s_mount_state. This bypasses some sanity checks, and this can trigger +a BUG() in ext4_es_cache_extent(). As an easy-to-backport fix, filter +out the EXT4_FC_REPLAY bit for now. We should eventually transition +away from EXT4_FC_REPLAY to something like EXT4_MF_REPLAY. 
+ +Cc: stable@kernel.org +Signed-off-by: Theodore Ts'o +Link: https://lore.kernel.org/r/20220420192312.1655305-1-phind.uet@gmail.com +Link: https://lore.kernel.org/r/20220517174028.942119-1-tytso@mit.edu +Reported-by: syzbot+c7358a3cd05ee786eb31@syzkaller.appspotmail.com +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4874,7 +4874,7 @@ static int __ext4_fill_super(struct fs_c + sbi->s_inodes_per_block; + sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); + sbi->s_sbh = bh; +- sbi->s_mount_state = le16_to_cpu(es->s_state); ++ sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; + sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); + sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); + +@@ -6437,7 +6437,8 @@ static int __ext4_remount(struct fs_cont + if (err) + goto restore_opts; + } +- sbi->s_mount_state = le16_to_cpu(es->s_state); ++ sbi->s_mount_state = (le16_to_cpu(es->s_state) & ++ ~EXT4_FC_REPLAY); + + err = ext4_setup_super(sb, es, 0); + if (err) diff --git a/queue-5.17/ext4-fix-bug_on-in-__es_tree_search.patch b/queue-5.17/ext4-fix-bug_on-in-__es_tree_search.patch new file mode 100644 index 00000000000..100d16a3cb3 --- /dev/null +++ b/queue-5.17/ext4-fix-bug_on-in-__es_tree_search.patch @@ -0,0 +1,138 @@ +From d36f6ed761b53933b0b4126486c10d3da7751e7f Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Wed, 18 May 2022 20:08:16 +0800 +Subject: ext4: fix bug_on in __es_tree_search + +From: Baokun Li + +commit d36f6ed761b53933b0b4126486c10d3da7751e7f upstream. + +Hulk Robot reported a BUG_ON: +================================================================== +kernel BUG at fs/ext4/extents_status.c:199! +[...] +RIP: 0010:ext4_es_end fs/ext4/extents_status.c:199 [inline] +RIP: 0010:__es_tree_search+0x1e0/0x260 fs/ext4/extents_status.c:217 +[...] 
+Call Trace: + ext4_es_cache_extent+0x109/0x340 fs/ext4/extents_status.c:766 + ext4_cache_extents+0x239/0x2e0 fs/ext4/extents.c:561 + ext4_find_extent+0x6b7/0xa20 fs/ext4/extents.c:964 + ext4_ext_map_blocks+0x16b/0x4b70 fs/ext4/extents.c:4384 + ext4_map_blocks+0xe26/0x19f0 fs/ext4/inode.c:567 + ext4_getblk+0x320/0x4c0 fs/ext4/inode.c:980 + ext4_bread+0x2d/0x170 fs/ext4/inode.c:1031 + ext4_quota_read+0x248/0x320 fs/ext4/super.c:6257 + v2_read_header+0x78/0x110 fs/quota/quota_v2.c:63 + v2_check_quota_file+0x76/0x230 fs/quota/quota_v2.c:82 + vfs_load_quota_inode+0x5d1/0x1530 fs/quota/dquot.c:2368 + dquot_enable+0x28a/0x330 fs/quota/dquot.c:2490 + ext4_quota_enable fs/ext4/super.c:6137 [inline] + ext4_enable_quotas+0x5d7/0x960 fs/ext4/super.c:6163 + ext4_fill_super+0xa7c9/0xdc00 fs/ext4/super.c:4754 + mount_bdev+0x2e9/0x3b0 fs/super.c:1158 + mount_fs+0x4b/0x1e4 fs/super.c:1261 +[...] +================================================================== + +Above issue may happen as follows: +------------------------------------- +ext4_fill_super + ext4_enable_quotas + ext4_quota_enable + ext4_iget + __ext4_iget + ext4_ext_check_inode + ext4_ext_check + __ext4_ext_check + ext4_valid_extent_entries + Check for overlapping extents doesn't take effect + dquot_enable + vfs_load_quota_inode + v2_check_quota_file + v2_read_header + ext4_quota_read + ext4_bread + ext4_getblk + ext4_map_blocks + ext4_ext_map_blocks + ext4_find_extent + ext4_cache_extents + ext4_es_cache_extent + ext4_es_cache_extent + __es_tree_search + ext4_es_end + BUG_ON(es->es_lblk + es->es_len < es->es_lblk) + +The erroneous ext4 extents are as follows: +0af3 0300 0400 0000 00000000 extent_header +00000000 0100 0000 12000000 extent1 +00000000 0100 0000 18000000 extent2 +02000000 0400 0000 14000000 extent3 + +In the ext4_valid_extent_entries function, +if prev is 0, no error is returned even if lblock<=prev. 
+This was intended to skip the check on the first extent, but +in the error image above, prev=0+1-1=0 when checking the second extent, +so even though lblock<=prev, the function does not return an error. +As a result, BUG_ON occurs in __es_tree_search and the system panics. + +To solve this problem, we only need to check that: +1. The lblock of the first extent is not less than 0. +2. The lblock of the next extent is not less than + the next block of the previous extent. +The same applies to extent_idx. + +Cc: stable@kernel.org +Fixes: 5946d089379a ("ext4: check for overlapping extents in ext4_valid_extent_entries()") +Reported-by: Hulk Robot +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220518120816.1541863-1-libaokun1@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -372,7 +372,7 @@ static int ext4_valid_extent_entries(str + { + unsigned short entries; + ext4_lblk_t lblock = 0; +- ext4_lblk_t prev = 0; ++ ext4_lblk_t cur = 0; + + if (eh->eh_entries == 0) + return 1; +@@ -396,11 +396,11 @@ static int ext4_valid_extent_entries(str + + /* Check for overlapping extents */ + lblock = le32_to_cpu(ext->ee_block); +- if ((lblock <= prev) && prev) { ++ if (lblock < cur) { + *pblk = ext4_ext_pblock(ext); + return 0; + } +- prev = lblock + ext4_ext_get_actual_len(ext) - 1; ++ cur = lblock + ext4_ext_get_actual_len(ext); + ext++; + entries--; + } +@@ -420,13 +420,13 @@ static int ext4_valid_extent_entries(str + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); +- if ((lblock <= prev) && prev) { ++ if (lblock < cur) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } + ext_idx++; + entries--; +- prev = lblock; ++ cur = lblock + 1; + } + } + return 1; diff --git a/queue-5.17/ext4-fix-bug_on-in-ext4_writepages.patch 
b/queue-5.17/ext4-fix-bug_on-in-ext4_writepages.patch new file mode 100644 index 00000000000..87613995e38 --- /dev/null +++ b/queue-5.17/ext4-fix-bug_on-in-ext4_writepages.patch @@ -0,0 +1,107 @@ +From ef09ed5d37b84d18562b30cf7253e57062d0db05 Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Mon, 16 May 2022 20:26:34 +0800 +Subject: ext4: fix bug_on in ext4_writepages + +From: Ye Bin + +commit ef09ed5d37b84d18562b30cf7253e57062d0db05 upstream. + +we got issue as follows: +EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls +------------[ cut here ]------------ +kernel BUG at fs/ext4/inode.c:2708! +invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI +CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155 +RIP: 0010:ext4_writepages+0x1977/0x1c10 +RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000 +RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002 +RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000 +R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001 +R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028 +FS: 00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + do_writepages+0x130/0x3a0 + filemap_fdatawrite_wbc+0x83/0xa0 + filemap_flush+0xab/0xe0 + ext4_alloc_da_blocks+0x51/0x120 + __ext4_ioctl+0x1534/0x3210 + __x64_sys_ioctl+0x12c/0x170 + do_syscall_64+0x3b/0x90 + +It may happen as follows: +1. 
write inline_data inode +vfs_write + new_sync_write + ext4_file_write_iter + ext4_buffered_write_iter + generic_perform_write + ext4_da_write_begin + ext4_da_write_inline_data_begin -> If inline data size too + small will allocate block to write, then mapping will have + dirty page + ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA +2. fallocate +do_vfs_ioctl + ioctl_preallocate + vfs_fallocate + ext4_fallocate + ext4_convert_inline_data + ext4_convert_inline_data_nolock + ext4_map_blocks -> fail will goto restore data + ext4_restore_inline_data + ext4_create_inline_data + ext4_write_inline_data + ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA +3. writepages +__ext4_ioctl + ext4_alloc_da_blocks + filemap_flush + filemap_fdatawrite_wbc + do_writepages + ext4_writepages + if (ext4_has_inline_data(inode)) + BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) + +The root cause of this issue is that we do not destroy inline data until +ext4_writepages is called under delay allocation mode. But the inode may +already have been converted from inline to extent. To solve this issue, we call +filemap_flush first. + +Cc: stable@kernel.org +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220516122634.1690462-1-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inline.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/fs/ext4/inline.c ++++ b/fs/ext4/inline.c +@@ -2005,6 +2005,18 @@ int ext4_convert_inline_data(struct inod + if (!ext4_has_inline_data(inode)) { + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + return 0; ++ } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { ++ /* ++ * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is ++ * cleared. This means we are in the middle of moving of ++ * inline data to delay allocated block. Just force writeout ++ * here to finish conversion. 
++ */ ++ error = filemap_flush(inode->i_mapping); ++ if (error) ++ return error; ++ if (!ext4_has_inline_data(inode)) ++ return 0; + } + + needed_blocks = ext4_writepage_trans_blocks(inode); diff --git a/queue-5.17/ext4-fix-journal_ioprio-mount-option-handling.patch b/queue-5.17/ext4-fix-journal_ioprio-mount-option-handling.patch new file mode 100644 index 00000000000..76d08c175ee --- /dev/null +++ b/queue-5.17/ext4-fix-journal_ioprio-mount-option-handling.patch @@ -0,0 +1,75 @@ +From e4e58e5df309d695799c494958962100a4c25039 Mon Sep 17 00:00:00 2001 +From: Ojaswin Mujoo +Date: Mon, 18 Apr 2022 14:05:45 +0530 +Subject: ext4: fix journal_ioprio mount option handling + +From: Ojaswin Mujoo + +commit e4e58e5df309d695799c494958962100a4c25039 upstream. + +In __ext4_fill_super() we always overwrote the user specified journal_ioprio +value with a default value, expecting parse_apply_sb_mount_options() to +later correctly set ctx->journal_ioprio to the user specified value. +However, if parse_apply_sb_mount_options() returned early because of +empty sbi->es_s->s_mount_opts, the correct journal_ioprio value was +never set. + +This patch fixes __ext4_fill_super() to only use the default value if the +user has not specified any value for journal_ioprio. + +Similarly, the remount behavior was to either use journal_ioprio +value specified during initial mount, or use the default value +irrespective of the journal_ioprio value specified during remount. +This patch modifies this to first check if a new value for ioprio +has been passed during remount and apply it. If no new value is +passed, use the value specified during initial mount. 
+ +Signed-off-by: Ojaswin Mujoo +Reviewed-by: Ritesh Harjani +Tested-by: Ritesh Harjani +Link: https://lore.kernel.org/r/20220418083545.45778-1-ojaswin@linux.ibm.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4394,7 +4394,8 @@ static int __ext4_fill_super(struct fs_c + int silent = fc->sb_flags & SB_SILENT; + + /* Set defaults for the variables that will be set during parsing */ +- ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; ++ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) ++ ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + + sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; + sbi->s_sectors_written_start = +@@ -6261,7 +6262,6 @@ static int __ext4_remount(struct fs_cont + char *to_free[EXT4_MAXQUOTAS]; + #endif + +- ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + + /* Store the original options */ + old_sb_flags = sb->s_flags; +@@ -6287,9 +6287,14 @@ static int __ext4_remount(struct fs_cont + } else + old_opts.s_qf_names[i] = NULL; + #endif +- if (sbi->s_journal && sbi->s_journal->j_task->io_context) +- ctx->journal_ioprio = +- sbi->s_journal->j_task->io_context->ioprio; ++ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) { ++ if (sbi->s_journal && sbi->s_journal->j_task->io_context) ++ ctx->journal_ioprio = ++ sbi->s_journal->j_task->io_context->ioprio; ++ else ++ ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; ++ ++ } + + ext4_apply_options(fc, sb); + diff --git a/queue-5.17/ext4-fix-memory-leak-in-parse_apply_sb_mount_options.patch b/queue-5.17/ext4-fix-memory-leak-in-parse_apply_sb_mount_options.patch new file mode 100644 index 00000000000..a37bac1665c --- /dev/null +++ b/queue-5.17/ext4-fix-memory-leak-in-parse_apply_sb_mount_options.patch @@ -0,0 +1,50 @@ +From c069db76ed7b681c69159f44be96d2137e9ca989 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Fri, 13 May 
2022 16:16:01 -0700 +Subject: ext4: fix memory leak in parse_apply_sb_mount_options() + +From: Eric Biggers + +commit c069db76ed7b681c69159f44be96d2137e9ca989 upstream. + +If processing the on-disk mount options fails after any memory was +allocated in the ext4_fs_context, e.g. s_qf_names, then this memory is +leaked. Fix this by calling ext4_fc_free() instead of kfree() directly. + +Reproducer: + + mkfs.ext4 -F /dev/vdc + tune2fs /dev/vdc -E mount_opts=usrjquota=file + echo clear > /sys/kernel/debug/kmemleak + mount /dev/vdc /vdc + echo scan > /sys/kernel/debug/kmemleak + sleep 5 + echo scan > /sys/kernel/debug/kmemleak + cat /sys/kernel/debug/kmemleak + +Fixes: 7edfd85b1ffd ("ext4: Completely separate options parsing and sb setup") +Cc: stable@vger.kernel.org +Signed-off-by: Eric Biggers +Tested-by: Ritesh Harjani +Link: https://lore.kernel.org/r/20220513231605.175121-2-ebiggers@kernel.org +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2626,8 +2626,10 @@ parse_failed: + ret = ext4_apply_options(fc, sb); + + out_free: +- kfree(s_ctx); +- kfree(fc); ++ if (fc) { ++ ext4_fc_free(fc); ++ kfree(fc); ++ } + kfree(s_mount_opts); + return ret; + } diff --git a/queue-5.17/ext4-fix-race-condition-between-ext4_write-and-ext4_convert_inline_data.patch b/queue-5.17/ext4-fix-race-condition-between-ext4_write-and-ext4_convert_inline_data.patch new file mode 100644 index 00000000000..00bda689613 --- /dev/null +++ b/queue-5.17/ext4-fix-race-condition-between-ext4_write-and-ext4_convert_inline_data.patch @@ -0,0 +1,132 @@ +From f87c7a4b084afc13190cbb263538e444cb2b392a Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Thu, 28 Apr 2022 21:40:31 +0800 +Subject: ext4: fix race condition between ext4_write and ext4_convert_inline_data + +From: Baokun Li + +commit f87c7a4b084afc13190cbb263538e444cb2b392a upstream. 
+ +Hulk Robot reported a BUG_ON: + ================================================================== + EXT4-fs error (device loop3): ext4_mb_generate_buddy:805: group 0, + block bitmap and bg descriptor inconsistent: 25 vs 31513 free clusters + kernel BUG at fs/ext4/ext4_jbd2.c:53! + invalid opcode: 0000 [#1] SMP KASAN PTI + CPU: 0 PID: 25371 Comm: syz-executor.3 Not tainted 5.10.0+ #1 + RIP: 0010:ext4_put_nojournal fs/ext4/ext4_jbd2.c:53 [inline] + RIP: 0010:__ext4_journal_stop+0x10e/0x110 fs/ext4/ext4_jbd2.c:116 + [...] + Call Trace: + ext4_write_inline_data_end+0x59a/0x730 fs/ext4/inline.c:795 + generic_perform_write+0x279/0x3c0 mm/filemap.c:3344 + ext4_buffered_write_iter+0x2e3/0x3d0 fs/ext4/file.c:270 + ext4_file_write_iter+0x30a/0x11c0 fs/ext4/file.c:520 + do_iter_readv_writev+0x339/0x3c0 fs/read_write.c:732 + do_iter_write+0x107/0x430 fs/read_write.c:861 + vfs_writev fs/read_write.c:934 [inline] + do_pwritev+0x1e5/0x380 fs/read_write.c:1031 + [...] + ================================================================== + +Above issue may happen as follows: + cpu1 cpu2 +__________________________|__________________________ +do_pwritev + vfs_writev + do_iter_write + ext4_file_write_iter + ext4_buffered_write_iter + generic_perform_write + ext4_da_write_begin + vfs_fallocate + ext4_fallocate + ext4_convert_inline_data + ext4_convert_inline_data_nolock + ext4_destroy_inline_data_nolock + clear EXT4_STATE_MAY_INLINE_DATA + ext4_map_blocks + ext4_ext_map_blocks + ext4_mb_new_blocks + ext4_mb_regular_allocator + ext4_mb_good_group_nolock + ext4_mb_init_group + ext4_mb_init_cache + ext4_mb_generate_buddy --> error + ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) + ext4_restore_inline_data + set EXT4_STATE_MAY_INLINE_DATA + ext4_block_write_begin + ext4_da_write_end + ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) + ext4_write_inline_data_end + handle=NULL + ext4_journal_stop(handle) + __ext4_journal_stop + ext4_put_nojournal(handle) + ref_cnt = 
(unsigned long)handle + BUG_ON(ref_cnt == 0) ---> BUG_ON + +The lock held by ext4_convert_inline_data is xattr_sem, but the lock +held by generic_perform_write is i_rwsem. Therefore, the two locks can +be concurrent. + +To solve above issue, we add inode_lock() for ext4_convert_inline_data(). +At the same time, move ext4_convert_inline_data() in front of +ext4_punch_hole(), remove similar handling from ext4_punch_hole(). + +Fixes: 0c8d414f163f ("ext4: let fallocate handle inline data correctly") +Cc: stable@vger.kernel.org +Reported-by: Hulk Robot +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220428134031.4153381-1-libaokun1@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 10 ++++++---- + fs/ext4/inode.c | 9 --------- + 2 files changed, 6 insertions(+), 13 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4694,15 +4694,17 @@ long ext4_fallocate(struct file *file, i + FALLOC_FL_INSERT_RANGE)) + return -EOPNOTSUPP; + ++ inode_lock(inode); ++ ret = ext4_convert_inline_data(inode); ++ inode_unlock(inode); ++ if (ret) ++ goto exit; ++ + if (mode & FALLOC_FL_PUNCH_HOLE) { + ret = ext4_punch_hole(file, offset, len); + goto exit; + } + +- ret = ext4_convert_inline_data(inode); +- if (ret) +- goto exit; +- + if (mode & FALLOC_FL_COLLAPSE_RANGE) { + ret = ext4_collapse_range(file, offset, len); + goto exit; +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3958,15 +3958,6 @@ int ext4_punch_hole(struct file *file, l + + trace_ext4_punch_hole(inode, offset, length, 0); + +- ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); +- if (ext4_has_inline_data(inode)) { +- filemap_invalidate_lock(mapping); +- ret = ext4_convert_inline_data(inode); +- filemap_invalidate_unlock(mapping); +- if (ret) +- return ret; +- } +- + /* + * Write out all dirty pages to avoid race conditions + * Then release them. 
diff --git a/queue-5.17/ext4-fix-use-after-free-in-ext4_rename_dir_prepare.patch b/queue-5.17/ext4-fix-use-after-free-in-ext4_rename_dir_prepare.patch new file mode 100644 index 00000000000..d810817360a --- /dev/null +++ b/queue-5.17/ext4-fix-use-after-free-in-ext4_rename_dir_prepare.patch @@ -0,0 +1,125 @@ +From 0be698ecbe4471fcad80e81ec6a05001421041b3 Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Thu, 14 Apr 2022 10:52:23 +0800 +Subject: ext4: fix use-after-free in ext4_rename_dir_prepare + +From: Ye Bin + +commit 0be698ecbe4471fcad80e81ec6a05001421041b3 upstream. + +We got issue as follows: +EXT4-fs (loop0): mounted filesystem without journal. Opts: ,errors=continue +ext4_get_first_dir_block: bh->b_data=0xffff88810bee6000 len=34478 +ext4_get_first_dir_block: *parent_de=0xffff88810beee6ae bh->b_data=0xffff88810bee6000 +ext4_rename_dir_prepare: [1] parent_de=0xffff88810beee6ae +================================================================== +BUG: KASAN: use-after-free in ext4_rename_dir_prepare+0x152/0x220 +Read of size 4 at addr ffff88810beee6ae by task rep/1895 + +CPU: 13 PID: 1895 Comm: rep Not tainted 5.10.0+ #241 +Call Trace: + dump_stack+0xbe/0xf9 + print_address_description.constprop.0+0x1e/0x220 + kasan_report.cold+0x37/0x7f + ext4_rename_dir_prepare+0x152/0x220 + ext4_rename+0xf44/0x1ad0 + ext4_rename2+0x11c/0x170 + vfs_rename+0xa84/0x1440 + do_renameat2+0x683/0x8f0 + __x64_sys_renameat+0x53/0x60 + do_syscall_64+0x33/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x7f45a6fc41c9 +RSP: 002b:00007ffc5a470218 EFLAGS: 00000246 ORIG_RAX: 0000000000000108 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f45a6fc41c9 +RDX: 0000000000000005 RSI: 0000000020000180 RDI: 0000000000000005 +RBP: 00007ffc5a470240 R08: 00007ffc5a470160 R09: 0000000020000080 +R10: 00000000200001c0 R11: 0000000000000246 R12: 0000000000400bb0 +R13: 00007ffc5a470320 R14: 0000000000000000 R15: 0000000000000000 + +The buggy address belongs to the page: +page:00000000440015ce 
refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x10beee +flags: 0x200000000000000() +raw: 0200000000000000 ffffea00043ff4c8 ffffea0004325608 0000000000000000 +raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff88810beee580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff88810beee600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +>ffff88810beee680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff88810beee700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff88810beee780: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +================================================================== +Disabling lock debugging due to kernel taint +ext4_rename_dir_prepare: [2] parent_de->inode=3537895424 +ext4_rename_dir_prepare: [3] dir=0xffff888124170140 +ext4_rename_dir_prepare: [4] ino=2 +ext4_rename_dir_prepare: ent->dir->i_ino=2 parent=-757071872 + +Reason is first directory entry which 'rec_len' is 34478, then will get illegal +parent entry. Now, we do not check directory entry after read directory block +in 'ext4_get_first_dir_block'. +To solve this issue, check directory entry in 'ext4_get_first_dir_block'. + +[ Trigger an ext4_error() instead of just warning if the directory is + missing a '.' or '..' entry. Also make sure we return an error code + if the file system is corrupted. 
-TYT ] + +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20220414025223.4113128-1-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 30 +++++++++++++++++++++++++++--- + 1 file changed, 27 insertions(+), 3 deletions(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -3455,6 +3455,9 @@ static struct buffer_head *ext4_get_firs + struct buffer_head *bh; + + if (!ext4_has_inline_data(inode)) { ++ struct ext4_dir_entry_2 *de; ++ unsigned int offset; ++ + /* The first directory block must not be a hole, so + * treat it as DIRENT_HTREE + */ +@@ -3463,9 +3466,30 @@ static struct buffer_head *ext4_get_firs + *retval = PTR_ERR(bh); + return NULL; + } +- *parent_de = ext4_next_entry( +- (struct ext4_dir_entry_2 *)bh->b_data, +- inode->i_sb->s_blocksize); ++ ++ de = (struct ext4_dir_entry_2 *) bh->b_data; ++ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, ++ bh->b_size, 0) || ++ le32_to_cpu(de->inode) != inode->i_ino || ++ strcmp(".", de->name)) { ++ EXT4_ERROR_INODE(inode, "directory missing '.'"); ++ brelse(bh); ++ *retval = -EFSCORRUPTED; ++ return NULL; ++ } ++ offset = ext4_rec_len_from_disk(de->rec_len, ++ inode->i_sb->s_blocksize); ++ de = ext4_next_entry(de, inode->i_sb->s_blocksize); ++ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, ++ bh->b_size, offset) || ++ le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ++ EXT4_ERROR_INODE(inode, "directory missing '..'"); ++ brelse(bh); ++ *retval = -EFSCORRUPTED; ++ return NULL; ++ } ++ *parent_de = de; ++ + return bh; + } + diff --git a/queue-5.17/ext4-fix-warning-in-ext4_handle_inode_extension.patch b/queue-5.17/ext4-fix-warning-in-ext4_handle_inode_extension.patch new file mode 100644 index 00000000000..24419a3face --- /dev/null +++ b/queue-5.17/ext4-fix-warning-in-ext4_handle_inode_extension.patch @@ -0,0 +1,107 @@ +From f4534c9fc94d22383f187b9409abb3f9df2e3db3 Mon Sep 17 00:00:00 
2001 +From: Ye Bin +Date: Sat, 26 Mar 2022 14:53:51 +0800 +Subject: ext4: fix warning in ext4_handle_inode_extension + +From: Ye Bin + +commit f4534c9fc94d22383f187b9409abb3f9df2e3db3 upstream. + +We got issue as follows: +EXT4-fs error (device loop0) in ext4_reserve_inode_write:5741: Out of memory +EXT4-fs error (device loop0): ext4_setattr:5462: inode #13: comm syz-executor.0: mark_inode_dirty error +EXT4-fs error (device loop0) in ext4_setattr:5519: Out of memory +EXT4-fs error (device loop0): ext4_ind_map_blocks:595: inode #13: comm syz-executor.0: Can't allocate blocks for non-extent mapped inodes with bigalloc +------------[ cut here ]------------ +WARNING: CPU: 1 PID: 4361 at fs/ext4/file.c:301 ext4_file_write_iter+0x11c9/0x1220 +Modules linked in: +CPU: 1 PID: 4361 Comm: syz-executor.0 Not tainted 5.10.0+ #1 +RIP: 0010:ext4_file_write_iter+0x11c9/0x1220 +RSP: 0018:ffff924d80b27c00 EFLAGS: 00010282 +RAX: ffffffff815a3379 RBX: 0000000000000000 RCX: 000000003b000000 +RDX: ffff924d81601000 RSI: 00000000000009cc RDI: 00000000000009cd +RBP: 000000000000000d R08: ffffffffbc5a2c6b R09: 0000902e0e52a96f +R10: ffff902e2b7c1b40 R11: ffff902e2b7c1b40 R12: 000000000000000a +R13: 0000000000000001 R14: ffff902e0e52aa10 R15: ffffffffffffff8b +FS: 00007f81a7f65700(0000) GS:ffff902e3bc80000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: ffffffffff600400 CR3: 000000012db88001 CR4: 00000000003706e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + do_iter_readv_writev+0x2e5/0x360 + do_iter_write+0x112/0x4c0 + do_pwritev+0x1e5/0x390 + __x64_sys_pwritev2+0x7e/0xa0 + do_syscall_64+0x37/0x50 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Above issue may happen as follows: +Assume +inode.i_size=4096 +EXT4_I(inode)->i_disksize=4096 + +step 1: set inode->i_isize = 8192 +ext4_setattr + if (attr->ia_size != inode->i_size) + EXT4_I(inode)->i_disksize = 
attr->ia_size; + rc = ext4_mark_inode_dirty + ext4_reserve_inode_write + ext4_get_inode_loc + __ext4_get_inode_loc + sb_getblk --> return -ENOMEM + ... + if (!error) ->will not update i_size + i_size_write(inode, attr->ia_size); +Now: +inode.i_size=4096 +EXT4_I(inode)->i_disksize=8192 + +step 2: Direct write 4096 bytes +ext4_file_write_iter + ext4_dio_write_iter + iomap_dio_rw ->return error + if (extend) + ext4_handle_inode_extension + WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); +->This then triggers the warning. + +To solve the above issue, if marking the inode dirty fails in ext4_setattr, just +restore 'EXT4_I(inode)->i_disksize' to its old value. + +Signed-off-by: Ye Bin +Link: https://lore.kernel.org/r/20220326065351.761952-1-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5381,6 +5381,7 @@ int ext4_setattr(struct user_namespace * + if (attr->ia_valid & ATTR_SIZE) { + handle_t *handle; + loff_t oldsize = inode->i_size; ++ loff_t old_disksize; + int shrink = (attr->ia_size < inode->i_size); + + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { +@@ -5452,6 +5453,7 @@ int ext4_setattr(struct user_namespace * + inode->i_sb->s_blocksize_bits); + + down_write(&EXT4_I(inode)->i_data_sem); ++ old_disksize = EXT4_I(inode)->i_disksize; + EXT4_I(inode)->i_disksize = attr->ia_size; + rc = ext4_mark_inode_dirty(handle, inode); + if (!error) +@@ -5463,6 +5465,8 @@ int ext4_setattr(struct user_namespace * + */ + if (!error) + i_size_write(inode, attr->ia_size); ++ else ++ EXT4_I(inode)->i_disksize = old_disksize; + up_write(&EXT4_I(inode)->i_data_sem); + ext4_journal_stop(handle); + if (error) diff --git a/queue-5.17/ext4-mark-group-as-trimmed-only-if-it-was-fully-scanned.patch b/queue-5.17/ext4-mark-group-as-trimmed-only-if-it-was-fully-scanned.patch new file mode 100644 index 
00000000000..dc610256837 --- /dev/null +++ b/queue-5.17/ext4-mark-group-as-trimmed-only-if-it-was-fully-scanned.patch @@ -0,0 +1,101 @@ +From d63c00ea435a5352f486c259665a4ced60399421 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Sun, 17 Apr 2022 20:03:15 +0300 +Subject: ext4: mark group as trimmed only if it was fully scanned + +From: Dmitry Monakhov + +commit d63c00ea435a5352f486c259665a4ced60399421 upstream. + +Otherwise nonaligned fstrim calls will work inconveniently for iterative +scanners, for example: + +// trim [0,16MB] for group-1, but mark full group as trimmed +fstrim -o $((1024*1024*128)) -l $((1024*1024*16)) ./m +// handle [16MB,16MB] for group-1, do nothing because group already has the flag. +fstrim -o $((1024*1024*144)) -l $((1024*1024*16)) ./m + +[ Update function documentation for ext4_trim_all_free -- TYT ] + +Signed-off-by: Dmitry Monakhov +Link: https://lore.kernel.org/r/1650214995-860245-1-git-send-email-dmtrmonakhov@yandex-team.ru +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -6377,6 +6377,7 @@ __releases(ext4_group_lock_ptr(sb, e4b-> + * @start: first group block to examine + * @max: last group block to examine + * @minblocks: minimum extent block count ++ * @set_trimmed: set the trimmed flag if at least one block is trimmed + * + * ext4_trim_all_free walks through group's block bitmap searching for free + * extents. 
When the free extent is found, mark it as used in group buddy +@@ -6386,7 +6387,7 @@ __releases(ext4_group_lock_ptr(sb, e4b-> + static ext4_grpblk_t + ext4_trim_all_free(struct super_block *sb, ext4_group_t group, + ext4_grpblk_t start, ext4_grpblk_t max, +- ext4_grpblk_t minblocks) ++ ext4_grpblk_t minblocks, bool set_trimmed) + { + struct ext4_buddy e4b; + int ret; +@@ -6405,7 +6406,7 @@ ext4_trim_all_free(struct super_block *s + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || + minblocks < EXT4_SB(sb)->s_last_trim_minblks) { + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); +- if (ret >= 0) ++ if (ret >= 0 && set_trimmed) + EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); + } else { + ret = 0; +@@ -6442,6 +6443,7 @@ int ext4_trim_fs(struct super_block *sb, + ext4_fsblk_t first_data_blk = + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); + ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); ++ bool whole_group, eof = false; + int ret = 0; + + start = range->start >> sb->s_blocksize_bits; +@@ -6460,8 +6462,10 @@ int ext4_trim_fs(struct super_block *sb, + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } +- if (end >= max_blks) ++ if (end >= max_blks - 1) { + end = max_blks - 1; ++ eof = true; ++ } + if (end <= first_data_blk) + goto out; + if (start < first_data_blk) +@@ -6475,6 +6479,7 @@ int ext4_trim_fs(struct super_block *sb, + + /* end now represents the last cluster to discard in this group */ + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; ++ whole_group = true; + + for (group = first_group; group <= last_group; group++) { + grp = ext4_get_group_info(sb, group); +@@ -6491,12 +6496,13 @@ int ext4_trim_fs(struct super_block *sb, + * change it for the last group, note that last_cluster is + * already computed earlier by ext4_get_group_no_and_offset() + */ +- if (group == last_group) ++ if (group == last_group) { + end = last_cluster; +- ++ whole_group = eof ? 
true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1; ++ } + if (grp->bb_free >= minlen) { + cnt = ext4_trim_all_free(sb, group, first_cluster, +- end, minlen); ++ end, minlen, whole_group); + if (cnt < 0) { + ret = cnt; + break; diff --git a/queue-5.17/ext4-verify-dir-block-before-splitting-it.patch b/queue-5.17/ext4-verify-dir-block-before-splitting-it.patch new file mode 100644 index 00000000000..d5be014c2d3 --- /dev/null +++ b/queue-5.17/ext4-verify-dir-block-before-splitting-it.patch @@ -0,0 +1,88 @@ +From 46c116b920ebec58031f0a78c5ea9599b0d2a371 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 18 May 2022 11:33:28 +0200 +Subject: ext4: verify dir block before splitting it + +From: Jan Kara + +commit 46c116b920ebec58031f0a78c5ea9599b0d2a371 upstream. + +Before splitting a directory block verify its directory entries are sane +so that the splitting code does not access memory it should not. + +Cc: stable@vger.kernel.org +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220518093332.13986-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 32 +++++++++++++++++++++----------- + 1 file changed, 21 insertions(+), 11 deletions(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -277,9 +277,9 @@ static struct dx_frame *dx_probe(struct + struct dx_hash_info *hinfo, + struct dx_frame *frame); + static void dx_release(struct dx_frame *frames); +-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, +- unsigned blocksize, struct dx_hash_info *hinfo, +- struct dx_map_entry map[]); ++static int dx_make_map(struct inode *dir, struct buffer_head *bh, ++ struct dx_hash_info *hinfo, ++ struct dx_map_entry *map_tail); + static void dx_sort_map(struct dx_map_entry *map, unsigned count); + static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from, + char *to, struct dx_map_entry *offsets, +@@ -1249,15 +1249,23 @@ static inline int search_dirblock(struct + * Create map of hash 
values, offsets, and sizes, stored at end of block. + * Returns number of entries mapped. + */ +-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, +- unsigned blocksize, struct dx_hash_info *hinfo, ++static int dx_make_map(struct inode *dir, struct buffer_head *bh, ++ struct dx_hash_info *hinfo, + struct dx_map_entry *map_tail) + { + int count = 0; +- char *base = (char *) de; ++ struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data; ++ unsigned int buflen = bh->b_size; ++ char *base = bh->b_data; + struct dx_hash_info h = *hinfo; + +- while ((char *) de < base + blocksize) { ++ if (ext4_has_metadata_csum(dir->i_sb)) ++ buflen -= sizeof(struct ext4_dir_entry_tail); ++ ++ while ((char *) de < base + buflen) { ++ if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen, ++ ((char *)de) - base)) ++ return -EFSCORRUPTED; + if (de->name_len && de->inode) { + if (ext4_hash_in_dirent(dir)) + h.hash = EXT4_DIRENT_HASH(de); +@@ -1270,8 +1278,7 @@ static int dx_make_map(struct inode *dir + count++; + cond_resched(); + } +- /* XXX: do we need to check rec_len == 0 case? 
-Chris */ +- de = ext4_next_entry(de, blocksize); ++ de = ext4_next_entry(de, dir->i_sb->s_blocksize); + } + return count; + } +@@ -1943,8 +1950,11 @@ static struct ext4_dir_entry_2 *do_split + + /* create map in the end of data2 block */ + map = (struct dx_map_entry *) (data2 + blocksize); +- count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1, +- blocksize, hinfo, map); ++ count = dx_make_map(dir, *bh, hinfo, map); ++ if (count < 0) { ++ err = count; ++ goto journal_error; ++ } + map -= count; + dx_sort_map(map, count); + /* Ensure that neither split block is over half full */ diff --git a/queue-5.17/series b/queue-5.17/series index cbc9b5615e0..e067853e2c7 100644 --- a/queue-5.17/series +++ b/queue-5.17/series @@ -634,3 +634,15 @@ bfq-remove-pointless-bfq_init_rq-calls.patch bfq-track-whether-bfq_group-is-still-online.patch bfq-get-rid-of-__bio_blkcg-usage.patch bfq-make-sure-bfqg-for-which-we-are-queueing-requests-is-online.patch +ext4-mark-group-as-trimmed-only-if-it-was-fully-scanned.patch +ext4-fix-use-after-free-in-ext4_rename_dir_prepare.patch +ext4-fix-journal_ioprio-mount-option-handling.patch +ext4-fix-race-condition-between-ext4_write-and-ext4_convert_inline_data.patch +ext4-fix-warning-in-ext4_handle_inode_extension.patch +ext4-fix-memory-leak-in-parse_apply_sb_mount_options.patch +ext4-fix-bug_on-in-ext4_writepages.patch +ext4-filter-out-ext4_fc_replay-from-on-disk-superblock-field-s_state.patch +ext4-fix-bug_on-in-__es_tree_search.patch +ext4-verify-dir-block-before-splitting-it.patch +ext4-avoid-cycles-in-directory-h-tree.patch +acpi-property-release-subnode-properties-with-data-nodes.patch