From 2b0078f83d7e368e5fa8da06fe3dce444c12755c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2023 14:09:22 +0000 Subject: [PATCH] 6.5-stable patches added patches: drm-amd-pm-handle-non-terminated-overdrive-commands.patch drm-bridge-it66121-get_edid-callback-must-not-return-err-pointers.patch ext4-add-missed-brelse-in-update_backups.patch ext4-apply-umask-if-acl-support-is-disabled.patch ext4-correct-offset-of-gdb-backup-in-non-meta_bg-group-to-update_backups.patch ext4-correct-return-value-of-ext4_convert_meta_bg.patch ext4-correct-the-start-block-of-counting-reserved-clusters.patch ext4-fix-race-between-writepages-and-remount.patch ext4-fix-racy-may-inline-data-check-in-dio-write.patch ext4-make-sure-allocate-pending-entry-not-fail.patch ext4-mark-buffer-new-if-it-is-unwritten-to-avoid-stale-data-exposure.patch ext4-properly-sync-file-size-update-after-o_sync-direct-io.patch ext4-remove-gdb-backup-copy-for-meta-bg-in-setup_new_flex_group_blocks.patch revert-net-r8169-disable-multicast-filter-for-rtl8168h-and-rtl8107e.patch --- ...le-non-terminated-overdrive-commands.patch | 55 ++++ ...allback-must-not-return-err-pointers.patch | 55 ++++ ...-add-missed-brelse-in-update_backups.patch | 35 ++ ...ply-umask-if-acl-support-is-disabled.patch | 45 +++ ...-non-meta_bg-group-to-update_backups.patch | 57 ++++ ...return-value-of-ext4_convert_meta_bg.patch | 33 ++ ...-block-of-counting-reserved-clusters.patch | 51 +++ ...-race-between-writepages-and-remount.patch | 158 +++++++++ ...y-may-inline-data-check-in-dio-write.patch | 76 +++++ ...sure-allocate-pending-entry-not-fail.patch | 300 ++++++++++++++++++ ...written-to-avoid-stale-data-exposure.patch | 140 ++++++++ ...e-size-update-after-o_sync-direct-io.patch | 241 ++++++++++++++ ...ta-bg-in-setup_new_flex_group_blocks.patch | 67 ++++ ...ast-filter-for-rtl8168h-and-rtl8107e.patch | 45 +++ queue-6.5/series | 14 + 15 files changed, 1372 insertions(+) create mode 100644 
queue-6.5/drm-amd-pm-handle-non-terminated-overdrive-commands.patch create mode 100644 queue-6.5/drm-bridge-it66121-get_edid-callback-must-not-return-err-pointers.patch create mode 100644 queue-6.5/ext4-add-missed-brelse-in-update_backups.patch create mode 100644 queue-6.5/ext4-apply-umask-if-acl-support-is-disabled.patch create mode 100644 queue-6.5/ext4-correct-offset-of-gdb-backup-in-non-meta_bg-group-to-update_backups.patch create mode 100644 queue-6.5/ext4-correct-return-value-of-ext4_convert_meta_bg.patch create mode 100644 queue-6.5/ext4-correct-the-start-block-of-counting-reserved-clusters.patch create mode 100644 queue-6.5/ext4-fix-race-between-writepages-and-remount.patch create mode 100644 queue-6.5/ext4-fix-racy-may-inline-data-check-in-dio-write.patch create mode 100644 queue-6.5/ext4-make-sure-allocate-pending-entry-not-fail.patch create mode 100644 queue-6.5/ext4-mark-buffer-new-if-it-is-unwritten-to-avoid-stale-data-exposure.patch create mode 100644 queue-6.5/ext4-properly-sync-file-size-update-after-o_sync-direct-io.patch create mode 100644 queue-6.5/ext4-remove-gdb-backup-copy-for-meta-bg-in-setup_new_flex_group_blocks.patch create mode 100644 queue-6.5/revert-net-r8169-disable-multicast-filter-for-rtl8168h-and-rtl8107e.patch diff --git a/queue-6.5/drm-amd-pm-handle-non-terminated-overdrive-commands.patch b/queue-6.5/drm-amd-pm-handle-non-terminated-overdrive-commands.patch new file mode 100644 index 00000000000..be06a80cae6 --- /dev/null +++ b/queue-6.5/drm-amd-pm-handle-non-terminated-overdrive-commands.patch @@ -0,0 +1,55 @@ +From 08e9ebc75b5bcfec9d226f9e16bab2ab7b25a39a Mon Sep 17 00:00:00 2001 +From: Bas Nieuwenhuizen +Date: Tue, 17 Oct 2023 16:01:35 +0200 +Subject: drm/amd/pm: Handle non-terminated overdrive commands. + +From: Bas Nieuwenhuizen + +commit 08e9ebc75b5bcfec9d226f9e16bab2ab7b25a39a upstream. + +The incoming strings might not be terminated by a newline +or a 0. 
+ +(found while testing a program that just wrote the string + itself, causing a crash) + +Cc: stable@vger.kernel.org +Fixes: e3933f26b657 ("drm/amd/pp: Add edit/commit/show OD clock/voltage support in sysfs") +Signed-off-by: Bas Nieuwenhuizen +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c ++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c +@@ -734,7 +734,7 @@ static ssize_t amdgpu_set_pp_od_clk_volt + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + +- if (count > 127) ++ if (count > 127 || count == 0) + return -EINVAL; + + if (*buf == 's') +@@ -754,7 +754,8 @@ static ssize_t amdgpu_set_pp_od_clk_volt + else + return -EINVAL; + +- memcpy(buf_cpy, buf, count+1); ++ memcpy(buf_cpy, buf, count); ++ buf_cpy[count] = 0; + + tmp_str = buf_cpy; + +@@ -771,6 +772,9 @@ static ssize_t amdgpu_set_pp_od_clk_volt + return -EINVAL; + parameter_size++; + ++ if (!tmp_str) ++ break; ++ + while (isspace(*tmp_str)) + tmp_str++; + } diff --git a/queue-6.5/drm-bridge-it66121-get_edid-callback-must-not-return-err-pointers.patch b/queue-6.5/drm-bridge-it66121-get_edid-callback-must-not-return-err-pointers.patch new file mode 100644 index 00000000000..5711ac9a52a --- /dev/null +++ b/queue-6.5/drm-bridge-it66121-get_edid-callback-must-not-return-err-pointers.patch @@ -0,0 +1,55 @@ +From 81995ee1620318b4c7bbeb02bcc372da2c078c76 Mon Sep 17 00:00:00 2001 +From: Jani Nikula +Date: Thu, 14 Sep 2023 16:11:59 +0300 +Subject: drm: bridge: it66121: ->get_edid callback must not return err pointers + +From: Jani Nikula + +commit 81995ee1620318b4c7bbeb02bcc372da2c078c76 upstream. + +The drm stack does not expect error valued pointers for EDID anywhere. 
+ +Fixes: e66856508746 ("drm: bridge: it66121: Set DDC preamble only once before reading EDID") +Cc: Paul Cercueil +Cc: Robert Foss +Cc: Phong LE +Cc: Neil Armstrong +Cc: Andrzej Hajda +Cc: Robert Foss +Cc: Laurent Pinchart +Cc: Jonas Karlman +Cc: Jernej Skrabec +Cc: # v6.3+ +Signed-off-by: Jani Nikula +Reviewed-by: Neil Armstrong +Signed-off-by: Paul Cercueil +Link: https://patchwork.freedesktop.org/patch/msgid/20230914131159.2472513-1-jani.nikula@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/bridge/ite-it66121.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c +index 3c9b42c9d2ee..1cf3fb1f13dc 100644 +--- a/drivers/gpu/drm/bridge/ite-it66121.c ++++ b/drivers/gpu/drm/bridge/ite-it66121.c +@@ -884,14 +884,14 @@ static struct edid *it66121_bridge_get_edid(struct drm_bridge *bridge, + mutex_lock(&ctx->lock); + ret = it66121_preamble_ddc(ctx); + if (ret) { +- edid = ERR_PTR(ret); ++ edid = NULL; + goto out_unlock; + } + + ret = regmap_write(ctx->regmap, IT66121_DDC_HEADER_REG, + IT66121_DDC_HEADER_EDID); + if (ret) { +- edid = ERR_PTR(ret); ++ edid = NULL; + goto out_unlock; + } + +-- +2.43.0 + diff --git a/queue-6.5/ext4-add-missed-brelse-in-update_backups.patch b/queue-6.5/ext4-add-missed-brelse-in-update_backups.patch new file mode 100644 index 00000000000..ee822893a23 --- /dev/null +++ b/queue-6.5/ext4-add-missed-brelse-in-update_backups.patch @@ -0,0 +1,35 @@ +From 9adac8b01f4be28acd5838aade42b8daa4f0b642 Mon Sep 17 00:00:00 2001 +From: Kemeng Shi +Date: Sun, 27 Aug 2023 01:47:01 +0800 +Subject: ext4: add missed brelse in update_backups + +From: Kemeng Shi + +commit 9adac8b01f4be28acd5838aade42b8daa4f0b642 upstream. 
+ +add missed brelse in update_backups + +Signed-off-by: Kemeng Shi +Reviewed-by: Theodore Ts'o +Link: https://lore.kernel.org/r/20230826174712.4059355-3-shikemeng@huaweicloud.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/resize.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1186,8 +1186,10 @@ static void update_backups(struct super_ + ext4_group_first_block_no(sb, group)); + BUFFER_TRACE(bh, "get_write_access"); + if ((err = ext4_journal_get_write_access(handle, sb, bh, +- EXT4_JTR_NONE))) ++ EXT4_JTR_NONE))) { ++ brelse(bh); + break; ++ } + lock_buffer(bh); + memcpy(bh->b_data, data, size); + if (rest) diff --git a/queue-6.5/ext4-apply-umask-if-acl-support-is-disabled.patch b/queue-6.5/ext4-apply-umask-if-acl-support-is-disabled.patch new file mode 100644 index 00000000000..2fdda0c5fef --- /dev/null +++ b/queue-6.5/ext4-apply-umask-if-acl-support-is-disabled.patch @@ -0,0 +1,45 @@ +From 484fd6c1de13b336806a967908a927cc0356e312 Mon Sep 17 00:00:00 2001 +From: Max Kellermann +Date: Tue, 19 Sep 2023 10:18:23 +0200 +Subject: ext4: apply umask if ACL support is disabled + +From: Max Kellermann + +commit 484fd6c1de13b336806a967908a927cc0356e312 upstream. + +The function ext4_init_acl() calls posix_acl_create() which is +responsible for applying the umask. But without +CONFIG_EXT4_FS_POSIX_ACL, ext4_init_acl() is an empty inline function, +and nobody applies the umask. + +This fixes a bug which causes the umask to be ignored with O_TMPFILE +on ext4: + + https://github.com/MusicPlayerDaemon/MPD/issues/558 + https://bugs.gentoo.org/show_bug.cgi?id=686142#c3 + https://bugzilla.kernel.org/show_bug.cgi?id=203625 + +Reviewed-by: "J. 
Bruce Fields" +Cc: stable@vger.kernel.org +Signed-off-by: Max Kellermann +Link: https://lore.kernel.org/r/20230919081824.1096619-1-max.kellermann@ionos.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/acl.h | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/ext4/acl.h ++++ b/fs/ext4/acl.h +@@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, str + static inline int + ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) + { ++ /* usually, the umask is applied by posix_acl_create(), but if ++ ext4 ACL support is disabled at compile time, we need to do ++ it here, because posix_acl_create() will never be called */ ++ inode->i_mode &= ~current_umask(); ++ + return 0; + } + #endif /* CONFIG_EXT4_FS_POSIX_ACL */ diff --git a/queue-6.5/ext4-correct-offset-of-gdb-backup-in-non-meta_bg-group-to-update_backups.patch b/queue-6.5/ext4-correct-offset-of-gdb-backup-in-non-meta_bg-group-to-update_backups.patch new file mode 100644 index 00000000000..eba1bcb4a2e --- /dev/null +++ b/queue-6.5/ext4-correct-offset-of-gdb-backup-in-non-meta_bg-group-to-update_backups.patch @@ -0,0 +1,57 @@ +From 31f13421c004a420c0e9d288859c9ea9259ea0cc Mon Sep 17 00:00:00 2001 +From: Kemeng Shi +Date: Sun, 27 Aug 2023 01:47:00 +0800 +Subject: ext4: correct offset of gdb backup in non meta_bg group to update_backups + +From: Kemeng Shi + +commit 31f13421c004a420c0e9d288859c9ea9259ea0cc upstream. + +Commit 0aeaa2559d6d5 ("ext4: fix corruption when online resizing a 1K +bigalloc fs") found that primary superblock's offset in its group is +not equal to offset of backup superblock in its group when block size +is 1K and bigalloc is enabled. As group descriptor blocks are right +after superblock, we can't pass block number of gdb to update_backups +for the same reason. + +The root cause of the issue above is that leading 1K padding block is +counted as data block offset for primary block while backup block has no +padding block offset in its group.
+ +Remove padding data block count to fix the issue for gdb backups. + +For meta_bg case, update_backups treat blk_off as block number, do no +conversion in this case. + +Signed-off-by: Kemeng Shi +Reviewed-by: Theodore Ts'o +Link: https://lore.kernel.org/r/20230826174712.4059355-2-shikemeng@huaweicloud.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/resize.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1601,6 +1601,8 @@ exit_journal: + int gdb_num_end = ((group + flex_gd->count - 1) / + EXT4_DESC_PER_BLOCK(sb)); + int meta_bg = ext4_has_feature_meta_bg(sb); ++ sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - ++ ext4_group_first_block_no(sb, 0); + sector_t old_gdb = 0; + + update_backups(sb, ext4_group_first_block_no(sb, 0), +@@ -1612,8 +1614,8 @@ exit_journal: + gdb_num); + if (old_gdb == gdb_bh->b_blocknr) + continue; +- update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, +- gdb_bh->b_size, meta_bg); ++ update_backups(sb, gdb_bh->b_blocknr - padding_blocks, ++ gdb_bh->b_data, gdb_bh->b_size, meta_bg); + old_gdb = gdb_bh->b_blocknr; + } + } diff --git a/queue-6.5/ext4-correct-return-value-of-ext4_convert_meta_bg.patch b/queue-6.5/ext4-correct-return-value-of-ext4_convert_meta_bg.patch new file mode 100644 index 00000000000..8aa88c9ea9d --- /dev/null +++ b/queue-6.5/ext4-correct-return-value-of-ext4_convert_meta_bg.patch @@ -0,0 +1,33 @@ +From 48f1551592c54f7d8e2befc72a99ff4e47f7dca0 Mon Sep 17 00:00:00 2001 +From: Kemeng Shi +Date: Sun, 27 Aug 2023 01:47:02 +0800 +Subject: ext4: correct return value of ext4_convert_meta_bg + +From: Kemeng Shi + +commit 48f1551592c54f7d8e2befc72a99ff4e47f7dca0 upstream. + +Avoid to ignore error in "err". 
+ +Signed-off-by: Kemeng Shi +Link: https://lore.kernel.org/r/20230826174712.4059355-4-shikemeng@huaweicloud.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/resize.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1982,9 +1982,7 @@ static int ext4_convert_meta_bg(struct s + + errout: + ret = ext4_journal_stop(handle); +- if (!err) +- err = ret; +- return ret; ++ return err ? err : ret; + + invalid_resize_inode: + ext4_error(sb, "corrupted/inconsistent resize inode"); diff --git a/queue-6.5/ext4-correct-the-start-block-of-counting-reserved-clusters.patch b/queue-6.5/ext4-correct-the-start-block-of-counting-reserved-clusters.patch new file mode 100644 index 00000000000..00880d26050 --- /dev/null +++ b/queue-6.5/ext4-correct-the-start-block-of-counting-reserved-clusters.patch @@ -0,0 +1,51 @@ +From 40ea98396a3659062267d1fe5f99af4f7e4f05e3 Mon Sep 17 00:00:00 2001 +From: Zhang Yi +Date: Thu, 24 Aug 2023 17:26:04 +0800 +Subject: ext4: correct the start block of counting reserved clusters + +From: Zhang Yi + +commit 40ea98396a3659062267d1fe5f99af4f7e4f05e3 upstream. + +When big allocate feature is enabled, we need to count and update +reserved clusters before removing a delayed only extent_status entry. +{init|count|get}_rsvd() have already done this, but the start block +number of this counting isn't correct in the following case. + + lblk end + | | + v v + ------------------------- + | | orig_es + ------------------------- + ^ ^ + len1 is 0 | len2 | + +If the start block of the orig_es entry found is bigger than lblk, we +pass lblk as the start block to count_rsvd() while the length is correct, +so the range that ends up being counted is shifted. This patch fixes this by +passing the start blocks to 'orig_es->lblk + len1'.
+ +Signed-off-by: Zhang Yi +Cc: stable@kernel.org +Link: https://lore.kernel.org/r/20230824092619.1327976-2-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents_status.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/ext4/extents_status.c ++++ b/fs/ext4/extents_status.c +@@ -1431,8 +1431,8 @@ static int __es_remove_extent(struct ino + } + } + if (count_reserved) +- count_rsvd(inode, lblk, orig_es.es_len - len1 - len2, +- &orig_es, &rc); ++ count_rsvd(inode, orig_es.es_lblk + len1, ++ orig_es.es_len - len1 - len2, &orig_es, &rc); + goto out_get_reserved; + } + diff --git a/queue-6.5/ext4-fix-race-between-writepages-and-remount.patch b/queue-6.5/ext4-fix-race-between-writepages-and-remount.patch new file mode 100644 index 00000000000..b91816e9b24 --- /dev/null +++ b/queue-6.5/ext4-fix-race-between-writepages-and-remount.patch @@ -0,0 +1,158 @@ +From 745f17a4166e79315e4b7f33ce89d03e75a76983 Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Wed, 24 May 2023 15:25:38 +0800 +Subject: ext4: fix race between writepages and remount + +From: Baokun Li + +commit 745f17a4166e79315e4b7f33ce89d03e75a76983 upstream. + +We got a WARNING in ext4_add_complete_io: +================================================================== + WARNING: at fs/ext4/page-io.c:231 ext4_put_io_end_defer+0x182/0x250 + CPU: 10 PID: 77 Comm: ksoftirqd/10 Tainted: 6.3.0-rc2 #85 + RIP: 0010:ext4_put_io_end_defer+0x182/0x250 [ext4] + [...] 
+ Call Trace: + + ext4_end_bio+0xa8/0x240 [ext4] + bio_endio+0x195/0x310 + blk_update_request+0x184/0x770 + scsi_end_request+0x2f/0x240 + scsi_io_completion+0x75/0x450 + scsi_finish_command+0xef/0x160 + scsi_complete+0xa3/0x180 + blk_complete_reqs+0x60/0x80 + blk_done_softirq+0x25/0x40 + __do_softirq+0x119/0x4c8 + run_ksoftirqd+0x42/0x70 + smpboot_thread_fn+0x136/0x3c0 + kthread+0x140/0x1a0 + ret_from_fork+0x2c/0x50 +================================================================== + +Above issue may happen as follows: + + cpu1 cpu2 +----------------------------|---------------------------- +mount -o dioread_lock +ext4_writepages + ext4_do_writepages + *if (ext4_should_dioread_nolock(inode))* + // rsv_blocks is not assigned here + mount -o remount,dioread_nolock + ext4_journal_start_with_reserve + __ext4_journal_start + __ext4_journal_start_sb + jbd2__journal_start + *if (rsv_blocks)* + // h_rsv_handle is not initialized here + mpage_map_and_submit_extent + mpage_map_one_extent + dioread_nolock = ext4_should_dioread_nolock(inode) + if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) + mpd->io_submit.io_end->handle = handle->h_rsv_handle + ext4_set_io_unwritten_flag + io_end->flag |= EXT4_IO_END_UNWRITTEN + // now io_end->handle is NULL but has EXT4_IO_END_UNWRITTEN flag + +scsi_finish_command + scsi_io_completion + scsi_io_completion_action + scsi_end_request + blk_update_request + req_bio_endio + bio_endio + bio->bi_end_io > ext4_end_bio + ext4_put_io_end_defer + ext4_add_complete_io + // trigger WARN_ON(!io_end->handle && sbi->s_journal); + +The immediate cause of this problem is that ext4_should_dioread_nolock() +function returns inconsistent values in the ext4_do_writepages() and +mpage_map_one_extent(). There are four conditions in this function that +can be changed at mount time to cause this problem. 
These four conditions +can be divided into two categories: + + (1) journal_data and EXT4_EXTENTS_FL, which can be changed by ioctl + (2) DELALLOC and DIOREAD_NOLOCK, which can be changed by remount + +The two in the first category have been fixed by commit c8585c6fcaf2 +("ext4: fix races between changing inode journal mode and ext4_writepages") +and commit cb85f4d23f79 ("ext4: fix race between writepages and enabling +EXT4_EXTENTS_FL") respectively. + +Two cases in the other category have not yet been fixed, and the above +issue is caused by this situation. We refer to the fix for the first +category, when applying options during remount, we grab s_writepages_rwsem +to avoid racing with writepages ops to trigger this problem. + +Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io") +Cc: stable@vger.kernel.org +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230524072538.2883391-1-libaokun1@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ext4.h | 3 ++- + fs/ext4/super.c | 14 ++++++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1674,7 +1674,8 @@ struct ext4_sb_info { + + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA +- * or EXTENTS flag. ++ * or EXTENTS flag or between writepages ops and changing DELALLOC or ++ * DIOREAD_NOLOCK mount options on remount. 
+ */ + struct percpu_rw_semaphore s_writepages_rwsem; + struct dax_device *s_daxdev; +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -6425,6 +6425,7 @@ static int __ext4_remount(struct fs_cont + struct ext4_mount_options old_opts; + ext4_group_t g; + int err = 0; ++ int alloc_ctx; + #ifdef CONFIG_QUOTA + int enable_quota = 0; + int i, j; +@@ -6465,7 +6466,16 @@ static int __ext4_remount(struct fs_cont + + } + ++ /* ++ * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause ++ * two calls to ext4_should_dioread_nolock() to return inconsistent ++ * values, triggering WARN_ON in ext4_add_complete_io(). we grab ++ * here s_writepages_rwsem to avoid race between writepages ops and ++ * remount. ++ */ ++ alloc_ctx = ext4_writepages_down_write(sb); + ext4_apply_options(fc, sb); ++ ext4_writepages_up_write(sb, alloc_ctx); + + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ + test_opt(sb, JOURNAL_CHECKSUM)) { +@@ -6683,6 +6693,8 @@ restore_opts: + if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) && + sb_any_quota_suspended(sb)) + dquot_resume(sb, -1); ++ ++ alloc_ctx = ext4_writepages_down_write(sb); + sb->s_flags = old_sb_flags; + sbi->s_mount_opt = old_opts.s_mount_opt; + sbi->s_mount_opt2 = old_opts.s_mount_opt2; +@@ -6691,6 +6703,8 @@ restore_opts: + sbi->s_commit_interval = old_opts.s_commit_interval; + sbi->s_min_batch_time = old_opts.s_min_batch_time; + sbi->s_max_batch_time = old_opts.s_max_batch_time; ++ ext4_writepages_up_write(sb, alloc_ctx); ++ + if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) + ext4_release_system_zone(sb); + #ifdef CONFIG_QUOTA diff --git a/queue-6.5/ext4-fix-racy-may-inline-data-check-in-dio-write.patch b/queue-6.5/ext4-fix-racy-may-inline-data-check-in-dio-write.patch new file mode 100644 index 00000000000..5853d8c1821 --- /dev/null +++ b/queue-6.5/ext4-fix-racy-may-inline-data-check-in-dio-write.patch @@ -0,0 +1,76 @@ +From ce56d21355cd6f6937aca32f1f44ca749d1e4808 Mon Sep 17 00:00:00 2001 +From: 
Brian Foster +Date: Mon, 2 Oct 2023 14:50:20 -0400 +Subject: ext4: fix racy may inline data check in dio write + +From: Brian Foster + +commit ce56d21355cd6f6937aca32f1f44ca749d1e4808 upstream. + +syzbot reports that the following warning from ext4_iomap_begin() +triggers as of the commit referenced below: + + if (WARN_ON_ONCE(ext4_has_inline_data(inode))) + return -ERANGE; + +This occurs during a dio write, which is never expected to encounter +an inode with inline data. To enforce this behavior, +ext4_dio_write_iter() checks the current inline state of the inode +and clears the MAY_INLINE_DATA state flag to either fall back to +buffered writes, or enforce that any other writers in progress on +the inode are not allowed to create inline data. + +The problem is that the check for existing inline data and the state +flag can span a lock cycle. For example, if the ilock is originally +locked shared and subsequently upgraded to exclusive, another writer +may have reacquired the lock and created inline data before the dio +write task acquires the lock and proceeds. + +The commit referenced below loosens the lock requirements to allow +some forms of unaligned dio writes to occur under shared lock, but +AFAICT the inline data check was technically already racy for any +dio write that would have involved a lock cycle. Regardless, lift +clearing of the state bit to the same lock critical section that +checks for preexisting inline data on the inode to close the race. 
+ +Cc: stable@kernel.org +Reported-by: syzbot+307da6ca5cb0d01d581a@syzkaller.appspotmail.com +Fixes: 310ee0902b8d ("ext4: allow concurrent unaligned dio overwrites") +Signed-off-by: Brian Foster +Link: https://lore.kernel.org/r/20231002185020.531537-1-bfoster@redhat.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/file.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -537,18 +537,20 @@ static ssize_t ext4_dio_write_iter(struc + return ext4_buffered_write_iter(iocb, from); + } + ++ /* ++ * Prevent inline data from being created since we are going to allocate ++ * blocks for DIO. We know the inode does not currently have inline data ++ * because ext4_should_use_dio() checked for it, but we have to clear ++ * the state flag before the write checks because a lock cycle could ++ * introduce races with other writers. ++ */ ++ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ++ + ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend, + &unwritten, &dio_flags); + if (ret <= 0) + return ret; + +- /* +- * Make sure inline data cannot be created anymore since we are going +- * to allocate blocks for DIO. We know the inode does not have any +- * inline data now because ext4_dio_supported() checked for that. 
+- */ +- ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); +- + offset = iocb->ki_pos; + count = ret; + diff --git a/queue-6.5/ext4-make-sure-allocate-pending-entry-not-fail.patch b/queue-6.5/ext4-make-sure-allocate-pending-entry-not-fail.patch new file mode 100644 index 00000000000..1e16de38716 --- /dev/null +++ b/queue-6.5/ext4-make-sure-allocate-pending-entry-not-fail.patch @@ -0,0 +1,300 @@ +From 8e387c89e96b9543a339f84043cf9df15fed2632 Mon Sep 17 00:00:00 2001 +From: Zhang Yi +Date: Thu, 24 Aug 2023 17:26:05 +0800 +Subject: ext4: make sure allocate pending entry not fail + +From: Zhang Yi + +commit 8e387c89e96b9543a339f84043cf9df15fed2632 upstream. + +__insert_pending() allocate memory in atomic context, so the allocation +could fail, but we are not handling that failure now. It could lead +ext4_es_remove_extent() to get wrong reserved clusters, and the global +data blocks reservation count will be incorrect. The same to +extents_status entry preallocation, preallocate pending entry out of the +i_es_lock with __GFP_NOFAIL, make sure __insert_pending() and +__revise_pending() always succeeds. 
+ +Signed-off-by: Zhang Yi +Cc: stable@kernel.org +Link: https://lore.kernel.org/r/20230824092619.1327976-3-yi.zhang@huaweicloud.com +Reviewed-by: Jan Kara +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents_status.c | 123 ++++++++++++++++++++++++++++++++++------------- + 1 file changed, 89 insertions(+), 34 deletions(-) + +--- a/fs/ext4/extents_status.c ++++ b/fs/ext4/extents_status.c +@@ -152,8 +152,9 @@ static int __es_remove_extent(struct ino + static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); + static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, + struct ext4_inode_info *locked_ei); +-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, +- ext4_lblk_t len); ++static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, ++ ext4_lblk_t len, ++ struct pending_reservation **prealloc); + + int __init ext4_init_es(void) + { +@@ -448,6 +449,19 @@ static void ext4_es_list_del(struct inod + spin_unlock(&sbi->s_es_lock); + } + ++static inline struct pending_reservation *__alloc_pending(bool nofail) ++{ ++ if (!nofail) ++ return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); ++ ++ return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL); ++} ++ ++static inline void __free_pending(struct pending_reservation *pr) ++{ ++ kmem_cache_free(ext4_pending_cachep, pr); ++} ++ + /* + * Returns true if we cannot fail to allocate memory for this extent_status + * entry and cannot reclaim it until its status changes. 
+@@ -836,11 +850,12 @@ void ext4_es_insert_extent(struct inode + { + struct extent_status newes; + ext4_lblk_t end = lblk + len - 1; +- int err1 = 0; +- int err2 = 0; ++ int err1 = 0, err2 = 0, err3 = 0; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct extent_status *es1 = NULL; + struct extent_status *es2 = NULL; ++ struct pending_reservation *pr = NULL; ++ bool revise_pending = false; + + if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + return; +@@ -868,11 +883,17 @@ void ext4_es_insert_extent(struct inode + + ext4_es_insert_extent_check(inode, &newes); + ++ revise_pending = sbi->s_cluster_ratio > 1 && ++ test_opt(inode->i_sb, DELALLOC) && ++ (status & (EXTENT_STATUS_WRITTEN | ++ EXTENT_STATUS_UNWRITTEN)); + retry: + if (err1 && !es1) + es1 = __es_alloc_extent(true); + if ((err1 || err2) && !es2) + es2 = __es_alloc_extent(true); ++ if ((err1 || err2 || err3) && revise_pending && !pr) ++ pr = __alloc_pending(true); + write_lock(&EXT4_I(inode)->i_es_lock); + + err1 = __es_remove_extent(inode, lblk, end, NULL, es1); +@@ -897,13 +918,18 @@ retry: + es2 = NULL; + } + +- if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && +- (status & EXTENT_STATUS_WRITTEN || +- status & EXTENT_STATUS_UNWRITTEN)) +- __revise_pending(inode, lblk, len); ++ if (revise_pending) { ++ err3 = __revise_pending(inode, lblk, len, &pr); ++ if (err3 != 0) ++ goto error; ++ if (pr) { ++ __free_pending(pr); ++ pr = NULL; ++ } ++ } + error: + write_unlock(&EXT4_I(inode)->i_es_lock); +- if (err1 || err2) ++ if (err1 || err2 || err3) + goto retry; + + ext4_es_print_tree(inode); +@@ -1311,7 +1337,7 @@ static unsigned int get_rsvd(struct inod + rc->ndelonly--; + node = rb_next(&pr->rb_node); + rb_erase(&pr->rb_node, &tree->root); +- kmem_cache_free(ext4_pending_cachep, pr); ++ __free_pending(pr); + if (!node) + break; + pr = rb_entry(node, struct pending_reservation, +@@ -1907,11 +1933,13 @@ static struct pending_reservation *__get + * + * @inode - file containing the 
cluster + * @lblk - logical block in the cluster to be added ++ * @prealloc - preallocated pending entry + * + * Returns 0 on successful insertion and -ENOMEM on failure. If the + * pending reservation is already in the set, returns successfully. + */ +-static int __insert_pending(struct inode *inode, ext4_lblk_t lblk) ++static int __insert_pending(struct inode *inode, ext4_lblk_t lblk, ++ struct pending_reservation **prealloc) + { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; +@@ -1937,10 +1965,15 @@ static int __insert_pending(struct inode + } + } + +- pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); +- if (pr == NULL) { +- ret = -ENOMEM; +- goto out; ++ if (likely(*prealloc == NULL)) { ++ pr = __alloc_pending(false); ++ if (!pr) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else { ++ pr = *prealloc; ++ *prealloc = NULL; + } + pr->lclu = lclu; + +@@ -1970,7 +2003,7 @@ static void __remove_pending(struct inod + if (pr != NULL) { + tree = &EXT4_I(inode)->i_pending_tree; + rb_erase(&pr->rb_node, &tree->root); +- kmem_cache_free(ext4_pending_cachep, pr); ++ __free_pending(pr); + } + } + +@@ -2029,10 +2062,10 @@ void ext4_es_insert_delayed_block(struct + bool allocated) + { + struct extent_status newes; +- int err1 = 0; +- int err2 = 0; ++ int err1 = 0, err2 = 0, err3 = 0; + struct extent_status *es1 = NULL; + struct extent_status *es2 = NULL; ++ struct pending_reservation *pr = NULL; + + if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + return; +@@ -2052,6 +2085,8 @@ retry: + es1 = __es_alloc_extent(true); + if ((err1 || err2) && !es2) + es2 = __es_alloc_extent(true); ++ if ((err1 || err2 || err3) && allocated && !pr) ++ pr = __alloc_pending(true); + write_lock(&EXT4_I(inode)->i_es_lock); + + err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); +@@ -2074,11 +2109,18 @@ retry: + es2 = NULL; + } + +- if (allocated) +- __insert_pending(inode, lblk); ++ if (allocated) { ++ err3 = 
__insert_pending(inode, lblk, &pr); ++ if (err3 != 0) ++ goto error; ++ if (pr) { ++ __free_pending(pr); ++ pr = NULL; ++ } ++ } + error: + write_unlock(&EXT4_I(inode)->i_es_lock); +- if (err1 || err2) ++ if (err1 || err2 || err3) + goto retry; + + ext4_es_print_tree(inode); +@@ -2184,21 +2226,24 @@ unsigned int ext4_es_delayed_clu(struct + * @inode - file containing the range + * @lblk - logical block defining the start of range + * @len - length of range in blocks ++ * @prealloc - preallocated pending entry + * + * Used after a newly allocated extent is added to the extents status tree. + * Requires that the extents in the range have either written or unwritten + * status. Must be called while holding i_es_lock. + */ +-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, +- ext4_lblk_t len) ++static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, ++ ext4_lblk_t len, ++ struct pending_reservation **prealloc) + { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + ext4_lblk_t end = lblk + len - 1; + ext4_lblk_t first, last; + bool f_del = false, l_del = false; ++ int ret = 0; + + if (len == 0) +- return; ++ return 0; + + /* + * Two cases - block range within single cluster and block range +@@ -2219,7 +2264,9 @@ static void __revise_pending(struct inod + f_del = __es_scan_range(inode, &ext4_es_is_delonly, + first, lblk - 1); + if (f_del) { +- __insert_pending(inode, first); ++ ret = __insert_pending(inode, first, prealloc); ++ if (ret < 0) ++ goto out; + } else { + last = EXT4_LBLK_CMASK(sbi, end) + + sbi->s_cluster_ratio - 1; +@@ -2227,9 +2274,11 @@ static void __revise_pending(struct inod + l_del = __es_scan_range(inode, + &ext4_es_is_delonly, + end + 1, last); +- if (l_del) +- __insert_pending(inode, last); +- else ++ if (l_del) { ++ ret = __insert_pending(inode, last, prealloc); ++ if (ret < 0) ++ goto out; ++ } else + __remove_pending(inode, last); + } + } else { +@@ -2237,18 +2286,24 @@ static void __revise_pending(struct inod + if 
(first != lblk) + f_del = __es_scan_range(inode, &ext4_es_is_delonly, + first, lblk - 1); +- if (f_del) +- __insert_pending(inode, first); +- else ++ if (f_del) { ++ ret = __insert_pending(inode, first, prealloc); ++ if (ret < 0) ++ goto out; ++ } else + __remove_pending(inode, first); + + last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; + if (last != end) + l_del = __es_scan_range(inode, &ext4_es_is_delonly, + end + 1, last); +- if (l_del) +- __insert_pending(inode, last); +- else ++ if (l_del) { ++ ret = __insert_pending(inode, last, prealloc); ++ if (ret < 0) ++ goto out; ++ } else + __remove_pending(inode, last); + } ++out: ++ return ret; + } diff --git a/queue-6.5/ext4-mark-buffer-new-if-it-is-unwritten-to-avoid-stale-data-exposure.patch b/queue-6.5/ext4-mark-buffer-new-if-it-is-unwritten-to-avoid-stale-data-exposure.patch new file mode 100644 index 00000000000..73b51fd0a97 --- /dev/null +++ b/queue-6.5/ext4-mark-buffer-new-if-it-is-unwritten-to-avoid-stale-data-exposure.patch @@ -0,0 +1,140 @@ +From 2cd8bdb5efc1e0d5b11a4b7ba6b922fd2736a87f Mon Sep 17 00:00:00 2001 +From: Ojaswin Mujoo +Date: Mon, 18 Sep 2023 16:15:50 +0530 +Subject: ext4: mark buffer new if it is unwritten to avoid stale data exposure + +From: Ojaswin Mujoo + +commit 2cd8bdb5efc1e0d5b11a4b7ba6b922fd2736a87f upstream. + +** Short Version ** + +In ext4 with dioread_nolock, we could have a scenario where the bh returned by +get_blocks (ext4_get_block_unwritten()) in __block_write_begin_int() has +UNWRITTEN and MAPPED flag set. Since such a bh does not have NEW flag set we +never zero out the range of bh that is not under write, causing whatever stale +data is present in the folio at that time to be written out to disk. To fix this +mark the buffer as new, in case it is unwritten, in ext4_get_block_unwritten(). + +** Long Version ** + +The issue mentioned above was resulting in two different bugs: + +1. 
On block size < page size case in ext4, generic/269 was reliably +failing with dioread_nolock. The state of the write was as follows: + + * The write was extending i_size. + * The last block of the file was fallocated and had an unwritten extent + * We were near ENOSPC and hence we were switching to non-delayed alloc + allocation. + +In this case, the back trace that triggers the bug is as follows: + + ext4_da_write_begin() + /* switch to nodelalloc due to low space */ + ext4_write_begin() + ext4_should_dioread_nolock() // true since mount flags still have delalloc + __block_write_begin(..., ext4_get_block_unwritten) + __block_write_begin_int() + for(each buffer head in page) { + /* first iteration, this is bh1 which contains i_size */ + if (!buffer_mapped) + get_block() /* returns bh with only UNWRITTEN and MAPPED */ + /* second iteration, bh2 */ + if (!buffer_mapped) + get_block() /* we fail here, could be ENOSPC */ + } + if (err) + /* + * this would zero out all new buffers and mark them uptodate. + * Since bh1 was never marked new, we skip it here which causes + * the bug later. + */ + folio_zero_new_buffers(); + /* ext4_write_begin() error handling */ + ext4_truncate_failed_write() + ext4_truncate() + ext4_block_truncate_page() + __ext4_block_zero_page_range() + if(!buffer_uptodate()) + ext4_read_bh_lock() + ext4_read_bh() -> ... ext4_submit_bh_wbc() + BUG_ON(buffer_unwritten(bh)); /* !!! */ + +2. The second issue is stale data exposure with page size >= blocksize +with dioread_nolock. The conditions needed for it to happen are the same as +the previous issue i.e. dioread_nolock around ENOSPC condition. The issue +is also similar where in __block_write_begin_int() when we call +ext4_get_block_unwritten() on the buffer_head and the underlying extent +is unwritten, we get an unwritten and mapped buffer head. Since it is +not new, we never zero out the partial range which is not under write, +thus writing stale data to disk. 
This can be easily observed with the +following reproducer: + + fallocate -l 4k testfile + xfs_io -c "pwrite 2k 2k" testfile + # hexdump output will have stale data from byte 0 to 2k in testfile + hexdump -C testfile + +NOTE: To trigger this, we need dioread_nolock enabled and write happening via +ext4_write_begin(), which is usually used when we have -o nodelalloc. Since +dioread_nolock is disabled with nodelalloc, the only alternate way to call +ext4_write_begin() is to ensure that delayed alloc switches to nodelalloc i.e. +ext4_da_write_begin() calls ext4_write_begin(). This will usually happen when +ext4 is almost full like the way generic/269 was triggering it in Issue 1 above. +This might make the issue harder to hit. Hence, for reliable replication, I used +the below patch to temporarily allow dioread_nolock with nodelalloc and then +mount the disk with -o nodelalloc,dioread_nolock. With this you can hit the stale +data issue 100% of the time: + +@@ -508,8 +508,8 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) + if (ext4_should_journal_data(inode)) + return 0; + /* temporary fix to prevent generic/422 test failures */ +- if (!test_opt(inode->i_sb, DELALLOC)) +- return 0; ++ // if (!test_opt(inode->i_sb, DELALLOC)) ++ // return 0; + return 1; + } + +After applying this patch to mark buffer as NEW, both the above issues are +fixed. 
+ +Signed-off-by: Ojaswin Mujoo +Cc: stable@kernel.org +Reviewed-by: Jan Kara +Reviewed-by: "Ritesh Harjani (IBM)" +Link: https://lore.kernel.org/r/d0ed09d70a9733fbb5349c5c7b125caac186ecdf.1695033645.git.ojaswin@linux.ibm.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -789,10 +789,22 @@ int ext4_get_block(struct inode *inode, + int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) + { ++ int ret = 0; ++ + ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", + inode->i_ino, create); +- return _ext4_get_block(inode, iblock, bh_result, ++ ret = _ext4_get_block(inode, iblock, bh_result, + EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); ++ ++ /* ++ * If the buffer is marked unwritten, mark it as new to make sure it is ++ * zeroed out correctly in case of partial writes. Otherwise, there is ++ * a chance of stale data getting exposed. ++ */ ++ if (ret == 0 && buffer_unwritten(bh_result)) ++ set_buffer_new(bh_result); ++ ++ return ret; + } + + /* Maximum number of blocks we map for direct IO at once. */ diff --git a/queue-6.5/ext4-properly-sync-file-size-update-after-o_sync-direct-io.patch b/queue-6.5/ext4-properly-sync-file-size-update-after-o_sync-direct-io.patch new file mode 100644 index 00000000000..a2b3e9ce6f8 --- /dev/null +++ b/queue-6.5/ext4-properly-sync-file-size-update-after-o_sync-direct-io.patch @@ -0,0 +1,241 @@ +From 91562895f8030cb9a0470b1db49de79346a69f91 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Fri, 13 Oct 2023 14:13:50 +0200 +Subject: ext4: properly sync file size update after O_SYNC direct IO + +From: Jan Kara + +commit 91562895f8030cb9a0470b1db49de79346a69f91 upstream. 
+ +Gao Xiang has reported that on ext4 O_SYNC direct IO does not properly +sync file size update and thus if we crash at unfortunate moment, the +file can have smaller size although O_SYNC IO has reported successful +completion. The problem happens because update of on-disk inode size is +handled in ext4_dio_write_iter() *after* iomap_dio_rw() (and thus +dio_complete() in particular) has returned and generic_file_sync() gets +called by dio_complete(). Fix the problem by handling on-disk inode size +update directly in our ->end_io completion handler. + +References: https://lore.kernel.org/all/02d18236-26ef-09b0-90ad-030c4fe3ee20@linux.alibaba.com +Reported-by: Gao Xiang +CC: stable@vger.kernel.org +Fixes: 378f32bab371 ("ext4: introduce direct I/O write using iomap infrastructure") +Signed-off-by: Jan Kara +Tested-by: Joseph Qi +Reviewed-by: "Ritesh Harjani (IBM)" +Link: https://lore.kernel.org/r/20231013121350.26872-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/file.c | 153 ++++++++++++++++++++++++--------------------------------- + 1 file changed, 65 insertions(+), 88 deletions(-) + +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -306,80 +306,38 @@ out: + } + + static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, +- ssize_t written, size_t count) ++ ssize_t count) + { + handle_t *handle; +- bool truncate = false; +- u8 blkbits = inode->i_blkbits; +- ext4_lblk_t written_blk, end_blk; +- int ret; +- +- /* +- * Note that EXT4_I(inode)->i_disksize can get extended up to +- * inode->i_size while the I/O was running due to writeback of delalloc +- * blocks. But, the code in ext4_iomap_alloc() is careful to use +- * zeroed/unwritten extents if this is possible; thus we won't leave +- * uninitialized blocks in a file even if we didn't succeed in writing +- * as much as we intended. 
+- */ +- WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); +- if (offset + count <= EXT4_I(inode)->i_disksize) { +- /* +- * We need to ensure that the inode is removed from the orphan +- * list if it has been added prematurely, due to writeback of +- * delalloc blocks. +- */ +- if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { +- handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); +- +- if (IS_ERR(handle)) { +- ext4_orphan_del(NULL, inode); +- return PTR_ERR(handle); +- } +- +- ext4_orphan_del(handle, inode); +- ext4_journal_stop(handle); +- } +- +- return written; +- } +- +- if (written < 0) +- goto truncate; + ++ lockdep_assert_held_write(&inode->i_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); +- if (IS_ERR(handle)) { +- written = PTR_ERR(handle); +- goto truncate; +- } ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); + +- if (ext4_update_inode_size(inode, offset + written)) { +- ret = ext4_mark_inode_dirty(handle, inode); ++ if (ext4_update_inode_size(inode, offset + count)) { ++ int ret = ext4_mark_inode_dirty(handle, inode); + if (unlikely(ret)) { +- written = ret; + ext4_journal_stop(handle); +- goto truncate; ++ return ret; + } + } + +- /* +- * We may need to truncate allocated but not written blocks beyond EOF. +- */ +- written_blk = ALIGN(offset + written, 1 << blkbits); +- end_blk = ALIGN(offset + count, 1 << blkbits); +- if (written_blk < end_blk && ext4_can_truncate(inode)) +- truncate = true; +- +- /* +- * Remove the inode from the orphan list if it has been extended and +- * everything went OK. +- */ +- if (!truncate && inode->i_nlink) ++ if (inode->i_nlink) + ext4_orphan_del(handle, inode); + ext4_journal_stop(handle); + +- if (truncate) { +-truncate: ++ return count; ++} ++ ++/* ++ * Clean up the inode after DIO or DAX extending write has completed and the ++ * inode size has been updated using ext4_handle_inode_extension(). 
++ */ ++static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) ++{ ++ lockdep_assert_held_write(&inode->i_rwsem); ++ if (count < 0) { + ext4_truncate_failed_write(inode); + /* + * If the truncate operation failed early, then the inode may +@@ -388,9 +346,28 @@ truncate: + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); ++ return; + } ++ /* ++ * If i_disksize got extended due to writeback of delalloc blocks while ++ * the DIO was running we could fail to cleanup the orphan list in ++ * ext4_handle_inode_extension(). Do it now. ++ */ ++ if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { ++ handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + +- return written; ++ if (IS_ERR(handle)) { ++ /* ++ * The write has successfully completed. Not much to ++ * do with the error here so just cleanup the orphan ++ * list and hope for the best. ++ */ ++ ext4_orphan_del(NULL, inode); ++ return; ++ } ++ ext4_orphan_del(handle, inode); ++ ext4_journal_stop(handle); ++ } + } + + static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, +@@ -399,31 +376,22 @@ static int ext4_dio_write_end_io(struct + loff_t pos = iocb->ki_pos; + struct inode *inode = file_inode(iocb->ki_filp); + ++ if (!error && size && flags & IOMAP_DIO_UNWRITTEN) ++ error = ext4_convert_unwritten_extents(NULL, inode, pos, size); + if (error) + return error; +- +- if (size && flags & IOMAP_DIO_UNWRITTEN) { +- error = ext4_convert_unwritten_extents(NULL, inode, pos, size); +- if (error < 0) +- return error; +- } + /* +- * If we are extending the file, we have to update i_size here before +- * page cache gets invalidated in iomap_dio_rw(). Otherwise racing +- * buffered reads could zero out too much from page cache pages. Update +- * of on-disk size will happen later in ext4_dio_write_iter() where +- * we have enough information to also perform orphan list handling etc. 
+- * Note that we perform all extending writes synchronously under +- * i_rwsem held exclusively so i_size update is safe here in that case. +- * If the write was not extending, we cannot see pos > i_size here +- * because operations reducing i_size like truncate wait for all +- * outstanding DIO before updating i_size. ++ * Note that EXT4_I(inode)->i_disksize can get extended up to ++ * inode->i_size while the I/O was running due to writeback of delalloc ++ * blocks. But the code in ext4_iomap_alloc() is careful to use ++ * zeroed/unwritten extents if this is possible; thus we won't leave ++ * uninitialized blocks in a file even if we didn't succeed in writing ++ * as much as we intended. + */ +- pos += size; +- if (pos > i_size_read(inode)) +- i_size_write(inode, pos); +- +- return 0; ++ WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize)); ++ if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize)) ++ return size; ++ return ext4_handle_inode_extension(inode, pos, size); + } + + static const struct iomap_dio_ops ext4_dio_write_ops = { +@@ -606,9 +574,16 @@ static ssize_t ext4_dio_write_iter(struc + dio_flags, NULL, 0); + if (ret == -ENOTBLK) + ret = 0; +- +- if (extend) +- ret = ext4_handle_inode_extension(inode, offset, ret, count); ++ if (extend) { ++ /* ++ * We always perform extending DIO write synchronously so by ++ * now the IO is completed and ext4_handle_inode_extension() ++ * was called. Cleanup the inode in case of error or race with ++ * writeback of delalloc blocks. 
++ */ ++ WARN_ON_ONCE(ret == -EIOCBQUEUED); ++ ext4_inode_extension_cleanup(inode, ret); ++ } + + out: + if (ilock_shared) +@@ -689,8 +664,10 @@ ext4_dax_write_iter(struct kiocb *iocb, + + ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); + +- if (extend) +- ret = ext4_handle_inode_extension(inode, offset, ret, count); ++ if (extend) { ++ ret = ext4_handle_inode_extension(inode, offset, ret); ++ ext4_inode_extension_cleanup(inode, ret); ++ } + out: + inode_unlock(inode); + if (ret > 0) diff --git a/queue-6.5/ext4-remove-gdb-backup-copy-for-meta-bg-in-setup_new_flex_group_blocks.patch b/queue-6.5/ext4-remove-gdb-backup-copy-for-meta-bg-in-setup_new_flex_group_blocks.patch new file mode 100644 index 00000000000..4e96e64acd7 --- /dev/null +++ b/queue-6.5/ext4-remove-gdb-backup-copy-for-meta-bg-in-setup_new_flex_group_blocks.patch @@ -0,0 +1,67 @@ +From 40dd7953f4d606c280074f10d23046b6812708ce Mon Sep 17 00:00:00 2001 +From: Kemeng Shi +Date: Sun, 27 Aug 2023 01:47:03 +0800 +Subject: ext4: remove gdb backup copy for meta bg in setup_new_flex_group_blocks + +From: Kemeng Shi + +commit 40dd7953f4d606c280074f10d23046b6812708ce upstream. + +Wrong check of gdb backup in meta bg as following: +first_group is the first group of meta_bg which contains target group, so +target group is always >= first_group. We check if target group has gdb +backup by comparing first_group with [group + 1] and [group + +EXT4_DESC_PER_BLOCK(sb) - 1]. As group >= first_group, then [group + N] is +> first_group. So no copy of gdb backup in meta bg is done in +setup_new_flex_group_blocks. 
+ +No need to do gdb backup copy in meta bg from setup_new_flex_group_blocks +as we always copy updated gdb block to backups at end of +ext4_flex_group_add as following: + +ext4_flex_group_add + /* no gdb backup copy for meta bg any more */ + setup_new_flex_group_blocks + + /* update current group number */ + ext4_update_super + sbi->s_groups_count += flex_gd->count; + + /* + * if group in meta bg contains backup is added, the primary gdb block + * of the meta bg will be copy to backup in new added group here. + */ + for (; gdb_num <= gdb_num_end; gdb_num++) + update_backups(...) + +In summary, we can remove wrong gdb backup copy code in +setup_new_flex_group_blocks. + +Signed-off-by: Kemeng Shi +Reviewed-by: Theodore Ts'o +Link: https://lore.kernel.org/r/20230826174712.4059355-5-shikemeng@huaweicloud.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/resize.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -560,13 +560,8 @@ static int setup_new_flex_group_blocks(s + if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) + goto handle_itb; + +- if (meta_bg == 1) { +- ext4_group_t first_group; +- first_group = ext4_meta_bg_first_group(sb, group); +- if (first_group != group + 1 && +- first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) +- goto handle_itb; +- } ++ if (meta_bg == 1) ++ goto handle_itb; + + block = start + ext4_bg_has_super(sb, group); + /* Copy all of the GDT blocks into the backup in this group */ diff --git a/queue-6.5/revert-net-r8169-disable-multicast-filter-for-rtl8168h-and-rtl8107e.patch b/queue-6.5/revert-net-r8169-disable-multicast-filter-for-rtl8168h-and-rtl8107e.patch new file mode 100644 index 00000000000..420042e5799 --- /dev/null +++ b/queue-6.5/revert-net-r8169-disable-multicast-filter-for-rtl8168h-and-rtl8107e.patch @@ -0,0 +1,45 @@ +From 6a26310273c323380da21eb23fcfd50e31140913 Mon Sep 17 00:00:00 2001 +From: Heiner 
Kallweit +Date: Tue, 21 Nov 2023 09:09:33 +0100 +Subject: Revert "net: r8169: Disable multicast filter for RTL8168H and RTL8107E" + +From: Heiner Kallweit + +commit 6a26310273c323380da21eb23fcfd50e31140913 upstream. + +This reverts commit efa5f1311c4998e9e6317c52bc5ee93b3a0f36df. + +I couldn't reproduce the reported issue. What I did, based on a pcap +packet log provided by the reporter: +- Used same chip version (RTL8168h) +- Set MAC address to the one used on the reporters system +- Replayed the EAPOL unicast packet that, according to the reporter, + was filtered out by the mc filter. +The packet was properly received. + +Therefore the root cause of the reported issue seems to be somewhere +else. Disabling mc filtering completely for the most common chip +version is a quite big hammer. Therefore revert the change and wait +for further analysis results from the reporter. + +Cc: stable@vger.kernel.org +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169_main.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -2599,9 +2599,7 @@ static void rtl_set_rx_mode(struct net_d + rx_mode &= ~AcceptMulticast; + } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || + dev->flags & IFF_ALLMULTI || +- tp->mac_version == RTL_GIGA_MAC_VER_35 || +- tp->mac_version == RTL_GIGA_MAC_VER_46 || +- tp->mac_version == RTL_GIGA_MAC_VER_48) { ++ tp->mac_version == RTL_GIGA_MAC_VER_35) { + /* accept all multicasts */ + } else if (netdev_mc_empty(dev)) { + rx_mode &= ~AcceptMulticast; diff --git a/queue-6.5/series b/queue-6.5/series index ba8fedf65f9..b333ce9a045 100644 --- a/queue-6.5/series +++ b/queue-6.5/series @@ -456,3 +456,17 @@ media-qcom-camss-fix-missing-vfe_lite-clocks-check.patch media-qcom-camss-fix-set-csi2_rx_cfg1_vc_mode-when-vc-is-greater-than-3.patch 
media-qcom-camss-fix-invalid-clock-enable-bit-disjunction.patch media-qcom-camss-fix-csid-gen2-for-test-pattern-generator.patch +revert-net-r8169-disable-multicast-filter-for-rtl8168h-and-rtl8107e.patch +ext4-fix-race-between-writepages-and-remount.patch +ext4-make-sure-allocate-pending-entry-not-fail.patch +ext4-apply-umask-if-acl-support-is-disabled.patch +ext4-correct-offset-of-gdb-backup-in-non-meta_bg-group-to-update_backups.patch +ext4-mark-buffer-new-if-it-is-unwritten-to-avoid-stale-data-exposure.patch +ext4-correct-return-value-of-ext4_convert_meta_bg.patch +ext4-correct-the-start-block-of-counting-reserved-clusters.patch +ext4-remove-gdb-backup-copy-for-meta-bg-in-setup_new_flex_group_blocks.patch +ext4-add-missed-brelse-in-update_backups.patch +ext4-properly-sync-file-size-update-after-o_sync-direct-io.patch +ext4-fix-racy-may-inline-data-check-in-dio-write.patch +drm-amd-pm-handle-non-terminated-overdrive-commands.patch +drm-bridge-it66121-get_edid-callback-must-not-return-err-pointers.patch -- 2.47.3