--- /dev/null
+From 08e9ebc75b5bcfec9d226f9e16bab2ab7b25a39a Mon Sep 17 00:00:00 2001
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Date: Tue, 17 Oct 2023 16:01:35 +0200
+Subject: drm/amd/pm: Handle non-terminated overdrive commands.
+
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+
+commit 08e9ebc75b5bcfec9d226f9e16bab2ab7b25a39a upstream.
+
+The incoming strings might not be terminated by a newline
+or a 0.
+
+(found while testing a program that just wrote the string
+ itself, causing a crash)
+
+Cc: stable@vger.kernel.org
+Fixes: e3933f26b657 ("drm/amd/pp: Add edit/commit/show OD clock/voltage support in sysfs")
+Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+@@ -734,7 +734,7 @@ static ssize_t amdgpu_set_pp_od_clk_volt
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
+
+- if (count > 127)
++ if (count > 127 || count == 0)
+ return -EINVAL;
+
+ if (*buf == 's')
+@@ -754,7 +754,8 @@ static ssize_t amdgpu_set_pp_od_clk_volt
+ else
+ return -EINVAL;
+
+- memcpy(buf_cpy, buf, count+1);
++ memcpy(buf_cpy, buf, count);
++ buf_cpy[count] = 0;
+
+ tmp_str = buf_cpy;
+
+@@ -771,6 +772,9 @@ static ssize_t amdgpu_set_pp_od_clk_volt
+ return -EINVAL;
+ parameter_size++;
+
++ if (!tmp_str)
++ break;
++
+ while (isspace(*tmp_str))
+ tmp_str++;
+ }
--- /dev/null
+From 81995ee1620318b4c7bbeb02bcc372da2c078c76 Mon Sep 17 00:00:00 2001
+From: Jani Nikula <jani.nikula@intel.com>
+Date: Thu, 14 Sep 2023 16:11:59 +0300
+Subject: drm: bridge: it66121: ->get_edid callback must not return err pointers
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+commit 81995ee1620318b4c7bbeb02bcc372da2c078c76 upstream.
+
+The drm stack does not expect error valued pointers for EDID anywhere.
+
+Fixes: e66856508746 ("drm: bridge: it66121: Set DDC preamble only once before reading EDID")
+Cc: Paul Cercueil <paul@crapouillou.net>
+Cc: Robert Foss <robert.foss@linaro.org>
+Cc: Phong LE <ple@baylibre.com>
+Cc: Neil Armstrong <neil.armstrong@linaro.org>
+Cc: Andrzej Hajda <andrzej.hajda@intel.com>
+Cc: Robert Foss <rfoss@kernel.org>
+Cc: Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
+Cc: Jonas Karlman <jonas@kwiboo.se>
+Cc: Jernej Skrabec <jernej.skrabec@gmail.com>
+Cc: <stable@vger.kernel.org> # v6.3+
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Signed-off-by: Paul Cercueil <paul@crapouillou.net>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230914131159.2472513-1-jani.nikula@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/bridge/ite-it66121.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/bridge/ite-it66121.c
++++ b/drivers/gpu/drm/bridge/ite-it66121.c
+@@ -884,14 +884,14 @@ static struct edid *it66121_bridge_get_e
+ mutex_lock(&ctx->lock);
+ ret = it66121_preamble_ddc(ctx);
+ if (ret) {
+- edid = ERR_PTR(ret);
++ edid = NULL;
+ goto out_unlock;
+ }
+
+ ret = regmap_write(ctx->regmap, IT66121_DDC_HEADER_REG,
+ IT66121_DDC_HEADER_EDID);
+ if (ret) {
+- edid = ERR_PTR(ret);
++ edid = NULL;
+ goto out_unlock;
+ }
+
--- /dev/null
+From 9adac8b01f4be28acd5838aade42b8daa4f0b642 Mon Sep 17 00:00:00 2001
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+Date: Sun, 27 Aug 2023 01:47:01 +0800
+Subject: ext4: add missed brelse in update_backups
+
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+
+commit 9adac8b01f4be28acd5838aade42b8daa4f0b642 upstream.
+
+add missed brelse in update_backups
+
+Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
+Reviewed-by: Theodore Ts'o <tytso@mit.edu>
+Link: https://lore.kernel.org/r/20230826174712.4059355-3-shikemeng@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/resize.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1186,8 +1186,10 @@ static void update_backups(struct super_
+ ext4_group_first_block_no(sb, group));
+ BUFFER_TRACE(bh, "get_write_access");
+ if ((err = ext4_journal_get_write_access(handle, sb, bh,
+- EXT4_JTR_NONE)))
++ EXT4_JTR_NONE))) {
++ brelse(bh);
+ break;
++ }
+ lock_buffer(bh);
+ memcpy(bh->b_data, data, size);
+ if (rest)
--- /dev/null
+From 484fd6c1de13b336806a967908a927cc0356e312 Mon Sep 17 00:00:00 2001
+From: Max Kellermann <max.kellermann@ionos.com>
+Date: Tue, 19 Sep 2023 10:18:23 +0200
+Subject: ext4: apply umask if ACL support is disabled
+
+From: Max Kellermann <max.kellermann@ionos.com>
+
+commit 484fd6c1de13b336806a967908a927cc0356e312 upstream.
+
+The function ext4_init_acl() calls posix_acl_create() which is
+responsible for applying the umask. But without
+CONFIG_EXT4_FS_POSIX_ACL, ext4_init_acl() is an empty inline function,
+and nobody applies the umask.
+
+This fixes a bug which causes the umask to be ignored with O_TMPFILE
+on ext4:
+
+ https://github.com/MusicPlayerDaemon/MPD/issues/558
+ https://bugs.gentoo.org/show_bug.cgi?id=686142#c3
+ https://bugzilla.kernel.org/show_bug.cgi?id=203625
+
+Reviewed-by: "J. Bruce Fields" <bfields@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
+Link: https://lore.kernel.org/r/20230919081824.1096619-1-max.kellermann@ionos.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/acl.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/ext4/acl.h
++++ b/fs/ext4/acl.h
+@@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, str
+ static inline int
+ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
+ {
++ /* usually, the umask is applied by posix_acl_create(), but if
++ ext4 ACL support is disabled at compile time, we need to do
++ it here, because posix_acl_create() will never be called */
++ inode->i_mode &= ~current_umask();
++
+ return 0;
+ }
+ #endif /* CONFIG_EXT4_FS_POSIX_ACL */
--- /dev/null
+From 31f13421c004a420c0e9d288859c9ea9259ea0cc Mon Sep 17 00:00:00 2001
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+Date: Sun, 27 Aug 2023 01:47:00 +0800
+Subject: ext4: correct offset of gdb backup in non meta_bg group to update_backups
+
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+
+commit 31f13421c004a420c0e9d288859c9ea9259ea0cc upstream.
+
+Commit 0aeaa2559d6d5 ("ext4: fix corruption when online resizing a 1K
+bigalloc fs") found that primary superblock's offset in its group is
+not equal to offset of backup superblock in its group when block size
+is 1K and bigalloc is enabled. As group descriptor blocks are right
+after superblock, we can't pass block number of gdb to update_backups
+for the same reason.
+
+The root cause of the issue above is that the leading 1K padding block is
+counted as a data block offset for the primary block, while the backup
+block has no padding block offset in its group.
+
+Remove padding data block count to fix the issue for gdb backups.
+
+For the meta_bg case, update_backups treats blk_off as a block number;
+do no conversion in this case.
+
+Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
+Reviewed-by: Theodore Ts'o <tytso@mit.edu>
+Link: https://lore.kernel.org/r/20230826174712.4059355-2-shikemeng@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/resize.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1601,6 +1601,8 @@ exit_journal:
+ int gdb_num_end = ((group + flex_gd->count - 1) /
+ EXT4_DESC_PER_BLOCK(sb));
+ int meta_bg = ext4_has_feature_meta_bg(sb);
++ sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr -
++ ext4_group_first_block_no(sb, 0);
+ sector_t old_gdb = 0;
+
+ update_backups(sb, ext4_group_first_block_no(sb, 0),
+@@ -1612,8 +1614,8 @@ exit_journal:
+ gdb_num);
+ if (old_gdb == gdb_bh->b_blocknr)
+ continue;
+- update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
+- gdb_bh->b_size, meta_bg);
++ update_backups(sb, gdb_bh->b_blocknr - padding_blocks,
++ gdb_bh->b_data, gdb_bh->b_size, meta_bg);
+ old_gdb = gdb_bh->b_blocknr;
+ }
+ }
--- /dev/null
+From 48f1551592c54f7d8e2befc72a99ff4e47f7dca0 Mon Sep 17 00:00:00 2001
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+Date: Sun, 27 Aug 2023 01:47:02 +0800
+Subject: ext4: correct return value of ext4_convert_meta_bg
+
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+
+commit 48f1551592c54f7d8e2befc72a99ff4e47f7dca0 upstream.
+
+Avoid to ignore error in "err".
+
+Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
+Link: https://lore.kernel.org/r/20230826174712.4059355-4-shikemeng@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/resize.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1982,9 +1982,7 @@ static int ext4_convert_meta_bg(struct s
+
+ errout:
+ ret = ext4_journal_stop(handle);
+- if (!err)
+- err = ret;
+- return ret;
++ return err ? err : ret;
+
+ invalid_resize_inode:
+ ext4_error(sb, "corrupted/inconsistent resize inode");
--- /dev/null
+From 40ea98396a3659062267d1fe5f99af4f7e4f05e3 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Thu, 24 Aug 2023 17:26:04 +0800
+Subject: ext4: correct the start block of counting reserved clusters
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 40ea98396a3659062267d1fe5f99af4f7e4f05e3 upstream.
+
+When big allocate feature is enabled, we need to count and update
+reserved clusters before removing a delayed only extent_status entry.
+{init|count|get}_rsvd() have already done this, but the start block
+number of this counting isn't correct in the following case.
+
+ lblk end
+ | |
+ v v
+ -------------------------
+ | | orig_es
+ -------------------------
+ ^ ^
+ len1 is 0 | len2 |
+
+If the start block of the orig_es entry found is bigger than lblk, we
+passed lblk as the start block to count_rsvd(), but the length is correct,
+so the range to be counted is offset. This patch fixes this by
+passing the start block as 'orig_es->lblk + len1'.
+
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Cc: stable@kernel.org
+Link: https://lore.kernel.org/r/20230824092619.1327976-2-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/extents_status.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/extents_status.c
++++ b/fs/ext4/extents_status.c
+@@ -1431,8 +1431,8 @@ static int __es_remove_extent(struct ino
+ }
+ }
+ if (count_reserved)
+- count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
+- &orig_es, &rc);
++ count_rsvd(inode, orig_es.es_lblk + len1,
++ orig_es.es_len - len1 - len2, &orig_es, &rc);
+ goto out_get_reserved;
+ }
+
--- /dev/null
+From 745f17a4166e79315e4b7f33ce89d03e75a76983 Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Wed, 24 May 2023 15:25:38 +0800
+Subject: ext4: fix race between writepages and remount
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit 745f17a4166e79315e4b7f33ce89d03e75a76983 upstream.
+
+We got a WARNING in ext4_add_complete_io:
+==================================================================
+ WARNING: at fs/ext4/page-io.c:231 ext4_put_io_end_defer+0x182/0x250
+ CPU: 10 PID: 77 Comm: ksoftirqd/10 Tainted: 6.3.0-rc2 #85
+ RIP: 0010:ext4_put_io_end_defer+0x182/0x250 [ext4]
+ [...]
+ Call Trace:
+ <TASK>
+ ext4_end_bio+0xa8/0x240 [ext4]
+ bio_endio+0x195/0x310
+ blk_update_request+0x184/0x770
+ scsi_end_request+0x2f/0x240
+ scsi_io_completion+0x75/0x450
+ scsi_finish_command+0xef/0x160
+ scsi_complete+0xa3/0x180
+ blk_complete_reqs+0x60/0x80
+ blk_done_softirq+0x25/0x40
+ __do_softirq+0x119/0x4c8
+ run_ksoftirqd+0x42/0x70
+ smpboot_thread_fn+0x136/0x3c0
+ kthread+0x140/0x1a0
+ ret_from_fork+0x2c/0x50
+==================================================================
+
+Above issue may happen as follows:
+
+ cpu1 cpu2
+----------------------------|----------------------------
+mount -o dioread_lock
+ext4_writepages
+ ext4_do_writepages
+ *if (ext4_should_dioread_nolock(inode))*
+ // rsv_blocks is not assigned here
+ mount -o remount,dioread_nolock
+ ext4_journal_start_with_reserve
+ __ext4_journal_start
+ __ext4_journal_start_sb
+ jbd2__journal_start
+ *if (rsv_blocks)*
+ // h_rsv_handle is not initialized here
+ mpage_map_and_submit_extent
+ mpage_map_one_extent
+ dioread_nolock = ext4_should_dioread_nolock(inode)
+ if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN))
+ mpd->io_submit.io_end->handle = handle->h_rsv_handle
+ ext4_set_io_unwritten_flag
+ io_end->flag |= EXT4_IO_END_UNWRITTEN
+ // now io_end->handle is NULL but has EXT4_IO_END_UNWRITTEN flag
+
+scsi_finish_command
+ scsi_io_completion
+ scsi_io_completion_action
+ scsi_end_request
+ blk_update_request
+ req_bio_endio
+ bio_endio
+ bio->bi_end_io > ext4_end_bio
+ ext4_put_io_end_defer
+ ext4_add_complete_io
+ // trigger WARN_ON(!io_end->handle && sbi->s_journal);
+
+The immediate cause of this problem is that ext4_should_dioread_nolock()
+function returns inconsistent values in the ext4_do_writepages() and
+mpage_map_one_extent(). There are four conditions in this function that
+can be changed at mount time to cause this problem. These four conditions
+can be divided into two categories:
+
+ (1) journal_data and EXT4_EXTENTS_FL, which can be changed by ioctl
+ (2) DELALLOC and DIOREAD_NOLOCK, which can be changed by remount
+
+The two in the first category have been fixed by commit c8585c6fcaf2
+("ext4: fix races between changing inode journal mode and ext4_writepages")
+and commit cb85f4d23f79 ("ext4: fix race between writepages and enabling
+EXT4_EXTENTS_FL") respectively.
+
+Two cases in the other category have not yet been fixed, and the above
+issue is caused by this situation. We refer to the fix for the first
+category, when applying options during remount, we grab s_writepages_rwsem
+to avoid racing with writepages ops to trigger this problem.
+
+Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io")
+Cc: stable@vger.kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230524072538.2883391-1-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ext4.h | 3 ++-
+ fs/ext4/super.c | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1676,7 +1676,8 @@ struct ext4_sb_info {
+
+ /*
+ * Barrier between writepages ops and changing any inode's JOURNAL_DATA
+- * or EXTENTS flag.
++ * or EXTENTS flag or between writepages ops and changing DELALLOC or
++ * DIOREAD_NOLOCK mount options on remount.
+ */
+ struct percpu_rw_semaphore s_writepages_rwsem;
+ struct dax_device *s_daxdev;
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -6443,6 +6443,7 @@ static int __ext4_remount(struct fs_cont
+ struct ext4_mount_options old_opts;
+ ext4_group_t g;
+ int err = 0;
++ int alloc_ctx;
+ #ifdef CONFIG_QUOTA
+ int enable_quota = 0;
+ int i, j;
+@@ -6483,7 +6484,16 @@ static int __ext4_remount(struct fs_cont
+
+ }
+
++ /*
++ * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
++ * two calls to ext4_should_dioread_nolock() to return inconsistent
++ * values, triggering WARN_ON in ext4_add_complete_io(). we grab
++ * here s_writepages_rwsem to avoid race between writepages ops and
++ * remount.
++ */
++ alloc_ctx = ext4_writepages_down_write(sb);
+ ext4_apply_options(fc, sb);
++ ext4_writepages_up_write(sb, alloc_ctx);
+
+ if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
+ test_opt(sb, JOURNAL_CHECKSUM)) {
+@@ -6701,6 +6711,8 @@ restore_opts:
+ if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
++
++ alloc_ctx = ext4_writepages_down_write(sb);
+ sb->s_flags = old_sb_flags;
+ sbi->s_mount_opt = old_opts.s_mount_opt;
+ sbi->s_mount_opt2 = old_opts.s_mount_opt2;
+@@ -6709,6 +6721,8 @@ restore_opts:
+ sbi->s_commit_interval = old_opts.s_commit_interval;
+ sbi->s_min_batch_time = old_opts.s_min_batch_time;
+ sbi->s_max_batch_time = old_opts.s_max_batch_time;
++ ext4_writepages_up_write(sb, alloc_ctx);
++
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
+ ext4_release_system_zone(sb);
+ #ifdef CONFIG_QUOTA
--- /dev/null
+From ce56d21355cd6f6937aca32f1f44ca749d1e4808 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Mon, 2 Oct 2023 14:50:20 -0400
+Subject: ext4: fix racy may inline data check in dio write
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit ce56d21355cd6f6937aca32f1f44ca749d1e4808 upstream.
+
+syzbot reports that the following warning from ext4_iomap_begin()
+triggers as of the commit referenced below:
+
+ if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+ return -ERANGE;
+
+This occurs during a dio write, which is never expected to encounter
+an inode with inline data. To enforce this behavior,
+ext4_dio_write_iter() checks the current inline state of the inode
+and clears the MAY_INLINE_DATA state flag to either fall back to
+buffered writes, or enforce that any other writers in progress on
+the inode are not allowed to create inline data.
+
+The problem is that the check for existing inline data and the state
+flag can span a lock cycle. For example, if the ilock is originally
+locked shared and subsequently upgraded to exclusive, another writer
+may have reacquired the lock and created inline data before the dio
+write task acquires the lock and proceeds.
+
+The commit referenced below loosens the lock requirements to allow
+some forms of unaligned dio writes to occur under shared lock, but
+AFAICT the inline data check was technically already racy for any
+dio write that would have involved a lock cycle. Regardless, lift
+clearing of the state bit to the same lock critical section that
+checks for preexisting inline data on the inode to close the race.
+
+Cc: stable@kernel.org
+Reported-by: syzbot+307da6ca5cb0d01d581a@syzkaller.appspotmail.com
+Fixes: 310ee0902b8d ("ext4: allow concurrent unaligned dio overwrites")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Link: https://lore.kernel.org/r/20231002185020.531537-1-bfoster@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/file.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -537,18 +537,20 @@ static ssize_t ext4_dio_write_iter(struc
+ return ext4_buffered_write_iter(iocb, from);
+ }
+
++ /*
++ * Prevent inline data from being created since we are going to allocate
++ * blocks for DIO. We know the inode does not currently have inline data
++ * because ext4_should_use_dio() checked for it, but we have to clear
++ * the state flag before the write checks because a lock cycle could
++ * introduce races with other writers.
++ */
++ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
++
+ ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend,
+ &unwritten, &dio_flags);
+ if (ret <= 0)
+ return ret;
+
+- /*
+- * Make sure inline data cannot be created anymore since we are going
+- * to allocate blocks for DIO. We know the inode does not have any
+- * inline data now because ext4_dio_supported() checked for that.
+- */
+- ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+-
+ offset = iocb->ki_pos;
+ count = ret;
+
--- /dev/null
+From 8e387c89e96b9543a339f84043cf9df15fed2632 Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Thu, 24 Aug 2023 17:26:05 +0800
+Subject: ext4: make sure allocate pending entry not fail
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 8e387c89e96b9543a339f84043cf9df15fed2632 upstream.
+
+__insert_pending() allocate memory in atomic context, so the allocation
+could fail, but we are not handling that failure now. It could lead
+ext4_es_remove_extent() to get wrong reserved clusters, and the global
+data blocks reservation count will be incorrect. The same to
+extents_status entry preallocation, preallocate pending entry out of the
+i_es_lock with __GFP_NOFAIL, make sure __insert_pending() and
+__revise_pending() always succeeds.
+
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Cc: stable@kernel.org
+Link: https://lore.kernel.org/r/20230824092619.1327976-3-yi.zhang@huaweicloud.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/extents_status.c | 123 ++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 89 insertions(+), 34 deletions(-)
+
+--- a/fs/ext4/extents_status.c
++++ b/fs/ext4/extents_status.c
+@@ -152,8 +152,9 @@ static int __es_remove_extent(struct ino
+ static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
+ static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei);
+-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+- ext4_lblk_t len);
++static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
++ ext4_lblk_t len,
++ struct pending_reservation **prealloc);
+
+ int __init ext4_init_es(void)
+ {
+@@ -448,6 +449,19 @@ static void ext4_es_list_del(struct inod
+ spin_unlock(&sbi->s_es_lock);
+ }
+
++static inline struct pending_reservation *__alloc_pending(bool nofail)
++{
++ if (!nofail)
++ return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
++
++ return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
++}
++
++static inline void __free_pending(struct pending_reservation *pr)
++{
++ kmem_cache_free(ext4_pending_cachep, pr);
++}
++
+ /*
+ * Returns true if we cannot fail to allocate memory for this extent_status
+ * entry and cannot reclaim it until its status changes.
+@@ -836,11 +850,12 @@ void ext4_es_insert_extent(struct inode
+ {
+ struct extent_status newes;
+ ext4_lblk_t end = lblk + len - 1;
+- int err1 = 0;
+- int err2 = 0;
++ int err1 = 0, err2 = 0, err3 = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct extent_status *es1 = NULL;
+ struct extent_status *es2 = NULL;
++ struct pending_reservation *pr = NULL;
++ bool revise_pending = false;
+
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return;
+@@ -868,11 +883,17 @@ void ext4_es_insert_extent(struct inode
+
+ ext4_es_insert_extent_check(inode, &newes);
+
++ revise_pending = sbi->s_cluster_ratio > 1 &&
++ test_opt(inode->i_sb, DELALLOC) &&
++ (status & (EXTENT_STATUS_WRITTEN |
++ EXTENT_STATUS_UNWRITTEN));
+ retry:
+ if (err1 && !es1)
+ es1 = __es_alloc_extent(true);
+ if ((err1 || err2) && !es2)
+ es2 = __es_alloc_extent(true);
++ if ((err1 || err2 || err3) && revise_pending && !pr)
++ pr = __alloc_pending(true);
+ write_lock(&EXT4_I(inode)->i_es_lock);
+
+ err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
+@@ -897,13 +918,18 @@ retry:
+ es2 = NULL;
+ }
+
+- if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
+- (status & EXTENT_STATUS_WRITTEN ||
+- status & EXTENT_STATUS_UNWRITTEN))
+- __revise_pending(inode, lblk, len);
++ if (revise_pending) {
++ err3 = __revise_pending(inode, lblk, len, &pr);
++ if (err3 != 0)
++ goto error;
++ if (pr) {
++ __free_pending(pr);
++ pr = NULL;
++ }
++ }
+ error:
+ write_unlock(&EXT4_I(inode)->i_es_lock);
+- if (err1 || err2)
++ if (err1 || err2 || err3)
+ goto retry;
+
+ ext4_es_print_tree(inode);
+@@ -1311,7 +1337,7 @@ static unsigned int get_rsvd(struct inod
+ rc->ndelonly--;
+ node = rb_next(&pr->rb_node);
+ rb_erase(&pr->rb_node, &tree->root);
+- kmem_cache_free(ext4_pending_cachep, pr);
++ __free_pending(pr);
+ if (!node)
+ break;
+ pr = rb_entry(node, struct pending_reservation,
+@@ -1907,11 +1933,13 @@ static struct pending_reservation *__get
+ *
+ * @inode - file containing the cluster
+ * @lblk - logical block in the cluster to be added
++ * @prealloc - preallocated pending entry
+ *
+ * Returns 0 on successful insertion and -ENOMEM on failure. If the
+ * pending reservation is already in the set, returns successfully.
+ */
+-static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
++static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
++ struct pending_reservation **prealloc)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
+@@ -1937,10 +1965,15 @@ static int __insert_pending(struct inode
+ }
+ }
+
+- pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
+- if (pr == NULL) {
+- ret = -ENOMEM;
+- goto out;
++ if (likely(*prealloc == NULL)) {
++ pr = __alloc_pending(false);
++ if (!pr) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ } else {
++ pr = *prealloc;
++ *prealloc = NULL;
+ }
+ pr->lclu = lclu;
+
+@@ -1970,7 +2003,7 @@ static void __remove_pending(struct inod
+ if (pr != NULL) {
+ tree = &EXT4_I(inode)->i_pending_tree;
+ rb_erase(&pr->rb_node, &tree->root);
+- kmem_cache_free(ext4_pending_cachep, pr);
++ __free_pending(pr);
+ }
+ }
+
+@@ -2029,10 +2062,10 @@ void ext4_es_insert_delayed_block(struct
+ bool allocated)
+ {
+ struct extent_status newes;
+- int err1 = 0;
+- int err2 = 0;
++ int err1 = 0, err2 = 0, err3 = 0;
+ struct extent_status *es1 = NULL;
+ struct extent_status *es2 = NULL;
++ struct pending_reservation *pr = NULL;
+
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return;
+@@ -2052,6 +2085,8 @@ retry:
+ es1 = __es_alloc_extent(true);
+ if ((err1 || err2) && !es2)
+ es2 = __es_alloc_extent(true);
++ if ((err1 || err2 || err3) && allocated && !pr)
++ pr = __alloc_pending(true);
+ write_lock(&EXT4_I(inode)->i_es_lock);
+
+ err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
+@@ -2074,11 +2109,18 @@ retry:
+ es2 = NULL;
+ }
+
+- if (allocated)
+- __insert_pending(inode, lblk);
++ if (allocated) {
++ err3 = __insert_pending(inode, lblk, &pr);
++ if (err3 != 0)
++ goto error;
++ if (pr) {
++ __free_pending(pr);
++ pr = NULL;
++ }
++ }
+ error:
+ write_unlock(&EXT4_I(inode)->i_es_lock);
+- if (err1 || err2)
++ if (err1 || err2 || err3)
+ goto retry;
+
+ ext4_es_print_tree(inode);
+@@ -2184,21 +2226,24 @@ unsigned int ext4_es_delayed_clu(struct
+ * @inode - file containing the range
+ * @lblk - logical block defining the start of range
+ * @len - length of range in blocks
++ * @prealloc - preallocated pending entry
+ *
+ * Used after a newly allocated extent is added to the extents status tree.
+ * Requires that the extents in the range have either written or unwritten
+ * status. Must be called while holding i_es_lock.
+ */
+-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+- ext4_lblk_t len)
++static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
++ ext4_lblk_t len,
++ struct pending_reservation **prealloc)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ ext4_lblk_t end = lblk + len - 1;
+ ext4_lblk_t first, last;
+ bool f_del = false, l_del = false;
++ int ret = 0;
+
+ if (len == 0)
+- return;
++ return 0;
+
+ /*
+ * Two cases - block range within single cluster and block range
+@@ -2219,7 +2264,9 @@ static void __revise_pending(struct inod
+ f_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ first, lblk - 1);
+ if (f_del) {
+- __insert_pending(inode, first);
++ ret = __insert_pending(inode, first, prealloc);
++ if (ret < 0)
++ goto out;
+ } else {
+ last = EXT4_LBLK_CMASK(sbi, end) +
+ sbi->s_cluster_ratio - 1;
+@@ -2227,9 +2274,11 @@ static void __revise_pending(struct inod
+ l_del = __es_scan_range(inode,
+ &ext4_es_is_delonly,
+ end + 1, last);
+- if (l_del)
+- __insert_pending(inode, last);
+- else
++ if (l_del) {
++ ret = __insert_pending(inode, last, prealloc);
++ if (ret < 0)
++ goto out;
++ } else
+ __remove_pending(inode, last);
+ }
+ } else {
+@@ -2237,18 +2286,24 @@ static void __revise_pending(struct inod
+ if (first != lblk)
+ f_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ first, lblk - 1);
+- if (f_del)
+- __insert_pending(inode, first);
+- else
++ if (f_del) {
++ ret = __insert_pending(inode, first, prealloc);
++ if (ret < 0)
++ goto out;
++ } else
+ __remove_pending(inode, first);
+
+ last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
+ if (last != end)
+ l_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ end + 1, last);
+- if (l_del)
+- __insert_pending(inode, last);
+- else
++ if (l_del) {
++ ret = __insert_pending(inode, last, prealloc);
++ if (ret < 0)
++ goto out;
++ } else
+ __remove_pending(inode, last);
+ }
++out:
++ return ret;
+ }
--- /dev/null
+From 2cd8bdb5efc1e0d5b11a4b7ba6b922fd2736a87f Mon Sep 17 00:00:00 2001
+From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Date: Mon, 18 Sep 2023 16:15:50 +0530
+Subject: ext4: mark buffer new if it is unwritten to avoid stale data exposure
+
+From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+
+commit 2cd8bdb5efc1e0d5b11a4b7ba6b922fd2736a87f upstream.
+
+** Short Version **
+
+In ext4 with dioread_nolock, we could have a scenario where the bh returned by
+get_blocks (ext4_get_block_unwritten()) in __block_write_begin_int() has
+UNWRITTEN and MAPPED flag set. Since such a bh does not have NEW flag set we
+never zero out the range of bh that is not under write, causing whatever stale
+data is present in the folio at that time to be written out to disk. To fix this
+mark the buffer as new, in case it is unwritten, in ext4_get_block_unwritten().
+
+** Long Version **
+
+The issue mentioned above was resulting in two different bugs:
+
+1. On block size < page size case in ext4, generic/269 was reliably
+failing with dioread_nolock. The state of the write was as follows:
+
+ * The write was extending i_size.
+ * The last block of the file was fallocated and had an unwritten extent
+ * We were near ENOSPC and hence we were switching to non-delayed alloc
+ allocation.
+
+In this case, the back trace that triggers the bug is as follows:
+
+ ext4_da_write_begin()
+ /* switch to nodelalloc due to low space */
+ ext4_write_begin()
+ ext4_should_dioread_nolock() // true since mount flags still have delalloc
+ __block_write_begin(..., ext4_get_block_unwritten)
+ __block_write_begin_int()
+ for(each buffer head in page) {
+ /* first iteration, this is bh1 which contains i_size */
+ if (!buffer_mapped)
+ get_block() /* returns bh with only UNWRITTEN and MAPPED */
+ /* second iteration, bh2 */
+ if (!buffer_mapped)
+ get_block() /* we fail here, could be ENOSPC */
+ }
+ if (err)
+ /*
+ * this would zero out all new buffers and mark them uptodate.
+ * Since bh1 was never marked new, we skip it here which causes
+ * the bug later.
+ */
+ folio_zero_new_buffers();
+ /* ext4_write_begin() error handling */
+ ext4_truncate_failed_write()
+ ext4_truncate()
+ ext4_block_truncate_page()
+ __ext4_block_zero_page_range()
+ if(!buffer_uptodate())
+ ext4_read_bh_lock()
+ ext4_read_bh() -> ... ext4_submit_bh_wbc()
+ BUG_ON(buffer_unwritten(bh)); /* !!! */
+
+2. The second issue is stale data exposure with page size >= blocksize
+with dioread_nolock. The conditions needed for it to happen are same as
+the previous issue ie dioread_nolock around ENOSPC condition. The issue
+is also similar where in __block_write_begin_int() when we call
+ext4_get_block_unwritten() on the buffer_head and the underlying extent
+is unwritten, we get an unwritten and mapped buffer head. Since it is
+not new, we never zero out the partial range which is not under write,
+thus writing stale data to disk. This can be easily observed with the
+following reproducer:
+
+ fallocate -l 4k testfile
+ xfs_io -c "pwrite 2k 2k" testfile
+ # hexdump output will have stale data in from byte 0 to 2k in testfile
+ hexdump -C testfile
+
+NOTE: To trigger this, we need dioread_nolock enabled and write happening via
+ext4_write_begin(), which is usually used when we have -o nodelalloc. Since
+dioread_nolock is disabled with nodelalloc, the only alternate way to call
+ext4_write_begin() is to ensure that delayed alloc switches to nodelalloc ie
+ext4_da_write_begin() calls ext4_write_begin(). This will usually happen when
+ext4 is almost full like the way generic/269 was triggering it in Issue 1 above.
+This might make the issue harder to hit. Hence, for reliable replication, I used
+the below patch to temporarily allow dioread_nolock with nodelalloc and then
+mount the disk with -o nodelalloc,dioread_nolock. With this you can hit the stale
+data issue 100% of times:
+
+@@ -508,8 +508,8 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
+ if (ext4_should_journal_data(inode))
+ return 0;
+ /* temporary fix to prevent generic/422 test failures */
+- if (!test_opt(inode->i_sb, DELALLOC))
+- return 0;
++ // if (!test_opt(inode->i_sb, DELALLOC))
++ // return 0;
+ return 1;
+ }
+
+After applying this patch to mark buffer as NEW, both the above issues are
+fixed.
+
+Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Cc: stable@kernel.org
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/d0ed09d70a9733fbb5349c5c7b125caac186ecdf.1695033645.git.ojaswin@linux.ibm.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -789,10 +789,22 @@ int ext4_get_block(struct inode *inode,
+ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+ {
++ int ret = 0;
++
+ ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
+ inode->i_ino, create);
+- return _ext4_get_block(inode, iblock, bh_result,
++ ret = _ext4_get_block(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
++
++ /*
++ * If the buffer is marked unwritten, mark it as new to make sure it is
++ * zeroed out correctly in case of partial writes. Otherwise, there is
++ * a chance of stale data getting exposed.
++ */
++ if (ret == 0 && buffer_unwritten(bh_result))
++ set_buffer_new(bh_result);
++
++ return ret;
+ }
+
+ /* Maximum number of blocks we map for direct IO at once. */
--- /dev/null
+From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
+From: Wang Jianjian <wangjianjian0@foxmail.com>
+Date: Thu, 24 Aug 2023 23:56:31 +0800
+Subject: ext4: no need to generate from free list in mballoc
+
+From: Wang Jianjian <wangjianjian0@foxmail.com>
+
+commit ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 upstream.
+
+Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
+buddy cache init") added a code to mark as used blocks in the list of not yet
+committed freed blocks during initialization of a buddy page. However
+ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
+reference to it so it cannot happen that ext4_mb_init_cache() is called
+when efd list is non-empty. Just remove the
+ext4_mb_generate_from_freelist() call.
+
+Fixes: 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in buddy cache init")
+Signed-off-by: Wang Jianjian <wangjianjian0@foxmail.com>
+Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c | 39 ++++++---------------------------------
+ 1 file changed, 6 insertions(+), 33 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo
+
+ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+- ext4_group_t group);
+ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
+
+ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
+@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct pag
+ * We place the buddy block and bitmap block
+ * close together
+ */
++ grinfo = ext4_get_group_info(sb, group);
++ if (!grinfo) {
++ err = -EFSCORRUPTED;
++ goto out;
++ }
+ if ((first_block + i) & 1) {
+ /* this is block of buddy */
+ BUG_ON(incore == NULL);
+ mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
+ group, page->index, i * blocksize);
+ trace_ext4_mb_buddy_bitmap_load(sb, group);
+- grinfo = ext4_get_group_info(sb, group);
+- if (!grinfo) {
+- err = -EFSCORRUPTED;
+- goto out;
+- }
+ grinfo->bb_fragments = 0;
+ memset(grinfo->bb_counters, 0,
+ sizeof(*grinfo->bb_counters) *
+@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct pag
+
+ /* mark all preallocated blks used in in-core bitmap */
+ ext4_mb_generate_from_pa(sb, data, group);
+- ext4_mb_generate_from_freelist(sb, data, group);
++ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
+ ext4_unlock_group(sb, group);
+
+ /* set incore so that the buddy information can be
+@@ -4959,31 +4957,6 @@ try_group_pa:
+ }
+
+ /*
+- * the function goes through all block freed in the group
+- * but not yet committed and marks them used in in-core bitmap.
+- * buddy must be generated from this bitmap
+- * Need to be called with the ext4 group lock held
+- */
+-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+- ext4_group_t group)
+-{
+- struct rb_node *n;
+- struct ext4_group_info *grp;
+- struct ext4_free_data *entry;
+-
+- grp = ext4_get_group_info(sb, group);
+- if (!grp)
+- return;
+- n = rb_first(&(grp->bb_free_root));
+-
+- while (n) {
+- entry = rb_entry(n, struct ext4_free_data, efd_node);
+- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
+- n = rb_next(n);
+- }
+-}
+-
+-/*
+ * the function goes through all preallocation in this group and marks them
+ * used in in-core bitmap. buddy must be generated from this bitmap
+ * Need to be called with ext4 group lock held
--- /dev/null
+From 91562895f8030cb9a0470b1db49de79346a69f91 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 13 Oct 2023 14:13:50 +0200
+Subject: ext4: properly sync file size update after O_SYNC direct IO
+
+From: Jan Kara <jack@suse.cz>
+
+commit 91562895f8030cb9a0470b1db49de79346a69f91 upstream.
+
+Gao Xiang has reported that on ext4 O_SYNC direct IO does not properly
+sync file size update and thus if we crash at unfortunate moment, the
+file can have smaller size although O_SYNC IO has reported successful
+completion. The problem happens because update of on-disk inode size is
+handled in ext4_dio_write_iter() *after* iomap_dio_rw() (and thus
+dio_complete() in particular) has returned and generic_file_sync() gets
+called by dio_complete(). Fix the problem by handling on-disk inode size
+update directly in our ->end_io completion handler.
+
+References: https://lore.kernel.org/all/02d18236-26ef-09b0-90ad-030c4fe3ee20@linux.alibaba.com
+Reported-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+CC: stable@vger.kernel.org
+Fixes: 378f32bab371 ("ext4: introduce direct I/O write using iomap infrastructure")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Tested-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Reviewed-by: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20231013121350.26872-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/file.c | 153 ++++++++++++++++++++++++---------------------------------
+ 1 file changed, 65 insertions(+), 88 deletions(-)
+
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -306,80 +306,38 @@ out:
+ }
+
+ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+- ssize_t written, size_t count)
++ ssize_t count)
+ {
+ handle_t *handle;
+- bool truncate = false;
+- u8 blkbits = inode->i_blkbits;
+- ext4_lblk_t written_blk, end_blk;
+- int ret;
+-
+- /*
+- * Note that EXT4_I(inode)->i_disksize can get extended up to
+- * inode->i_size while the I/O was running due to writeback of delalloc
+- * blocks. But, the code in ext4_iomap_alloc() is careful to use
+- * zeroed/unwritten extents if this is possible; thus we won't leave
+- * uninitialized blocks in a file even if we didn't succeed in writing
+- * as much as we intended.
+- */
+- WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+- if (offset + count <= EXT4_I(inode)->i_disksize) {
+- /*
+- * We need to ensure that the inode is removed from the orphan
+- * list if it has been added prematurely, due to writeback of
+- * delalloc blocks.
+- */
+- if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+- handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+-
+- if (IS_ERR(handle)) {
+- ext4_orphan_del(NULL, inode);
+- return PTR_ERR(handle);
+- }
+-
+- ext4_orphan_del(handle, inode);
+- ext4_journal_stop(handle);
+- }
+-
+- return written;
+- }
+-
+- if (written < 0)
+- goto truncate;
+
++ lockdep_assert_held_write(&inode->i_rwsem);
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+- if (IS_ERR(handle)) {
+- written = PTR_ERR(handle);
+- goto truncate;
+- }
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
+
+- if (ext4_update_inode_size(inode, offset + written)) {
+- ret = ext4_mark_inode_dirty(handle, inode);
++ if (ext4_update_inode_size(inode, offset + count)) {
++ int ret = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(ret)) {
+- written = ret;
+ ext4_journal_stop(handle);
+- goto truncate;
++ return ret;
+ }
+ }
+
+- /*
+- * We may need to truncate allocated but not written blocks beyond EOF.
+- */
+- written_blk = ALIGN(offset + written, 1 << blkbits);
+- end_blk = ALIGN(offset + count, 1 << blkbits);
+- if (written_blk < end_blk && ext4_can_truncate(inode))
+- truncate = true;
+-
+- /*
+- * Remove the inode from the orphan list if it has been extended and
+- * everything went OK.
+- */
+- if (!truncate && inode->i_nlink)
++ if (inode->i_nlink)
+ ext4_orphan_del(handle, inode);
+ ext4_journal_stop(handle);
+
+- if (truncate) {
+-truncate:
++ return count;
++}
++
++/*
++ * Clean up the inode after DIO or DAX extending write has completed and the
++ * inode size has been updated using ext4_handle_inode_extension().
++ */
++static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count)
++{
++ lockdep_assert_held_write(&inode->i_rwsem);
++ if (count < 0) {
+ ext4_truncate_failed_write(inode);
+ /*
+ * If the truncate operation failed early, then the inode may
+@@ -388,9 +346,28 @@ truncate:
+ */
+ if (inode->i_nlink)
+ ext4_orphan_del(NULL, inode);
++ return;
+ }
++ /*
++ * If i_disksize got extended due to writeback of delalloc blocks while
++ * the DIO was running we could fail to cleanup the orphan list in
++ * ext4_handle_inode_extension(). Do it now.
++ */
++ if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
++ handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+- return written;
++ if (IS_ERR(handle)) {
++ /*
++ * The write has successfully completed. Not much to
++ * do with the error here so just cleanup the orphan
++ * list and hope for the best.
++ */
++ ext4_orphan_del(NULL, inode);
++ return;
++ }
++ ext4_orphan_del(handle, inode);
++ ext4_journal_stop(handle);
++ }
+ }
+
+ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+@@ -399,31 +376,22 @@ static int ext4_dio_write_end_io(struct
+ loff_t pos = iocb->ki_pos;
+ struct inode *inode = file_inode(iocb->ki_filp);
+
++ if (!error && size && flags & IOMAP_DIO_UNWRITTEN)
++ error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
+ if (error)
+ return error;
+-
+- if (size && flags & IOMAP_DIO_UNWRITTEN) {
+- error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
+- if (error < 0)
+- return error;
+- }
+ /*
+- * If we are extending the file, we have to update i_size here before
+- * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
+- * buffered reads could zero out too much from page cache pages. Update
+- * of on-disk size will happen later in ext4_dio_write_iter() where
+- * we have enough information to also perform orphan list handling etc.
+- * Note that we perform all extending writes synchronously under
+- * i_rwsem held exclusively so i_size update is safe here in that case.
+- * If the write was not extending, we cannot see pos > i_size here
+- * because operations reducing i_size like truncate wait for all
+- * outstanding DIO before updating i_size.
++ * Note that EXT4_I(inode)->i_disksize can get extended up to
++ * inode->i_size while the I/O was running due to writeback of delalloc
++ * blocks. But the code in ext4_iomap_alloc() is careful to use
++ * zeroed/unwritten extents if this is possible; thus we won't leave
++ * uninitialized blocks in a file even if we didn't succeed in writing
++ * as much as we intended.
+ */
+- pos += size;
+- if (pos > i_size_read(inode))
+- i_size_write(inode, pos);
+-
+- return 0;
++ WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize));
++ if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize))
++ return size;
++ return ext4_handle_inode_extension(inode, pos, size);
+ }
+
+ static const struct iomap_dio_ops ext4_dio_write_ops = {
+@@ -606,9 +574,16 @@ static ssize_t ext4_dio_write_iter(struc
+ dio_flags, NULL, 0);
+ if (ret == -ENOTBLK)
+ ret = 0;
+-
+- if (extend)
+- ret = ext4_handle_inode_extension(inode, offset, ret, count);
++ if (extend) {
++ /*
++ * We always perform extending DIO write synchronously so by
++ * now the IO is completed and ext4_handle_inode_extension()
++ * was called. Cleanup the inode in case of error or race with
++ * writeback of delalloc blocks.
++ */
++ WARN_ON_ONCE(ret == -EIOCBQUEUED);
++ ext4_inode_extension_cleanup(inode, ret);
++ }
+
+ out:
+ if (ilock_shared)
+@@ -689,8 +664,10 @@ ext4_dax_write_iter(struct kiocb *iocb,
+
+ ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+
+- if (extend)
+- ret = ext4_handle_inode_extension(inode, offset, ret, count);
++ if (extend) {
++ ret = ext4_handle_inode_extension(inode, offset, ret);
++ ext4_inode_extension_cleanup(inode, ret);
++ }
+ out:
+ inode_unlock(inode);
+ if (ret > 0)
--- /dev/null
+From 40dd7953f4d606c280074f10d23046b6812708ce Mon Sep 17 00:00:00 2001
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+Date: Sun, 27 Aug 2023 01:47:03 +0800
+Subject: ext4: remove gdb backup copy for meta bg in setup_new_flex_group_blocks
+
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+
+commit 40dd7953f4d606c280074f10d23046b6812708ce upstream.
+
+Wrong check of gdb backup in meta bg as following:
+first_group is the first group of meta_bg which contains target group, so
+target group is always >= first_group. We check if target group has gdb
+backup by comparing first_group with [group + 1] and [group +
+EXT4_DESC_PER_BLOCK(sb) - 1]. As group >= first_group, then [group + N] is
+> first_group. So no copy of gdb backup in meta bg is done in
+setup_new_flex_group_blocks.
+
+No need to do gdb backup copy in meta bg from setup_new_flex_group_blocks
+as we always copy updated gdb block to backups at end of
+ext4_flex_group_add as following:
+
+ext4_flex_group_add
+ /* no gdb backup copy for meta bg any more */
+ setup_new_flex_group_blocks
+
+ /* update current group number */
+ ext4_update_super
+ sbi->s_groups_count += flex_gd->count;
+
+ /*
+ * if group in meta bg contains backup is added, the primary gdb block
+ * of the meta bg will be copy to backup in new added group here.
+ */
+ for (; gdb_num <= gdb_num_end; gdb_num++)
+ update_backups(...)
+
+In summary, we can remove wrong gdb backup copy code in
+setup_new_flex_group_blocks.
+
+Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
+Reviewed-by: Theodore Ts'o <tytso@mit.edu>
+Link: https://lore.kernel.org/r/20230826174712.4059355-5-shikemeng@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/resize.c | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -560,13 +560,8 @@ static int setup_new_flex_group_blocks(s
+ if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
+ goto handle_itb;
+
+- if (meta_bg == 1) {
+- ext4_group_t first_group;
+- first_group = ext4_meta_bg_first_group(sb, group);
+- if (first_group != group + 1 &&
+- first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
+- goto handle_itb;
+- }
++ if (meta_bg == 1)
++ goto handle_itb;
+
+ block = start + ext4_bg_has_super(sb, group);
+ /* Copy all of the GDT blocks into the backup in this group */
--- /dev/null
+From 5b4ffb176d7979ac66b349addf3f7de433335e00 Mon Sep 17 00:00:00 2001
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Tue, 21 Nov 2023 14:38:11 +0100
+Subject: Revert "HID: logitech-dj: Add support for a new lightspeed receiver iteration"
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+commit 5b4ffb176d7979ac66b349addf3f7de433335e00 upstream.
+
+This reverts commit 9d1bd9346241cd6963b58da7ffb7ed303285f684.
+
+Multiple people reported misbehaving devices and reverting this commit fixes
+the problem for them. As soon as the original commit author starts reacting
+again, we can try to figure out why he hasn't seen the issues (mismatching
+report descriptors?), but for the time being, fix for 6.7 by reverting.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=218172
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=218094
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hid/hid-ids.h | 1 -
+ drivers/hid/hid-logitech-dj.c | 11 +++--------
+ 2 files changed, 3 insertions(+), 9 deletions(-)
+
+--- a/drivers/hid/hid-ids.h
++++ b/drivers/hid/hid-ids.h
+@@ -869,7 +869,6 @@
+ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2 0xc534
+ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1 0xc539
+ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1 0xc53f
+-#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2 0xc547
+ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a
+ #define USB_DEVICE_ID_SPACETRAVELLER 0xc623
+ #define USB_DEVICE_ID_SPACENAVIGATOR 0xc626
+--- a/drivers/hid/hid-logitech-dj.c
++++ b/drivers/hid/hid-logitech-dj.c
+@@ -1695,12 +1695,11 @@ static int logi_dj_raw_event(struct hid_
+ }
+ /*
+ * Mouse-only receivers send unnumbered mouse data. The 27 MHz
+- * receiver uses 6 byte packets, the nano receiver 8 bytes,
+- * the lightspeed receiver (Pro X Superlight) 13 bytes.
++ * receiver uses 6 byte packets, the nano receiver 8 bytes.
+ */
+ if (djrcv_dev->unnumbered_application == HID_GD_MOUSE &&
+- size <= 13){
+- u8 mouse_report[14];
++ size <= 8) {
++ u8 mouse_report[9];
+
+ /* Prepend report id */
+ mouse_report[0] = REPORT_TYPE_MOUSE;
+@@ -1984,10 +1983,6 @@ static const struct hid_device_id logi_d
+ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
+ USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1),
+ .driver_data = recvr_type_gaming_hidpp},
+- { /* Logitech lightspeed receiver (0xc547) */
+- HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
+- USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2),
+- .driver_data = recvr_type_gaming_hidpp},
+
+ { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */
+ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER),
--- /dev/null
+From 6a26310273c323380da21eb23fcfd50e31140913 Mon Sep 17 00:00:00 2001
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Tue, 21 Nov 2023 09:09:33 +0100
+Subject: Revert "net: r8169: Disable multicast filter for RTL8168H and RTL8107E"
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+commit 6a26310273c323380da21eb23fcfd50e31140913 upstream.
+
+This reverts commit efa5f1311c4998e9e6317c52bc5ee93b3a0f36df.
+
+I couldn't reproduce the reported issue. What I did, based on a pcap
+packet log provided by the reporter:
+- Used same chip version (RTL8168h)
+- Set MAC address to the one used on the reporters system
+- Replayed the EAPOL unicast packet that, according to the reporter,
+ was filtered out by the mc filter.
+The packet was properly received.
+
+Therefore the root cause of the reported issue seems to be somewhere
+else. Disabling mc filtering completely for the most common chip
+version is a quite big hammer. Therefore revert the change and wait
+for further analysis results from the reporter.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169_main.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -2599,9 +2599,7 @@ static void rtl_set_rx_mode(struct net_d
+ rx_mode &= ~AcceptMulticast;
+ } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT ||
+ dev->flags & IFF_ALLMULTI ||
+- tp->mac_version == RTL_GIGA_MAC_VER_35 ||
+- tp->mac_version == RTL_GIGA_MAC_VER_46 ||
+- tp->mac_version == RTL_GIGA_MAC_VER_48) {
++ tp->mac_version == RTL_GIGA_MAC_VER_35) {
+ /* accept all multicasts */
+ } else if (netdev_mc_empty(dev)) {
+ rx_mode &= ~AcceptMulticast;
media-qcom-camss-fix-set-csi2_rx_cfg1_vc_mode-when-vc-is-greater-than-3.patch
media-qcom-camss-fix-invalid-clock-enable-bit-disjunction.patch
media-qcom-camss-fix-csid-gen2-for-test-pattern-generator.patch
+revert-hid-logitech-dj-add-support-for-a-new-lightspeed-receiver-iteration.patch
+revert-net-r8169-disable-multicast-filter-for-rtl8168h-and-rtl8107e.patch
+ext4-fix-race-between-writepages-and-remount.patch
+ext4-no-need-to-generate-from-free-list-in-mballoc.patch
+ext4-make-sure-allocate-pending-entry-not-fail.patch
+ext4-apply-umask-if-acl-support-is-disabled.patch
+ext4-correct-offset-of-gdb-backup-in-non-meta_bg-group-to-update_backups.patch
+ext4-mark-buffer-new-if-it-is-unwritten-to-avoid-stale-data-exposure.patch
+ext4-correct-return-value-of-ext4_convert_meta_bg.patch
+ext4-correct-the-start-block-of-counting-reserved-clusters.patch
+ext4-remove-gdb-backup-copy-for-meta-bg-in-setup_new_flex_group_blocks.patch
+ext4-add-missed-brelse-in-update_backups.patch
+ext4-properly-sync-file-size-update-after-o_sync-direct-io.patch
+ext4-fix-racy-may-inline-data-check-in-dio-write.patch
+drm-amd-pm-handle-non-terminated-overdrive-commands.patch
+drm-bridge-it66121-get_edid-callback-must-not-return-err-pointers.patch
+x86-srso-move-retbleed-ibpb-check-into-existing-has_microcode-code-block.patch
--- /dev/null
+From 351236947a45a512c517153bbe109fe868d05e6d Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+Date: Mon, 4 Sep 2023 22:04:59 -0700
+Subject: x86/srso: Move retbleed IBPB check into existing 'has_microcode' code block
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+commit 351236947a45a512c517153bbe109fe868d05e6d upstream.
+
+Simplify the code flow a bit by moving the retbleed IBPB check into the
+existing 'has_microcode' block.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/0a22b86b1f6b07f9046a9ab763fc0e0d1b7a91d4.1693889988.git.jpoimboe@kernel.org
+Cc: Caleb Jorden <cjorden@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -2422,10 +2422,8 @@ static void __init srso_select_mitigatio
+ setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+ return;
+ }
+- }
+
+- if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
+- if (has_microcode) {
++ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
+ srso_mitigation = SRSO_MITIGATION_IBPB;
+ goto out;
+ }