--- /dev/null
+From c164c7bc9775be7bcc68754bb3431fce5823822e Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Thu, 17 Aug 2023 22:17:51 +0800
+Subject: blk-cgroup: hold queue_lock when removing blkg->q_node
+
+From: Ming Lei <ming.lei@redhat.com>
+
+commit c164c7bc9775be7bcc68754bb3431fce5823822e upstream.
+
+When blkg is removed from q->blkg_list from blkg_free_workfn(), queue_lock
+has to be held, otherwise all kinds of bugs (list corruption, hard lockup,
+...) can be triggered from blkg_destroy_all().
+
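+As an illustration (not part of the upstream message), the race being
+closed looks roughly like this:
+
+  blkg_destroy_all()                      blkg_free_workfn()
+  spin_lock_irq(&q->queue_lock);
+  iterate q->blkg_list
+                                          list_del_init(&blkg->q_node);
+                                          // entry removed without queue_lock,
+                                          // corrupting the iteration
+  spin_unlock_irq(&q->queue_lock);
+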
+Fixes: f1c006f1c685 ("blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()")
+Cc: Yu Kuai <yukuai3@huawei.com>
+Cc: xiaoli feng <xifeng@redhat.com>
+Cc: Chunyu Hu <chuhu@redhat.com>
+Cc: Mike Snitzer <snitzer@kernel.org>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230817141751.1128970-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-cgroup.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -136,7 +136,9 @@ static void blkg_free_workfn(struct work
+ blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
+ if (blkg->parent)
+ blkg_put(blkg->parent);
++ spin_lock_irq(&q->queue_lock);
+ list_del_init(&blkg->q_node);
++ spin_unlock_irq(&q->queue_lock);
+ mutex_unlock(&q->blkcg_mutex);
+
+ blk_put_queue(q);
--- /dev/null
+From 29eefa6d0d07e185f7bfe9576f91e6dba98189c2 Mon Sep 17 00:00:00 2001
+From: xiaoshoukui <xiaoshoukui@gmail.com>
+Date: Tue, 15 Aug 2023 02:55:59 -0400
+Subject: btrfs: fix BUG_ON condition in btrfs_cancel_balance
+
+From: xiaoshoukui <xiaoshoukui@gmail.com>
+
+commit 29eefa6d0d07e185f7bfe9576f91e6dba98189c2 upstream.
+
+Pausing and canceling balance can race to interrupt the running balance,
+leading to a BUG_ON panic in btrfs_cancel_balance. The BUG_ON condition in
+btrfs_cancel_balance does not take this race scenario into account.
+
+However, the race condition has no other side effects and can be fixed.
+
+Reproducing it with panic trace like this:
+
+ kernel BUG at fs/btrfs/volumes.c:4618!
+ RIP: 0010:btrfs_cancel_balance+0x5cf/0x6a0
+ Call Trace:
+ <TASK>
+ ? do_nanosleep+0x60/0x120
+ ? hrtimer_nanosleep+0xb7/0x1a0
+ ? sched_core_clone_cookie+0x70/0x70
+ btrfs_ioctl_balance_ctl+0x55/0x70
+ btrfs_ioctl+0xa46/0xd20
+ __x64_sys_ioctl+0x7d/0xa0
+ do_syscall_64+0x38/0x80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+ Race scenario as follows:
+ > mutex_unlock(&fs_info->balance_mutex);
+ > --------------------
+ > .......issue pause and cancel req in another thread
+ > --------------------
+ > ret = __btrfs_balance(fs_info);
+ >
+ > mutex_lock(&fs_info->balance_mutex);
+ > if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
+ > btrfs_info(fs_info, "balance: paused");
+ > btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
+ > }
+
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: xiaoshoukui <xiaoshoukui@ruijie.com.cn>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/volumes.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -4631,8 +4631,7 @@ int btrfs_cancel_balance(struct btrfs_fs
+ }
+ }
+
+- BUG_ON(fs_info->balance_ctl ||
+- test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
++ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ atomic_dec(&fs_info->balance_cancel_req);
+ mutex_unlock(&fs_info->balance_mutex);
+ return 0;
--- /dev/null
+From c962098ca4af146f2625ed64399926a098752c9c Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 17 Aug 2023 16:57:30 -0400
+Subject: btrfs: fix incorrect splitting in btrfs_drop_extent_map_range
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit c962098ca4af146f2625ed64399926a098752c9c upstream.
+
+In production we were seeing a variety of WARN_ON()'s in the extent_map
+code, specifically in btrfs_drop_extent_map_range() when we have to call
+add_extent_mapping() for our second split.
+
+Consider the following extent map layout
+
+  PINNED
+  [0, 16K)           [32K, 48K)
+
+and then we call btrfs_drop_extent_map_range for [0, 36K), with
+skip_pinned == true. The initial loop will have
+
+ start = 0
+ end = 36K
+ len = 36K
+
+we will find the [0, 16K) extent, but since it is pinned we will skip
+it, which has this code
+
+ start = em_end;
+ if (end != (u64)-1)
+ len = start + len - em_end;
+
+em_end here is 16K, so now the values are
+
+ start = 16K
+ len = 16K + 36K - 16K = 36K
+
+len should instead be 20K. This is a problem when we find the next
+extent at [32K, 48K): we need to split this extent to leave [36K, 48K),
+however the code for the split looks like this
+
+ split->start = start + len;
+ split->len = em_end - (start + len);
+
+In this case we have
+
+ em_end = 48K
+ split->start = 16K + 36K // this should be 16K + 20K
+ split->len = 48K - (16K + 36K) // this overflows as 16K + 36K is 52K
+
+and now we have an invalid extent_map in the tree that potentially
+overlaps other entries in the extent map. Even in the non-overlapping
+case we will have split->start set improperly, which will cause problems
+with any block related calculations.
+
+We don't actually need len in this loop, we can simply use end as our
+end point, and only adjust start up when we find a pinned extent we need
+to skip.
+
+Adjust the logic to do this, which keeps us from inserting an invalid
+extent map.
+
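+With end used as the end point, the same example (illustrative arithmetic,
+not part of the original message) produces the expected second split:
+
+  split->start = end          = 36K
+  split->len   = em_end - end = 48K - 36K = 12K
+
+leaving a valid extent map that covers [36K, 48K).
+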
+We only skip_pinned in the relocation case, so this is relatively rare,
+except in the case where you are running relocation a lot, which can
+happen with auto relocation on.
+
+Fixes: 55ef68990029 ("Btrfs: Fix btrfs_drop_extent_cache for skip pinned case")
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_map.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -758,8 +758,6 @@ void btrfs_drop_extent_map_range(struct
+
+ if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
+ start = em_end;
+- if (end != (u64)-1)
+- len = start + len - em_end;
+ goto next;
+ }
+
+@@ -827,8 +825,8 @@ void btrfs_drop_extent_map_range(struct
+ if (!split)
+ goto remove_em;
+ }
+- split->start = start + len;
+- split->len = em_end - (start + len);
++ split->start = end;
++ split->len = em_end - end;
+ split->block_start = em->block_start;
+ split->flags = flags;
+ split->compress_type = em->compress_type;
--- /dev/null
+From 9b378f6ad48cfa195ed868db9123c09ee7ec5ea2 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sun, 13 Aug 2023 12:34:08 +0100
+Subject: btrfs: fix infinite directory reads
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 9b378f6ad48cfa195ed868db9123c09ee7ec5ea2 upstream.
+
+The readdir implementation currently always processes up to the last index
+it finds. This however can result in an infinite loop if the directory has
+a large number of entries such that they won't all fit in the given buffer
+passed to the readdir callback, that is, dir_emit() returns a non-zero
+value. In that case readdir() will be called again, and if in the meanwhile
+new directory entries were added and we still can't put all the remaining
+entries in the buffer, we keep repeating this over and over.
+
+The following C program and test script reproduce the problem:
+
+ $ cat /mnt/readdir_prog.c
+ #include <sys/types.h>
+ #include <dirent.h>
+ #include <stdio.h>
+
+ int main(int argc, char *argv[])
+ {
+ DIR *dir = opendir(".");
+ struct dirent *dd;
+
+ while ((dd = readdir(dir))) {
+ printf("%s\n", dd->d_name);
+ rename(dd->d_name, "TEMPFILE");
+ rename("TEMPFILE", dd->d_name);
+ }
+ closedir(dir);
+ }
+
+ $ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c
+
+ $ cat test.sh
+ #!/bin/bash
+
+ DEV=/dev/sdi
+ MNT=/mnt/sdi
+
+ mkfs.btrfs -f $DEV &> /dev/null
+ #mkfs.xfs -f $DEV &> /dev/null
+ #mkfs.ext4 -F $DEV &> /dev/null
+
+ mount $DEV $MNT
+
+ mkdir $MNT/testdir
+ for ((i = 1; i <= 2000; i++)); do
+ echo -n > $MNT/testdir/file_$i
+ done
+
+ cd $MNT/testdir
+ /mnt/readdir_prog
+
+ cd /mnt
+
+ umount $MNT
+
+This behaviour is surprising to applications and it's unlike ext4, xfs,
+tmpfs, vfat and other filesystems, which always finish. In this case where
+new entries were added due to renames, some file names may be reported
+more than once, but this varies according to each filesystem - for example
+ext4 never reported the same file more than once while xfs reports the
+first 13 file names twice.
+
+So change our readdir implementation to track the last index number when
+opendir() is called and then make readdir() never process beyond that
+index number. This gives the same behaviour as ext4.
+
+Reported-by: Rob Landley <rob@landley.net>
+Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@landley.net/
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681
+CC: stable@vger.kernel.org # 6.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h | 1
+ fs/btrfs/delayed-inode.c | 5 +
+ fs/btrfs/delayed-inode.h | 1
+ fs/btrfs/inode.c | 131 ++++++++++++++++++++++++++++-------------------
+ 4 files changed, 84 insertions(+), 54 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -443,6 +443,7 @@ struct btrfs_drop_extents_args {
+
+ struct btrfs_file_private {
+ void *filldir_buf;
++ u64 last_index;
+ struct extent_state *llseek_cached_state;
+ };
+
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1632,6 +1632,7 @@ int btrfs_inode_delayed_dir_index_count(
+ }
+
+ bool btrfs_readdir_get_delayed_items(struct inode *inode,
++ u64 last_index,
+ struct list_head *ins_list,
+ struct list_head *del_list)
+ {
+@@ -1651,14 +1652,14 @@ bool btrfs_readdir_get_delayed_items(str
+
+ mutex_lock(&delayed_node->mutex);
+ item = __btrfs_first_delayed_insertion_item(delayed_node);
+- while (item) {
++ while (item && item->index <= last_index) {
+ refcount_inc(&item->refs);
+ list_add_tail(&item->readdir_list, ins_list);
+ item = __btrfs_next_delayed_item(item);
+ }
+
+ item = __btrfs_first_delayed_deletion_item(delayed_node);
+- while (item) {
++ while (item && item->index <= last_index) {
+ refcount_inc(&item->refs);
+ list_add_tail(&item->readdir_list, del_list);
+ item = __btrfs_next_delayed_item(item);
+--- a/fs/btrfs/delayed-inode.h
++++ b/fs/btrfs/delayed-inode.h
+@@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct
+
+ /* Used for readdir() */
+ bool btrfs_readdir_get_delayed_items(struct inode *inode,
++ u64 last_index,
+ struct list_head *ins_list,
+ struct list_head *del_list);
+ void btrfs_readdir_put_delayed_items(struct inode *inode,
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5745,6 +5745,74 @@ static struct dentry *btrfs_lookup(struc
+ }
+
+ /*
++ * Find the highest existing sequence number in a directory and then set the
++ * in-memory index_cnt variable to the first free sequence number.
++ */
++static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
++{
++ struct btrfs_root *root = inode->root;
++ struct btrfs_key key, found_key;
++ struct btrfs_path *path;
++ struct extent_buffer *leaf;
++ int ret;
++
++ key.objectid = btrfs_ino(inode);
++ key.type = BTRFS_DIR_INDEX_KEY;
++ key.offset = (u64)-1;
++
++ path = btrfs_alloc_path();
++ if (!path)
++ return -ENOMEM;
++
++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
++ if (ret < 0)
++ goto out;
++ /* FIXME: we should be able to handle this */
++ if (ret == 0)
++ goto out;
++ ret = 0;
++
++ if (path->slots[0] == 0) {
++ inode->index_cnt = BTRFS_DIR_START_INDEX;
++ goto out;
++ }
++
++ path->slots[0]--;
++
++ leaf = path->nodes[0];
++ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
++
++ if (found_key.objectid != btrfs_ino(inode) ||
++ found_key.type != BTRFS_DIR_INDEX_KEY) {
++ inode->index_cnt = BTRFS_DIR_START_INDEX;
++ goto out;
++ }
++
++ inode->index_cnt = found_key.offset + 1;
++out:
++ btrfs_free_path(path);
++ return ret;
++}
++
++static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
++{
++ if (dir->index_cnt == (u64)-1) {
++ int ret;
++
++ ret = btrfs_inode_delayed_dir_index_count(dir);
++ if (ret) {
++ ret = btrfs_set_inode_index_count(dir);
++ if (ret)
++ return ret;
++ }
++ }
++
++ *index = dir->index_cnt;
++
++ return 0;
++}
++
++/*
+ * All this infrastructure exists because dir_emit can fault, and we are holding
+ * the tree lock when doing readdir. For now just allocate a buffer and copy
+ * our information into that, and then dir_emit from the buffer. This is
+@@ -5756,10 +5824,17 @@ static struct dentry *btrfs_lookup(struc
+ static int btrfs_opendir(struct inode *inode, struct file *file)
+ {
+ struct btrfs_file_private *private;
++ u64 last_index;
++ int ret;
++
++ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
++ if (ret)
++ return ret;
+
+ private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
+ if (!private)
+ return -ENOMEM;
++ private->last_index = last_index;
+ private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!private->filldir_buf) {
+ kfree(private);
+@@ -5826,7 +5901,8 @@ static int btrfs_real_readdir(struct fil
+
+ INIT_LIST_HEAD(&ins_list);
+ INIT_LIST_HEAD(&del_list);
+- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
++ put = btrfs_readdir_get_delayed_items(inode, private->last_index,
++ &ins_list, &del_list);
+
+ again:
+ key.type = BTRFS_DIR_INDEX_KEY;
+@@ -5844,6 +5920,8 @@ again:
+ break;
+ if (found_key.offset < ctx->pos)
+ continue;
++ if (found_key.offset > private->last_index)
++ break;
+ if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
+ continue;
+ di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+@@ -5980,57 +6058,6 @@ static int btrfs_update_time(struct inod
+ }
+
+ /*
+- * find the highest existing sequence number in a directory
+- * and then set the in-memory index_cnt variable to reflect
+- * free sequence numbers
+- */
+-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+-{
+- struct btrfs_root *root = inode->root;
+- struct btrfs_key key, found_key;
+- struct btrfs_path *path;
+- struct extent_buffer *leaf;
+- int ret;
+-
+- key.objectid = btrfs_ino(inode);
+- key.type = BTRFS_DIR_INDEX_KEY;
+- key.offset = (u64)-1;
+-
+- path = btrfs_alloc_path();
+- if (!path)
+- return -ENOMEM;
+-
+- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+- if (ret < 0)
+- goto out;
+- /* FIXME: we should be able to handle this */
+- if (ret == 0)
+- goto out;
+- ret = 0;
+-
+- if (path->slots[0] == 0) {
+- inode->index_cnt = BTRFS_DIR_START_INDEX;
+- goto out;
+- }
+-
+- path->slots[0]--;
+-
+- leaf = path->nodes[0];
+- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+-
+- if (found_key.objectid != btrfs_ino(inode) ||
+- found_key.type != BTRFS_DIR_INDEX_KEY) {
+- inode->index_cnt = BTRFS_DIR_START_INDEX;
+- goto out;
+- }
+-
+- inode->index_cnt = found_key.offset + 1;
+-out:
+- btrfs_free_path(path);
+- return ret;
+-}
+-
+-/*
+ * helper to find a free sequence number in a given directory. This current
+ * code is very simple, later versions will do smarter things in the btree
+ */
--- /dev/null
+From b471965fdb2daa225850e5972d86600992fa398e Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 28 Jul 2023 14:48:13 +0800
+Subject: btrfs: fix replace/scrub failure with metadata_uuid
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit b471965fdb2daa225850e5972d86600992fa398e upstream.
+
+Fstests with POST_MKFS_CMD="btrfstune -m" (as in the mailing list)
+reported a few of the test cases failing.
+
+The failure scenario can be summarized and simplified as follows:
+
+ $ mkfs.btrfs -fq -draid1 -mraid1 /dev/sdb1 /dev/sdb2 :0
+ $ btrfstune -m /dev/sdb1 :0
+ $ wipefs -a /dev/sdb1 :0
+ $ mount -o degraded /dev/sdb2 /btrfs :0
+ $ btrfs replace start -B -f -r 1 /dev/sdb1 /btrfs :1
+ STDERR:
+ ERROR: ioctl(DEV_REPLACE_START) failed on "/btrfs": Input/output error
+
+ [11290.583502] BTRFS warning (device sdb2): tree block 22036480 mirror 2 has bad fsid, has 99835c32-49f0-4668-9e66-dc277a96b4a6 want da40350c-33ac-4872-92a8-4948ed8c04d0
+ [11290.586580] BTRFS error (device sdb2): unable to fix up (regular) error at logical 22020096 on dev /dev/sdb8 physical 1048576
+
+As above, the replace is failing because we are verifying the header with
+fs_devices::fsid instead of fs_devices::metadata_uuid, despite the
+metadata_uuid actually being present.
+
+To fix this, use fs_devices::metadata_uuid. We copy fsid into
+fs_devices::metadata_uuid if there is no metadata_uuid, so it's fine.
+
+Fixes: a3ddbaebc7c9 ("btrfs: scrub: introduce a helper to verify one metadata block")
+CC: stable@vger.kernel.org # 6.4+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/scrub.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -655,7 +655,8 @@ static void scrub_verify_one_metadata(st
+ btrfs_stack_header_bytenr(header), logical);
+ return;
+ }
+- if (memcmp(header->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) != 0) {
++ if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
++ BTRFS_FSID_SIZE) != 0) {
+ bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ btrfs_warn_rl(fs_info,
--- /dev/null
+From 09c3717c3a60e3ef599bc17c70cd3ae2b979ad41 Mon Sep 17 00:00:00 2001
+From: Chris Mason <clm@fb.com>
+Date: Tue, 1 Aug 2023 09:28:28 -0700
+Subject: btrfs: only subtract from len_to_oe_boundary when it is tracking an extent
+
+From: Chris Mason <clm@fb.com>
+
+commit 09c3717c3a60e3ef599bc17c70cd3ae2b979ad41 upstream.
+
+bio_ctrl->len_to_oe_boundary is used to make sure we stay inside a zone
+as we submit bios for writes. Every time we add a page to the bio, we
+decrement those bytes from len_to_oe_boundary, and then we submit the
+bio if we happen to hit zero.
+
+Most of the time, len_to_oe_boundary gets set to U32_MAX.
+submit_extent_page() adds pages into our bio, and the size of the bio
+ends up limited by:
+
+- Are we contiguous on disk?
+- Does bio_add_page() allow us to stuff more in?
+- is len_to_oe_boundary > 0?
+
+The len_to_oe_boundary math starts with U32_MAX, which isn't page or
+sector aligned, and subtracts from it until it hits zero. In the
+non-zoned case, the last IO we submit before we hit zero is going to be
+unaligned, triggering BUGs.
+
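+To make the misalignment concrete (illustrative arithmetic, not from the
+original message): U32_MAX is 4294967295 bytes, i.e. 1048575 full 4K pages
+plus 4095 bytes, so once nearly 4GiB of contiguous pages have been added,
+the counter caps the final IO at 4095 bytes, which is not sector aligned.
+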
+This is hard to trigger because bio_add_page() isn't going to make a bio
+of U32_MAX size unless you give it a perfect set of pages and fully
+contiguous extents on disk. We can hit it pretty reliably while making
+large swapfiles during provisioning because the machine is freshly
+booted, mostly idle, and the disk is freshly formatted. It's also
+possible to trigger with reads when read_ahead_kb is set to 4GB.
+
+The code has been cleaned up and shifted around a few times, but this flaw
+has been lurking since the counter was added. I think the commit
+24e6c8082208 ("btrfs: simplify main loop in submit_extent_page") ended
+up exposing the bug.
+
+The fix used here is to skip doing math on len_to_oe_boundary unless
+we've changed it from the default U32_MAX value. bio_add_page() is the
+real limit we want, and there's no reason to do extra math when block
+layer is doing it for us.
+
+Sample reproducer, note you'll need to change the path to the bdi and
+device:
+
+ SUBVOL=/btrfs/swapvol
+ SWAPFILE=$SUBVOL/swapfile
+ SZMB=8192
+
+ mkfs.btrfs -f /dev/vdb
+ mount /dev/vdb /btrfs
+
+ btrfs subvol create $SUBVOL
+ chattr +C $SUBVOL
+ dd if=/dev/zero of=$SWAPFILE bs=1M count=$SZMB
+ sync
+
+ echo 4 > /proc/sys/vm/drop_caches
+
+ echo 4194304 > /sys/class/bdi/btrfs-2/read_ahead_kb
+
+ while true; do
+ echo 1 > /proc/sys/vm/drop_caches
+ echo 1 > /proc/sys/vm/drop_caches
+ dd of=/dev/zero if=$SWAPFILE bs=4096M count=2 iflag=fullblock
+ done
+
+Fixes: 24e6c8082208 ("btrfs: simplify main loop in submit_extent_page")
+CC: stable@vger.kernel.org # 6.4+
+Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 25 ++++++++++++++++++++++++-
+ 1 file changed, 24 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -962,7 +962,30 @@ static void submit_extent_page(struct bt
+ size -= len;
+ pg_offset += len;
+ disk_bytenr += len;
+- bio_ctrl->len_to_oe_boundary -= len;
++
++ /*
++ * len_to_oe_boundary defaults to U32_MAX, which isn't page or
++ * sector aligned. alloc_new_bio() then sets it to the end of
++ * our ordered extent for writes into zoned devices.
++ *
++ * When len_to_oe_boundary is tracking an ordered extent, we
++ * trust the ordered extent code to align things properly, and
++ * the check above to cap our write to the ordered extent
++ * boundary is correct.
++ *
++ * When len_to_oe_boundary is U32_MAX, the cap above would
++ * result in a 4095 byte IO for the last page right before
++ * we hit the bio limit of UINT_MAX. bio_add_page() has all
++ * the checks required to make sure we don't overflow the bio,
++ * and we should just ignore len_to_oe_boundary completely
++ * unless we're using it to track an ordered extent.
++ *
++ * It's pretty hard to make a bio sized U32_MAX, but it can
++ * happen when the page cache is able to feed us contiguous
++ * pages for large extents.
++ */
++ if (bio_ctrl->len_to_oe_boundary != U32_MAX)
++ bio_ctrl->len_to_oe_boundary -= len;
+
+ /* Ordered extent boundary: move on to a new bio. */
+ if (bio_ctrl->len_to_oe_boundary == 0)
--- /dev/null
+From 0872b2c0abc0e84ac82472959c8e14e35277549c Mon Sep 17 00:00:00 2001
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Date: Fri, 28 Jul 2023 01:03:18 +0800
+Subject: fbdev: mmp: fix value check in mmphw_probe()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+commit 0872b2c0abc0e84ac82472959c8e14e35277549c upstream.
+
+In mmphw_probe(), check the return value of clk_prepare_enable()
+and return the error code if clk_prepare_enable() returns an
+unexpected value.
+
+Fixes: d63028c38905 ("video: mmp display controller support")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/video/fbdev/mmp/hw/mmp_ctrl.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
++++ b/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
+@@ -519,7 +519,9 @@ static int mmphw_probe(struct platform_d
+ "unable to get clk %s\n", mi->clk_name);
+ goto failed;
+ }
+- clk_prepare_enable(ctrl->clk);
++ ret = clk_prepare_enable(ctrl->clk);
++ if (ret)
++ goto failed;
+
+ /* init global regs */
+ ctrl_set_default(ctrl);
--- /dev/null
+From 4caf4cb1eaed469742ef719f2cc024b1ec3fa9e6 Mon Sep 17 00:00:00 2001
+From: Chengfeng Ye <dg573847474@gmail.com>
+Date: Fri, 7 Jul 2023 08:49:41 +0000
+Subject: i2c: bcm-iproc: Fix bcm_iproc_i2c_isr deadlock issue
+
+From: Chengfeng Ye <dg573847474@gmail.com>
+
+commit 4caf4cb1eaed469742ef719f2cc024b1ec3fa9e6 upstream.
+
+iproc_i2c_rd_reg() and iproc_i2c_wr_reg() are called from both
+interrupt context (e.g. bcm_iproc_i2c_isr) and process context
+(e.g. bcm_iproc_i2c_suspend). Therefore, interrupts should be
+disabled to avoid potential deadlock. To prevent this scenario,
+use spin_lock_irqsave().
+
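+An illustrative single-CPU deadlock (hypothetical trace, not from the
+original message):
+
+  iproc_i2c_wr_reg()                      bcm_iproc_i2c_isr()
+  spin_lock(&iproc_i2c->idm_lock);
+    <interrupt>
+                                          iproc_i2c_rd_reg()
+                                          spin_lock(&iproc_i2c->idm_lock);
+                                          // spins forever on this CPU
+
+spin_lock_irqsave() keeps local interrupts disabled while the lock is held,
+so the ISR cannot preempt the lock holder on the same CPU.
+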
+Fixes: 9a1038728037 ("i2c: iproc: add NIC I2C support")
+Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
+Acked-by: Ray Jui <ray.jui@broadcom.com>
+Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-bcm-iproc.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-bcm-iproc.c
++++ b/drivers/i2c/busses/i2c-bcm-iproc.c
+@@ -233,13 +233,14 @@ static inline u32 iproc_i2c_rd_reg(struc
+ u32 offset)
+ {
+ u32 val;
++ unsigned long flags;
+
+ if (iproc_i2c->idm_base) {
+- spin_lock(&iproc_i2c->idm_lock);
++ spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
+ writel(iproc_i2c->ape_addr_mask,
+ iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
+ val = readl(iproc_i2c->base + offset);
+- spin_unlock(&iproc_i2c->idm_lock);
++ spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
+ } else {
+ val = readl(iproc_i2c->base + offset);
+ }
+@@ -250,12 +251,14 @@ static inline u32 iproc_i2c_rd_reg(struc
+ static inline void iproc_i2c_wr_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
+ u32 offset, u32 val)
+ {
++ unsigned long flags;
++
+ if (iproc_i2c->idm_base) {
+- spin_lock(&iproc_i2c->idm_lock);
++ spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
+ writel(iproc_i2c->ape_addr_mask,
+ iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
+ writel(val, iproc_i2c->base + offset);
+- spin_unlock(&iproc_i2c->idm_lock);
++ spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
+ } else {
+ writel(val, iproc_i2c->base + offset);
+ }
--- /dev/null
+From 49d4db3953cb9004ff94efc0c176e026c820af5a Mon Sep 17 00:00:00 2001
+From: Quan Nguyen <quan@os.amperecomputing.com>
+Date: Wed, 26 Jul 2023 15:00:00 +0700
+Subject: i2c: designware: Correct length byte validation logic
+
+From: Quan Nguyen <quan@os.amperecomputing.com>
+
+commit 49d4db3953cb9004ff94efc0c176e026c820af5a upstream.
+
+Commit 0daede80f870 ("i2c: designware: Convert driver to using regmap API")
+changes the logic to validate the whole 32-bit return value of the
+DW_IC_DATA_CMD register instead of the 8-bit LSB, without reason.
+
+Later, commit f53f15ba5a85 ("i2c: designware: Get right data length")
+introduced a partial fix, but it is not enough because "tmp > 0" still tests
+tmp as a 32-bit value, which is wrong when IC_DATA_CMD[11] is set.
+
+Revert the logic to just before commit 0daede80f870
+("i2c: designware: Convert driver to using regmap API").
+
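+As a hypothetical illustration (not from the original message, assuming
+DW_IC_DATA_CMD_DAT masks bits 7:0): if the register reads back as 0x800
+(bit 11 set, data byte 0x00), the old check passed because
+(0x800 & 0xff) == 0 is <= 32 and 0x800 > 0, so a zero length byte slipped
+through. Masking first gives tmp == 0, and "tmp > 0" correctly rejects it.
+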
+Fixes: f53f15ba5a85 ("i2c: designware: Get right data length")
+Fixes: 0daede80f870 ("i2c: designware: Convert driver to using regmap API")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tam Nguyen <tamnguyenchi@os.amperecomputing.com>
+Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
+Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Link: https://lore.kernel.org/r/20230726080001.337353-2-tamnguyenchi@os.amperecomputing.com
+Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-designware-master.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/i2c/busses/i2c-designware-master.c
++++ b/drivers/i2c/busses/i2c-designware-master.c
+@@ -526,9 +526,10 @@ i2c_dw_read(struct dw_i2c_dev *dev)
+ u32 flags = msgs[dev->msg_read_idx].flags;
+
+ regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
++ tmp &= DW_IC_DATA_CMD_DAT;
+ /* Ensure length byte is a valid value */
+ if (flags & I2C_M_RECV_LEN &&
+- (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
++ tmp <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
+ len = i2c_dw_recv_len(dev, tmp);
+ }
+ *buf++ = tmp;
--- /dev/null
+From 69f035c480d76f12bf061148ccfd578e1099e5fc Mon Sep 17 00:00:00 2001
+From: Tam Nguyen <tamnguyenchi@os.amperecomputing.com>
+Date: Wed, 26 Jul 2023 15:00:01 +0700
+Subject: i2c: designware: Handle invalid SMBus block data response length value
+
+From: Tam Nguyen <tamnguyenchi@os.amperecomputing.com>
+
+commit 69f035c480d76f12bf061148ccfd578e1099e5fc upstream.
+
+In the I2C_FUNC_SMBUS_BLOCK_DATA case, an invalid length byte value
+(outside of 1-32) of the SMBus block data response from the Slave device
+is not correctly handled by the I2C Designware driver.
+
+In case IC_EMPTYFIFO_HOLD_MASTER_EN==1, which cannot be detected
+from the registers, the Master can be disabled only if the STOP bit
+is set. Without the STOP bit set, the Master remains active, holding the bus
+until receiving a block data response length. This hangs the bus and
+is unrecoverable.
+
+Avoid this by issuing another dummy read to reach the stop condition when
+an invalid length byte is received.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Tam Nguyen <tamnguyenchi@os.amperecomputing.com>
+Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Link: https://lore.kernel.org/r/20230726080001.337353-3-tamnguyenchi@os.amperecomputing.com
+Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-designware-master.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-designware-master.c
++++ b/drivers/i2c/busses/i2c-designware-master.c
+@@ -528,8 +528,19 @@ i2c_dw_read(struct dw_i2c_dev *dev)
+ regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
+ tmp &= DW_IC_DATA_CMD_DAT;
+ /* Ensure length byte is a valid value */
+- if (flags & I2C_M_RECV_LEN &&
+- tmp <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
++ if (flags & I2C_M_RECV_LEN) {
++ /*
++ * if IC_EMPTYFIFO_HOLD_MASTER_EN is set, which cannot be
++ * detected from the registers, the controller can be
++ * disabled if the STOP bit is set. But it is only set
++ * after receiving block data response length in
++ * I2C_FUNC_SMBUS_BLOCK_DATA case. That needs to read
++ * another byte with STOP bit set when the block data
++ * response length is invalid to complete the transaction.
++ */
++ if (!tmp || tmp > I2C_SMBUS_BLOCK_MAX)
++ tmp = 1;
++
+ len = i2c_dw_recv_len(dev, tmp);
+ }
+ *buf++ = tmp;
--- /dev/null
+From fff67c1b17ee093947bdcbac6f64d072e644159a Mon Sep 17 00:00:00 2001
+From: Yicong Yang <yangyicong@hisilicon.com>
+Date: Tue, 1 Aug 2023 20:46:25 +0800
+Subject: i2c: hisi: Only handle the interrupt of the driver's transfer
+
+From: Yicong Yang <yangyicong@hisilicon.com>
+
+commit fff67c1b17ee093947bdcbac6f64d072e644159a upstream.
+
+The controller may be shared with another port, for example the firmware.
+Handling an interrupt from another source will cause a crash since some
+data is not initialized. So only handle the interrupt of the driver's own
+transfer and discard others.
+
+Fixes: d62fbdb99a85 ("i2c: add support for HiSilicon I2C controller")
+Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
+Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
+Link: https://lore.kernel.org/r/20230801124625.63587-1-yangyicong@huawei.com
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-hisi.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/i2c/busses/i2c-hisi.c
++++ b/drivers/i2c/busses/i2c-hisi.c
+@@ -330,6 +330,14 @@ static irqreturn_t hisi_i2c_irq(int irq,
+ struct hisi_i2c_controller *ctlr = context;
+ u32 int_stat;
+
++ /*
++ * Don't handle the interrupt if cltr->completion is NULL. We may
++ * reach here because the interrupt is spurious or the transfer is
++ * started by another port (e.g. firmware) rather than us.
++ */
++ if (!ctlr->completion)
++ return IRQ_NONE;
++
+ int_stat = readl(ctlr->iobase + HISI_I2C_INT_MSTAT);
+ hisi_i2c_clear_int(ctlr, int_stat);
+ if (!(int_stat & HISI_I2C_INT_ALL))
--- /dev/null
+From 27ec43c77b5db780a56fc3a6d6de6bf2f74614f7 Mon Sep 17 00:00:00 2001
+From: Parker Newman <pnewman@connecttech.com>
+Date: Tue, 8 Aug 2023 16:01:06 +0200
+Subject: i2c: tegra: Fix i2c-tegra DMA config option processing
+
+From: Parker Newman <pnewman@connecttech.com>
+
+commit 27ec43c77b5db780a56fc3a6d6de6bf2f74614f7 upstream.
+
+Tegra processors prior to Tegra186 used APB DMA for I2C requiring
+CONFIG_TEGRA20_APB_DMA=y while Tegra186 and later use GPC DMA requiring
+CONFIG_TEGRA186_GPC_DMA=y.
+
+The check for whether the processor uses APB DMA is inverted, so the wrong
+DMA config options are checked.
+
+This means that if CONFIG_TEGRA20_APB_DMA=y but CONFIG_TEGRA186_GPC_DMA=n
+on a Tegra186 or later processor, the driver will incorrectly think DMA is
+enabled and attempt to request DMA channels that will never be available,
+leaving the driver in a perpetual EPROBE_DEFER state.
+
+Fixes: 48cb6356fae1 ("i2c: tegra: Add GPCDMA support")
+Signed-off-by: Parker Newman <pnewman@connecttech.com>
+Acked-by: Andi Shyti <andi.shyti@kernel.org>
+Acked-by: Akhil R <akhilrajeev@nvidia.com>
+Link: https://lore.kernel.org/r/fcfcf9b3-c8c4-9b34-2ff8-cd60a3d490bd@connecttech.com
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-tegra.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/i2c/busses/i2c-tegra.c
++++ b/drivers/i2c/busses/i2c-tegra.c
+@@ -449,7 +449,7 @@ static int tegra_i2c_init_dma(struct teg
+ if (i2c_dev->is_vi)
+ return 0;
+
+- if (!i2c_dev->hw->has_apb_dma) {
++ if (i2c_dev->hw->has_apb_dma) {
+ if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) {
+ dev_dbg(i2c_dev->dev, "APB DMA support not enabled\n");
+ return 0;
--- /dev/null
+From 8329d0c7355bfb7237baf09ec979c3e8144d2781 Mon Sep 17 00:00:00 2001
+From: Chen-Yu Tsai <wenst@chromium.org>
+Date: Mon, 10 Jul 2023 08:51:36 +0200
+Subject: media: mtk-jpeg: Set platform driver data earlier
+
+From: Chen-Yu Tsai <wenst@chromium.org>
+
+commit 8329d0c7355bfb7237baf09ec979c3e8144d2781 upstream.
+
+In the multi-core JPEG encoder/decoder setup, the driver for the
+individual cores references the parent device's platform driver data.
+However, in the parent driver, this is only set at the end of the probe
+function, way later than devm_of_platform_populate(), which triggers
+the probe of the cores. This causes a kernel splat in the sub-device
+probe function.
+
+Move platform_set_drvdata() to before devm_of_platform_populate() to
+fix this.
+
+Fixes: 934e8bccac95 ("mtk-jpegenc: support jpegenc multi-hardware")
+Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
++++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
+@@ -1310,6 +1310,8 @@ static int mtk_jpeg_probe(struct platfor
+ jpeg->dev = &pdev->dev;
+ jpeg->variant = of_device_get_match_data(jpeg->dev);
+
++ platform_set_drvdata(pdev, jpeg);
++
+ ret = devm_of_platform_populate(&pdev->dev);
+ if (ret) {
+ v4l2_err(&jpeg->v4l2_dev, "Master of platform populate failed.");
+@@ -1381,8 +1383,6 @@ static int mtk_jpeg_probe(struct platfor
+ jpeg->variant->dev_name, jpeg->vdev->num,
+ VIDEO_MAJOR, jpeg->vdev->minor);
+
+- platform_set_drvdata(pdev, jpeg);
+-
+ pm_runtime_enable(&pdev->dev);
+
+ return 0;
--- /dev/null
+From 4ae68b26c3ab5a82aa271e6e9fc9b1a06e1d6b40 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:29 +0200
+Subject: objtool/x86: Fix SRSO mess
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4ae68b26c3ab5a82aa271e6e9fc9b1a06e1d6b40 upstream.
+
+Objtool --rethunk does two things:
+
+ - it collects all (tail) calls of __x86_return_thunk and places them
+   into .return_sites. These are typically compiler generated, but
+   RET also emits the same.
+
+ - it fudges the validation of the __x86_return_thunk symbol; because
+ this symbol is inside another instruction, it can't actually find
+ the instruction pointed to by the symbol offset and gets upset.
+
+Because these two things pertained to the same symbol, there was no
+pressing need to separate them.
+
+However, alas, along comes SRSO and more crazy things to deal with
+appeared.
+
+The SRSO patch itself added the following symbol names to identify as
+rethunk:
+
+ 'srso_untrain_ret', 'srso_safe_ret' and '__ret'
+
+Where '__ret' is the old retbleed return thunk, 'srso_safe_ret' is a
+new similarly embedded return thunk, and 'srso_untrain_ret' is
+completely unrelated to anything the above does (and was only included
+because of that INT3 vs UD2 issue fixed previously).
+
+Clear things up by adding a second category for the embedded instruction
+thing.
+
+Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121148.704502245@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/x86/decode.c | 11 +++++++----
+ tools/objtool/check.c | 24 ++++++++++++++++++++++--
+ tools/objtool/include/objtool/arch.h | 1 +
+ tools/objtool/include/objtool/elf.h | 1 +
+ 4 files changed, 31 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -824,8 +824,11 @@ bool arch_is_retpoline(struct symbol *sy
+
+ bool arch_is_rethunk(struct symbol *sym)
+ {
+- return !strcmp(sym->name, "__x86_return_thunk") ||
+- !strcmp(sym->name, "srso_untrain_ret") ||
+- !strcmp(sym->name, "srso_safe_ret") ||
+- !strcmp(sym->name, "__ret");
++ return !strcmp(sym->name, "__x86_return_thunk");
++}
++
++bool arch_is_embedded_insn(struct symbol *sym)
++{
++ return !strcmp(sym->name, "__ret") ||
++ !strcmp(sym->name, "srso_safe_ret");
+ }
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -495,7 +495,7 @@ static int decode_instructions(struct ob
+ return -1;
+ }
+
+- if (func->return_thunk || func->alias != func)
++ if (func->embedded_insn || func->alias != func)
+ continue;
+
+ if (!find_insn(file, sec, func->offset)) {
+@@ -1346,16 +1346,33 @@ static int add_ignore_alternatives(struc
+ return 0;
+ }
+
++/*
++ * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
++ * will be added to the .retpoline_sites section.
++ */
+ __weak bool arch_is_retpoline(struct symbol *sym)
+ {
+ return false;
+ }
+
++/*
++ * Symbols that replace INSN_RETURN, every (tail) call to such a symbol
++ * will be added to the .return_sites section.
++ */
+ __weak bool arch_is_rethunk(struct symbol *sym)
+ {
+ return false;
+ }
+
++/*
++ * Symbols that are embedded inside other instructions, because sometimes crazy
++ * code exists. These are mostly ignored for validation purposes.
++ */
++__weak bool arch_is_embedded_insn(struct symbol *sym)
++{
++ return false;
++}
++
+ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+ {
+ struct reloc *reloc;
+@@ -1645,7 +1662,7 @@ static int add_jump_destinations(struct
+ * middle of another instruction. Objtool only
+ * knows about the outer instruction.
+ */
+- if (sym && sym->return_thunk) {
++ if (sym && sym->embedded_insn) {
+ add_return_call(file, insn, false);
+ continue;
+ }
+@@ -2550,6 +2567,9 @@ static int classify_symbols(struct objto
+ if (arch_is_rethunk(func))
+ func->return_thunk = true;
+
++ if (arch_is_embedded_insn(func))
++ func->embedded_insn = true;
++
+ if (arch_ftrace_match(func->name))
+ func->fentry = true;
+
+--- a/tools/objtool/include/objtool/arch.h
++++ b/tools/objtool/include/objtool/arch.h
+@@ -90,6 +90,7 @@ int arch_decode_hint_reg(u8 sp_reg, int
+
+ bool arch_is_retpoline(struct symbol *sym);
+ bool arch_is_rethunk(struct symbol *sym);
++bool arch_is_embedded_insn(struct symbol *sym);
+
+ int arch_rewrite_retpolines(struct objtool_file *file);
+
+--- a/tools/objtool/include/objtool/elf.h
++++ b/tools/objtool/include/objtool/elf.h
+@@ -61,6 +61,7 @@ struct symbol {
+ u8 return_thunk : 1;
+ u8 fentry : 1;
+ u8 profiling_func : 1;
++ u8 embedded_insn : 1;
+ struct list_head pv_target;
+ struct list_head reloc_list;
+ };
--- /dev/null
+From dbf46008775516f7f25c95b7760041c286299783 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 16 Aug 2023 13:59:21 +0200
+Subject: objtool/x86: Fixup frame-pointer vs rethunk
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit dbf46008775516f7f25c95b7760041c286299783 upstream.
+
+For stack-validation of a frame-pointer build, objtool validates that
+every CALL instruction is preceded by a frame-setup. The new SRSO
+return thunks violate this with their RSB stuffing trickery.
+
+Extend the __fentry__ exception to also cover the embedded_insn case
+used for this. This cures:
+
+ vmlinux.o: warning: objtool: srso_untrain_ret+0xd: call without frame pointer save/setup
+
+Fixes: 4ae68b26c3ab ("objtool/x86: Fix SRSO mess")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Link: https://lore.kernel.org/r/20230816115921.GH980931@hirez.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/check.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -2698,12 +2698,17 @@ static int decode_sections(struct objtoo
+ return 0;
+ }
+
+-static bool is_fentry_call(struct instruction *insn)
++static bool is_special_call(struct instruction *insn)
+ {
+- if (insn->type == INSN_CALL &&
+- insn_call_dest(insn) &&
+- insn_call_dest(insn)->fentry)
+- return true;
++ if (insn->type == INSN_CALL) {
++ struct symbol *dest = insn_call_dest(insn);
++
++ if (!dest)
++ return false;
++
++ if (dest->fentry || dest->embedded_insn)
++ return true;
++ }
+
+ return false;
+ }
+@@ -3701,7 +3706,7 @@ static int validate_branch(struct objtoo
+ if (ret)
+ return ret;
+
+- if (opts.stackval && func && !is_fentry_call(insn) &&
++ if (opts.stackval && func && !is_special_call(insn) &&
+ !has_valid_stack_frame(&state)) {
+ WARN_INSN(insn, "call without frame pointer save/setup");
+ return 1;
--- /dev/null
+From 4f3175979e62de3b929bfa54a0db4b87d36257a7 Mon Sep 17 00:00:00 2001
+From: Nathan Lynch <nathanl@linux.ibm.com>
+Date: Thu, 10 Aug 2023 22:37:55 -0500
+Subject: powerpc/rtas_flash: allow user copy to flash block cache objects
+
+From: Nathan Lynch <nathanl@linux.ibm.com>
+
+commit 4f3175979e62de3b929bfa54a0db4b87d36257a7 upstream.
+
+With hardened usercopy enabled (CONFIG_HARDENED_USERCOPY=y), using the
+/proc/powerpc/rtas/firmware_update interface to prepare a system
+firmware update yields a BUG():
+
+ kernel BUG at mm/usercopy.c:102!
+ Oops: Exception in kernel mode, sig: 5 [#1]
+ LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+ Modules linked in:
+ CPU: 0 PID: 2232 Comm: dd Not tainted 6.5.0-rc3+ #2
+ Hardware name: IBM,8408-E8E POWER8E (raw) 0x4b0201 0xf000004 of:IBM,FW860.50 (SV860_146) hv:phyp pSeries
+ NIP: c0000000005991d0 LR: c0000000005991cc CTR: 0000000000000000
+ REGS: c0000000148c76a0 TRAP: 0700 Not tainted (6.5.0-rc3+)
+ MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE> CR: 24002242 XER: 0000000c
+ CFAR: c0000000001fbd34 IRQMASK: 0
+ [ ... GPRs omitted ... ]
+ NIP usercopy_abort+0xa0/0xb0
+ LR usercopy_abort+0x9c/0xb0
+ Call Trace:
+ usercopy_abort+0x9c/0xb0 (unreliable)
+ __check_heap_object+0x1b4/0x1d0
+ __check_object_size+0x2d0/0x380
+ rtas_flash_write+0xe4/0x250
+ proc_reg_write+0xfc/0x160
+ vfs_write+0xfc/0x4e0
+ ksys_write+0x90/0x160
+ system_call_exception+0x178/0x320
+ system_call_common+0x160/0x2c4
+
+The blocks of the firmware image are copied directly from user memory
+to objects allocated from flash_block_cache, so flash_block_cache must
+be created using kmem_cache_create_usercopy() to mark it safe for user
+access.
+
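+For reference (signature recalled here, not part of the original message),
+kmem_cache_create_usercopy() takes a user-copy window in addition to the
+usual kmem_cache_create() arguments:
+
+  kmem_cache_create_usercopy(name, size, align, flags,
+                             useroffset, usersize, ctor);
+
+so the new call below marks the whole RTAS_BLK_SIZE object (offset 0,
+size RTAS_BLK_SIZE) as safe to copy to/from user space.
+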
+Fixes: 6d07d1cd300f ("usercopy: Restrict non-usercopy caches to size 0")
+Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+[mpe: Trim and indent oops]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20230810-rtas-flash-vs-hardened-usercopy-v2-1-dcf63793a938@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/rtas_flash.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/kernel/rtas_flash.c
++++ b/arch/powerpc/kernel/rtas_flash.c
+@@ -709,9 +709,9 @@ static int __init rtas_flash_init(void)
+ if (!rtas_validate_flash_data.buf)
+ return -ENOMEM;
+
+- flash_block_cache = kmem_cache_create("rtas_flash_cache",
+- RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
+- NULL);
++ flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
++ RTAS_BLK_SIZE, RTAS_BLK_SIZE,
++ 0, 0, RTAS_BLK_SIZE, NULL);
+ if (!flash_block_cache) {
+ printk(KERN_ERR "%s: failed to create block cache\n",
+ __func__);
--- /dev/null
+From 3fa7187eceee11998f756481e45ce8c4f9d9dc48 Mon Sep 17 00:00:00 2001
+From: Qingsong Chen <changxian.cqs@antgroup.com>
+Date: Tue, 8 Aug 2023 10:54:01 +0800
+Subject: rust: macros: vtable: fix `HAS_*` redefinition (`gen_const_name`)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Qingsong Chen <changxian.cqs@antgroup.com>
+
+commit 3fa7187eceee11998f756481e45ce8c4f9d9dc48 upstream.
+
+If we define the same function name twice in a trait (using `#[cfg]`),
+the `vtable` macro will redefine its `gen_const_name`, e.g. this will
+define `HAS_BAR` twice:
+
+ #[vtable]
+ pub trait Foo {
+ #[cfg(CONFIG_X)]
+ fn bar();
+
+ #[cfg(not(CONFIG_X))]
+ fn bar(x: usize);
+ }
+
+Fixes: b44becc5ee80 ("rust: macros: add `#[vtable]` proc macro")
+Signed-off-by: Qingsong Chen <changxian.cqs@antgroup.com>
+Reviewed-by: Andreas Hindborg <a.hindborg@samsung.com>
+Reviewed-by: Gary Guo <gary@garyguo.net>
+Reviewed-by: Sergio González Collado <sergio.collado@gmail.com>
+Link: https://lore.kernel.org/r/20230808025404.2053471-1-changxian.cqs@antgroup.com
+Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ rust/macros/vtable.rs | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/rust/macros/vtable.rs b/rust/macros/vtable.rs
+index 34d5e7fb5768..ee06044fcd4f 100644
+--- a/rust/macros/vtable.rs
++++ b/rust/macros/vtable.rs
+@@ -74,6 +74,7 @@ pub(crate) fn vtable(_attr: TokenStream, ts: TokenStream) -> TokenStream {
+ const {gen_const_name}: bool = false;",
+ )
+ .unwrap();
++ consts.insert(gen_const_name);
+ }
+ } else {
+ const_items = "const USE_VTABLE_ATTR: () = ();".to_owned();
+--
+2.41.0
+
vdpa-mlx5-fix-mr-initialized-semantics.patch
vdpa-mlx5-delete-control-vq-iotlb-in-destroy_mr-only.patch
cifs-fix-potential-oops-in-cifs_oplock_break.patch
+rust-macros-vtable-fix-has_-redefinition-gen_const_name.patch
+i2c-bcm-iproc-fix-bcm_iproc_i2c_isr-deadlock-issue.patch
+i2c-hisi-only-handle-the-interrupt-of-the-driver-s-transfer.patch
+i2c-tegra-fix-i2c-tegra-dma-config-option-processing.patch
+blk-cgroup-hold-queue_lock-when-removing-blkg-q_node.patch
+fbdev-mmp-fix-value-check-in-mmphw_probe.patch
+media-mtk-jpeg-set-platform-driver-data-earlier.patch
+powerpc-rtas_flash-allow-user-copy-to-flash-block-cache-objects.patch
+vdpa-add-features-attr-to-vdpa_nl_policy-for-nlattr-length-check.patch
+vdpa-add-queue-index-attr-to-vdpa_nl_policy-for-nlattr-length-check.patch
+vdpa-add-max-vqp-attr-to-vdpa_nl_policy-for-nlattr-length-check.patch
+vdpa-enable-strict-validation-for-netlinks-ops.patch
+smb3-display-network-namespace-in-debug-information.patch
+tty-n_gsm-fix-the-uaf-caused-by-race-condition-in-gsm_cleanup_mux.patch
+tty-serial-fsl_lpuart-clear-the-error-flags-by-writing-1-for-lpuart32-platforms.patch
+btrfs-fix-infinite-directory-reads.patch
+btrfs-fix-incorrect-splitting-in-btrfs_drop_extent_map_range.patch
+btrfs-fix-bug_on-condition-in-btrfs_cancel_balance.patch
+btrfs-fix-replace-scrub-failure-with-metadata_uuid.patch
+btrfs-only-subtract-from-len_to_oe_boundary-when-it-is-tracking-an-extent.patch
+i2c-designware-correct-length-byte-validation-logic.patch
+i2c-designware-handle-invalid-smbus-block-data-response-length-value.patch
+x86-cpu-fix-__x86_return_thunk-symbol-type.patch
+x86-cpu-fix-up-srso_safe_ret-and-__x86_return_thunk.patch
+objtool-x86-fix-srso-mess.patch
+x86-alternative-make-custom-return-thunk-unconditional.patch
+x86-cpu-clean-up-srso-return-thunk-mess.patch
+x86-cpu-rename-original-retbleed-methods.patch
+x86-cpu-rename-srso_-.-_alias-to-srso_alias_-1.patch
+x86-cpu-cleanup-the-untrain-mess.patch
+x86-cpu-kvm-provide-untrain_ret_vm.patch
+x86-srso-explain-the-untraining-sequences-a-bit-more.patch
+objtool-x86-fixup-frame-pointer-vs-rethunk.patch
+x86-static_call-fix-__static_call_fixup.patch
+x86-retpoline-don-t-clobber-rflags-during-srso_safe_ret.patch
+x86-cpu-amd-fix-the-div-0-initial-fix-attempt.patch
+x86-srso-disable-the-mitigation-on-unaffected-configurations.patch
+x86-retpoline-kprobes-fix-position-of-thunk-sections-with-config_lto_clang.patch
+x86-retpoline-kprobes-skip-optprobe-check-for-indirect-jumps-with-retpolines-and-ibt.patch
+x86-srso-correct-the-mitigation-status-when-smt-is-disabled.patch
--- /dev/null
+From 7b38f6ddc97bf572c3422d3175e8678dd95502fa Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Thu, 10 Aug 2023 21:41:03 -0500
+Subject: smb3: display network namespace in debug information
+
+From: Steve French <stfrench@microsoft.com>
+
+commit 7b38f6ddc97bf572c3422d3175e8678dd95502fa upstream.
+
+We recently had problems where a network namespace was deleted,
+causing hard-to-debug reconnect problems. To help deal with
+configuration issues like this, it is useful to dump the network
+namespace to better debug what happened.
+
+So add this to information displayed in /proc/fs/cifs/DebugData for
+the server (and channels if mounted with multichannel). For example:
+
+ Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
+
+This can be easily compared with what is displayed for the
+processes on the system. For example /proc/1/ns/net in this case
+showed the same thing (see below), and we can see that the namespace
+is still valid in this example.
+
+ 'net:[4026531840]'
+
+Cc: stable@vger.kernel.org
+Acked-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/cifs_debug.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/fs/smb/client/cifs_debug.c
++++ b/fs/smb/client/cifs_debug.c
+@@ -153,6 +153,11 @@ cifs_dump_channel(struct seq_file *m, in
+ in_flight(server),
+ atomic_read(&server->in_send),
+ atomic_read(&server->num_waiters));
++#ifdef CONFIG_NET_NS
++ if (server->net)
++ seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
++#endif /* NET_NS */
++
+ }
+
+ static inline const char *smb_speed_to_str(size_t bps)
+@@ -429,10 +434,15 @@ skip_rdma:
+ server->reconnect_instance,
+ server->srv_count,
+ server->sec_mode, in_flight(server));
++#ifdef CONFIG_NET_NS
++ if (server->net)
++ seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
++#endif /* NET_NS */
+
+ seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
+ atomic_read(&server->in_send),
+ atomic_read(&server->num_waiters));
++
+ if (server->leaf_fullpath) {
+ seq_printf(m, "\nDFS leaf full path: %s",
+ server->leaf_fullpath);
--- /dev/null
+From 3c4f8333b582487a2d1e02171f1465531cde53e3 Mon Sep 17 00:00:00 2001
+From: Yi Yang <yiyang13@huawei.com>
+Date: Fri, 11 Aug 2023 11:11:21 +0800
+Subject: tty: n_gsm: fix the UAF caused by race condition in gsm_cleanup_mux
+
+From: Yi Yang <yiyang13@huawei.com>
+
+commit 3c4f8333b582487a2d1e02171f1465531cde53e3 upstream.
+
+In commit 9b9c8195f3f0 ("tty: n_gsm: fix UAF in gsm_cleanup_mux"), the UAF
+problem was not completely fixed. There is a race condition in
+gsm_cleanup_mux() which causes this UAF.
+
+The UAF problem is triggered by the following race:
+task[5046] task[5054]
+----------------------- -----------------------
+gsm_cleanup_mux();
+dlci = gsm->dlci[0];
+mutex_lock(&gsm->mutex);
+ gsm_cleanup_mux();
+ dlci = gsm->dlci[0]; //Didn't take the lock
+gsm_dlci_release(gsm->dlci[i]);
+gsm->dlci[i] = NULL;
+mutex_unlock(&gsm->mutex);
+ mutex_lock(&gsm->mutex);
+ dlci->dead = true; //UAF
+
+Fix it by assigning values after mutex_lock().
+
+Link: https://syzkaller.appspot.com/text?tag=CrashReport&x=176188b5a80000
+Cc: stable <stable@kernel.org>
+Fixes: 9b9c8195f3f0 ("tty: n_gsm: fix UAF in gsm_cleanup_mux")
+Fixes: aa371e96f05d ("tty: n_gsm: fix restart handling via CLD command")
+Signed-off-by: Yi Yang <yiyang13@huawei.com>
+Co-developed-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Link: https://lore.kernel.org/r/20230811031121.153237-1-yiyang13@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/n_gsm.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/tty/n_gsm.c
++++ b/drivers/tty/n_gsm.c
+@@ -3042,12 +3042,13 @@ static void gsm_error(struct gsm_mux *gs
+ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
+ {
+ int i;
+- struct gsm_dlci *dlci = gsm->dlci[0];
++ struct gsm_dlci *dlci;
+ struct gsm_msg *txq, *ntxq;
+
+ gsm->dead = true;
+ mutex_lock(&gsm->mutex);
+
++ dlci = gsm->dlci[0];
+ if (dlci) {
+ if (disc && dlci->state != DLCI_CLOSED) {
+ gsm_dlci_begin_close(dlci);
--- /dev/null
+From 282069845af388b08d622ad192b831dcd0549c62 Mon Sep 17 00:00:00 2001
+From: Sherry Sun <sherry.sun@nxp.com>
+Date: Tue, 1 Aug 2023 10:23:04 +0800
+Subject: tty: serial: fsl_lpuart: Clear the error flags by writing 1 for lpuart32 platforms
+
+From: Sherry Sun <sherry.sun@nxp.com>
+
+commit 282069845af388b08d622ad192b831dcd0549c62 upstream.
+
+Do not read the data register to clear the error flags for lpuart32
+platforms; the additional read may cause a receive FIFO underflow since
+the DMA has already read the data register.
+Actually all lpuart32 platforms support writing 1 to clear those error
+bits, so use this method to better clear the error flags.
+
+Fixes: 42b68768e51b ("serial: fsl_lpuart: DMA support for 32-bit variant")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
+Link: https://lore.kernel.org/r/20230801022304.24251-1-sherry.sun@nxp.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/serial/fsl_lpuart.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/tty/serial/fsl_lpuart.c
++++ b/drivers/tty/serial/fsl_lpuart.c
+@@ -1137,8 +1137,8 @@ static void lpuart_copy_rx_to_tty(struct
+ unsigned long sr = lpuart32_read(&sport->port, UARTSTAT);
+
+ if (sr & (UARTSTAT_PE | UARTSTAT_FE)) {
+- /* Read DR to clear the error flags */
+- lpuart32_read(&sport->port, UARTDATA);
++ /* Clear the error flags */
++ lpuart32_write(&sport->port, sr, UARTSTAT);
+
+ if (sr & UARTSTAT_PE)
+ sport->port.icount.parity++;
--- /dev/null
+From 79c8651587504ba263d2fd67fd4406240fb21f69 Mon Sep 17 00:00:00 2001
+From: Lin Ma <linma@zju.edu.cn>
+Date: Thu, 27 Jul 2023 20:57:48 +0300
+Subject: vdpa: Add features attr to vdpa_nl_policy for nlattr length check
+
+From: Lin Ma <linma@zju.edu.cn>
+
+commit 79c8651587504ba263d2fd67fd4406240fb21f69 upstream.
+
+The vdpa_nl_policy structure is used to validate the nlattr when parsing
+the incoming nlmsg. It will ensure the attribute being described produces
+a valid nlattr pointer in info->attrs before entering into each handler
+in vdpa_nl_ops.
+
+That is to say, the missing part in vdpa_nl_policy may lead to illegal
+nlattr after parsing, which could lead to OOB read just like CVE-2023-3773.
+
+This patch adds the missing nla_policy for vdpa features attr to avoid
+such bugs.
+
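+As a sketch of the failure mode (hypothetical handler shape, not the exact
+upstream code): without a policy entry, the attribute's payload length is
+never validated, so a handler doing
+
+  u64 features = nla_get_u64(info->attrs[VDPA_ATTR_DEV_FEATURES]);
+
+would read 8 bytes even if userspace sent a shorter attribute, i.e. an
+out-of-bounds read.
+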
+Fixes: 90fea5a800c3 ("vdpa: device feature provisioning")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Cc: stable@vger.kernel.org
+Message-Id: <20230727175757.73988-3-dtatulea@nvidia.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vdpa/vdpa.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/vdpa/vdpa.c
++++ b/drivers/vdpa/vdpa.c
+@@ -1249,6 +1249,7 @@ static const struct nla_policy vdpa_nl_p
+ [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
+ /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
+ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
++ [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 },
+ };
+
+ static const struct genl_ops vdpa_nl_ops[] = {
--- /dev/null
+From 5d6ba607d6cb5c58a4ddf33381e18c83dbb4098f Mon Sep 17 00:00:00 2001
+From: Lin Ma <linma@zju.edu.cn>
+Date: Thu, 27 Jul 2023 20:57:52 +0300
+Subject: vdpa: Add max vqp attr to vdpa_nl_policy for nlattr length check
+
+From: Lin Ma <linma@zju.edu.cn>
+
+commit 5d6ba607d6cb5c58a4ddf33381e18c83dbb4098f upstream.
+
+The vdpa_nl_policy structure is used to validate nlattrs when parsing
+incoming nlmsgs. It ensures that each described attribute produces a
+valid nlattr pointer in info->attrs before any handler in vdpa_nl_ops
+is entered.
+
+That is to say, a missing entry in vdpa_nl_policy leaves the
+corresponding nlattr unvalidated after parsing, which can lead to an
+out-of-bounds read just like CVE-2023-3773.
+
+This patch adds the missing nla_policy for the vdpa max vqp attr to
+avoid such bugs.
+
+Fixes: ad69dd0bf26b ("vdpa: Introduce query of device config layout")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Cc: stable@vger.kernel.org
+Message-Id: <20230727175757.73988-7-dtatulea@nvidia.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vdpa/vdpa.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/vdpa/vdpa.c
++++ b/drivers/vdpa/vdpa.c
+@@ -1247,6 +1247,7 @@ static const struct nla_policy vdpa_nl_p
+ [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
+ [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
+ [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
++ [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 },
+ /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
+ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
+ [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 },
--- /dev/null
+From b3003e1b54e057f5f3124e437b80c3bef26ed3fe Mon Sep 17 00:00:00 2001
+From: Lin Ma <linma@zju.edu.cn>
+Date: Thu, 27 Jul 2023 20:57:50 +0300
+Subject: vdpa: Add queue index attr to vdpa_nl_policy for nlattr length check
+
+From: Lin Ma <linma@zju.edu.cn>
+
+commit b3003e1b54e057f5f3124e437b80c3bef26ed3fe upstream.
+
+The vdpa_nl_policy structure is used to validate nlattrs when parsing
+incoming nlmsgs. It ensures that each described attribute produces a
+valid nlattr pointer in info->attrs before any handler in vdpa_nl_ops
+is entered.
+
+That is to say, a missing entry in vdpa_nl_policy leaves the
+corresponding nlattr unvalidated after parsing, which can lead to an
+out-of-bounds read just like CVE-2023-3773.
+
+This patch adds the missing nla_policy for the vdpa queue index attr to
+avoid such bugs.
+
+Fixes: 13b00b135665 ("vdpa: Add support for querying vendor statistics")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Cc: stable@vger.kernel.org
+Message-Id: <20230727175757.73988-5-dtatulea@nvidia.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vdpa/vdpa.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/vdpa/vdpa.c
++++ b/drivers/vdpa/vdpa.c
+@@ -1249,6 +1249,7 @@ static const struct nla_policy vdpa_nl_p
+ [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
+ /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
+ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
++ [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 },
+ [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 },
+ };
+
--- /dev/null
+From f46c1e1620c6bbc9aad5693082efd1b80822e97c Mon Sep 17 00:00:00 2001
+From: Dragos Tatulea <dtatulea@nvidia.com>
+Date: Thu, 27 Jul 2023 20:57:54 +0300
+Subject: vdpa: Enable strict validation for netlinks ops
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+commit f46c1e1620c6bbc9aad5693082efd1b80822e97c upstream.
+
+The previous patches added the missing nla policies that were required for
+validation to work.
+
+Now strict validation on netlink ops can be enabled. This patch does it.
+
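+For illustration, each op entry then simply loses its validate override
+and relies on default strict policy checking (a condensed sketch of the
+hunk below):
+
+	{
+		.cmd = VDPA_CMD_DEV_NEW,
+		.doit = vdpa_nl_cmd_dev_add_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		/* no .validate override: the genetlink core now applies
+		 * strict policy validation before calling the handler
+		 */
+	},
+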
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Cc: stable@vger.kernel.org
+Message-Id: <20230727175757.73988-9-dtatulea@nvidia.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vdpa/vdpa.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/drivers/vdpa/vdpa.c
++++ b/drivers/vdpa/vdpa.c
+@@ -1257,37 +1257,31 @@ static const struct nla_policy vdpa_nl_p
+ static const struct genl_ops vdpa_nl_ops[] = {
+ {
+ .cmd = VDPA_CMD_MGMTDEV_GET,
+- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = vdpa_nl_cmd_mgmtdev_get_doit,
+ .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit,
+ },
+ {
+ .cmd = VDPA_CMD_DEV_NEW,
+- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = vdpa_nl_cmd_dev_add_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = VDPA_CMD_DEV_DEL,
+- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = vdpa_nl_cmd_dev_del_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = VDPA_CMD_DEV_GET,
+- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = vdpa_nl_cmd_dev_get_doit,
+ .dumpit = vdpa_nl_cmd_dev_get_dumpit,
+ },
+ {
+ .cmd = VDPA_CMD_DEV_CONFIG_GET,
+- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = vdpa_nl_cmd_dev_config_get_doit,
+ .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
+ },
+ {
+ .cmd = VDPA_CMD_DEV_VSTATS_GET,
+- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = vdpa_nl_cmd_dev_stats_get_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
--- /dev/null
+From 095b8303f3835c68ac4a8b6d754ca1c3b6230711 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:30 +0200
+Subject: x86/alternative: Make custom return thunk unconditional
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 095b8303f3835c68ac4a8b6d754ca1c3b6230711 upstream.
+
+There is infrastructure to rewrite return thunks to point to any
+random thunk one desires. Unwrap that from CALL_THUNKS, which up to
+now was the sole user of it.
+
+ [ bp: Make the thunks visible on 32-bit and add ifdeffery for the
+ 32-bit builds. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121148.775293785@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 9 +++++----
+ arch/x86/kernel/alternative.c | 4 ----
+ arch/x86/kernel/cpu/bugs.c | 2 ++
+ 3 files changed, 7 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -337,17 +337,18 @@ extern retpoline_thunk_t __x86_indirect_
+ extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
+ extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
+
++#ifdef CONFIG_RETHUNK
+ extern void __x86_return_thunk(void);
++#else
++static inline void __x86_return_thunk(void) {}
++#endif
++
+ extern void zen_untrain_ret(void);
+ extern void srso_untrain_ret(void);
+ extern void srso_untrain_ret_alias(void);
+ extern void entry_ibpb(void);
+
+-#ifdef CONFIG_CALL_THUNKS
+ extern void (*x86_return_thunk)(void);
+-#else
+-#define x86_return_thunk (&__x86_return_thunk)
+-#endif
+
+ #ifdef CONFIG_CALL_DEPTH_TRACKING
+ extern void __x86_return_skl(void);
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -571,10 +571,6 @@ void __init_or_module noinline apply_ret
+
+ #ifdef CONFIG_RETHUNK
+
+-#ifdef CONFIG_CALL_THUNKS
+-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+-#endif
+-
+ /*
+ * Rewrite the compiler generated return thunk tail-calls.
+ *
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -63,6 +63,8 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd);
+
+ static DEFINE_MUTEX(spec_ctrl_mutex);
+
++void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
++
+ /* Update SPEC_CTRL MSR and its cached copy unconditionally */
+ static void update_spec_ctrl(u64 val)
+ {
--- /dev/null
+From f58d6fbcb7c848b7f2469be339bc571f2e9d245b Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Fri, 11 Aug 2023 23:38:24 +0200
+Subject: x86/CPU/AMD: Fix the DIV(0) initial fix attempt
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit f58d6fbcb7c848b7f2469be339bc571f2e9d245b upstream.
+
+Initially, it was thought that doing an innocuous division in the #DE
+handler would be enough to prevent any leaking of old data from the
+divider, but by the time the fault is raised, the speculation has
+already advanced too far and such data could already have been used by
+younger operations.
+
+Therefore, do the innocuous division on every exit to userspace so that
+userspace doesn't see any potentially old data from integer divisions in
+kernel space.
+
+Do the same before VMRUN as well, to protect host data from leaking
+into the guest.
+
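+For illustration, the innocuous division itself (reproduced from the
+amd.c hunk below, with explanatory comments added) is:
+
+	/* eax = 0, edx = 0, divisor = 1: computes 0 / 1, so quotient and
+	 * remainder both become 0 and any stale state in the divider is
+	 * overwritten. The ALTERNATIVE() makes this a no-op on CPUs not
+	 * marked with X86_BUG_DIV0.
+	 */
+	asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+		     :: "a" (0), "d" (0), "r" (1));
+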
+Fixes: 77245f1c3c64 ("x86/CPU/AMD: Do not leak quotient data after a division by 0")
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/20230811213824.10025-1-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/entry-common.h | 1 +
+ arch/x86/kernel/cpu/amd.c | 1 +
+ arch/x86/kernel/traps.c | 2 --
+ arch/x86/kvm/svm/svm.c | 2 ++
+ 4 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/entry-common.h
++++ b/arch/x86/include/asm/entry-common.h
+@@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mod
+ static __always_inline void arch_exit_to_user_mode(void)
+ {
+ mds_user_clear_cpu_buffers();
++ amd_clear_divider();
+ }
+ #define arch_exit_to_user_mode arch_exit_to_user_mode
+
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -1329,3 +1329,4 @@ void noinstr amd_clear_divider(void)
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
+ }
++EXPORT_SYMBOL_GPL(amd_clear_divider);
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -206,8 +206,6 @@ DEFINE_IDTENTRY(exc_divide_error)
+ {
+ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
+ FPE_INTDIV, error_get_trap_addr(regs));
+-
+- amd_clear_divider();
+ }
+
+ DEFINE_IDTENTRY(exc_overflow)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4034,6 +4034,8 @@ static noinstr void svm_vcpu_enter_exit(
+
+ guest_state_enter_irqoff();
+
++ amd_clear_divider();
++
+ if (sev_es_guest(vcpu->kvm))
+ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+ else
--- /dev/null
+From d43490d0ab824023e11d0b57d0aeec17a6e0ca13 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:31 +0200
+Subject: x86/cpu: Clean up SRSO return thunk mess
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d43490d0ab824023e11d0b57d0aeec17a6e0ca13 upstream.
+
+Use the existing configurable return thunk. There is absolutely no
+justification for having created this __x86_return_thunk alternative.
+
+To clarify, the whole thing looks like:
+
+Zen3/4 does:
+
+ srso_alias_untrain_ret:
+ nop2
+ lfence
+ jmp srso_alias_return_thunk
+ int3
+
+ srso_alias_safe_ret: // aliases srso_alias_untrain_ret just so
+ add $8, %rsp
+ ret
+ int3
+
+ srso_alias_return_thunk:
+ call srso_alias_safe_ret
+ ud2
+
+While Zen1/2 does:
+
+ srso_untrain_ret:
+ movabs $foo, %rax
+ lfence
+ call srso_safe_ret (jmp srso_return_thunk ?)
+ int3
+
+ srso_safe_ret: // embedded in movabs instruction
+ add $8,%rsp
+ ret
+ int3
+
+ srso_return_thunk:
+ call srso_safe_ret
+ ud2
+
+While retbleed does:
+
+ zen_untrain_ret:
+ test $0xcc, %bl
+ lfence
+ jmp zen_return_thunk
+ int3
+
+ zen_return_thunk: // embedded in the test instruction
+ ret
+ int3
+
+Where Zen1/2 flush the BTB entry using the instruction decoder trick
+(test, movabs), Zen3/4 use BTB aliasing. SRSO adds a return sequence
+(srso_safe_ret()) which forces the function return instruction to
+speculate into a trap (UD2). This RET will then mispredict and
+execution will continue at the return site read from the top of the
+stack.
+
+Pick one of three options at boot (every function can only ever return
+once).
+
+ [ bp: Fixup commit message uarch details and add them in a comment in
+ the code too. Add a comment about the srso_select_mitigation()
+ dependency on retbleed_select_mitigation(). Add moar ifdeffery for
+ 32-bit builds. Add a dummy srso_untrain_ret_alias() definition for
+ 32-bit alternatives needing the symbol. ]
+
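+Condensed, the boot-time selection done by the bugs.c hunks below looks
+roughly like this (illustrative pseudo-C, not literal kernel code):
+
+	/* retbleed=unret (Zen1/2 retbleed mitigation) */
+	if (IS_ENABLED(CONFIG_RETHUNK))
+		x86_return_thunk = zen_return_thunk;
+
+	/* srso=safe_ret, evaluated after retbleed_select_mitigation() */
+	if (boot_cpu_data.x86 == 0x19)
+		x86_return_thunk = srso_alias_return_thunk;	/* Zen3/4 */
+	else
+		x86_return_thunk = srso_return_thunk;		/* Zen1/2 */
+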
+Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121148.842775684@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 5 +++
+ arch/x86/kernel/cpu/bugs.c | 17 ++++++++--
+ arch/x86/kernel/vmlinux.lds.S | 4 +-
+ arch/x86/lib/retpoline.S | 58 +++++++++++++++++++++++++----------
+ tools/objtool/arch/x86/decode.c | 2 -
+ 5 files changed, 64 insertions(+), 22 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -343,9 +343,14 @@ extern void __x86_return_thunk(void);
+ static inline void __x86_return_thunk(void) {}
+ #endif
+
++extern void zen_return_thunk(void);
++extern void srso_return_thunk(void);
++extern void srso_alias_return_thunk(void);
++
+ extern void zen_untrain_ret(void);
+ extern void srso_untrain_ret(void);
+ extern void srso_untrain_ret_alias(void);
++
+ extern void entry_ibpb(void);
+
+ extern void (*x86_return_thunk)(void);
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -167,8 +167,13 @@ void __init cpu_select_mitigations(void)
+ md_clear_select_mitigation();
+ srbds_select_mitigation();
+ l1d_flush_select_mitigation();
+- gds_select_mitigation();
++
++ /*
++ * srso_select_mitigation() depends and must run after
++ * retbleed_select_mitigation().
++ */
+ srso_select_mitigation();
++ gds_select_mitigation();
+ }
+
+ /*
+@@ -1037,6 +1042,9 @@ do_cmd_auto:
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
++ if (IS_ENABLED(CONFIG_RETHUNK))
++ x86_return_thunk = zen_return_thunk;
++
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ pr_err(RETBLEED_UNTRAIN_MSG);
+@@ -2451,10 +2459,13 @@ static void __init srso_select_mitigatio
+ */
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+
+- if (boot_cpu_data.x86 == 0x19)
++ if (boot_cpu_data.x86 == 0x19) {
+ setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
+- else
++ x86_return_thunk = srso_alias_return_thunk;
++ } else {
+ setup_force_cpu_cap(X86_FEATURE_SRSO);
++ x86_return_thunk = srso_return_thunk;
++ }
+ srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -522,8 +522,8 @@ INIT_PER_CPU(irq_stack_backing_store);
+ "fixed_percpu_data is not at start of per-cpu area");
+ #endif
+
+- #ifdef CONFIG_RETHUNK
+-. = ASSERT((__ret & 0x3f) == 0, "__ret not cacheline-aligned");
++#ifdef CONFIG_RETHUNK
++. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned");
+ . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+ #endif
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -151,22 +151,27 @@ SYM_CODE_END(__x86_indirect_jump_thunk_a
+ .section .text.__x86.rethunk_untrain
+
+ SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
++ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ASM_NOP2
+ lfence
+- jmp __x86_return_thunk
++ jmp srso_alias_return_thunk
+ SYM_FUNC_END(srso_untrain_ret_alias)
+ __EXPORT_THUNK(srso_untrain_ret_alias)
+
+ .section .text.__x86.rethunk_safe
++#else
++/* dummy definition for alternatives */
++SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
++SYM_FUNC_END(srso_untrain_ret_alias)
+ #endif
+
+-/* Needs a definition for the __x86_return_thunk alternative below. */
+ SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
+-#ifdef CONFIG_CPU_SRSO
+ add $8, %_ASM_SP
+ UNWIND_HINT_FUNC
+-#endif
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+@@ -174,9 +179,16 @@ SYM_FUNC_END(srso_safe_ret_alias)
+
+ .section .text.__x86.return_thunk
+
++SYM_CODE_START(srso_alias_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
++ call srso_safe_ret_alias
++ ud2
++SYM_CODE_END(srso_alias_return_thunk)
++
+ /*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
++ * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+@@ -184,7 +196,7 @@ SYM_FUNC_END(srso_safe_ret_alias)
+ * from re-poisioning the BTB prediction.
+ */
+ .align 64
+- .skip 64 - (__ret - zen_untrain_ret), 0xcc
++ .skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc
+ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ ANNOTATE_NOENDBR
+ /*
+@@ -192,16 +204,16 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL,
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+- * JMP __x86_return_thunk
++ * JMP zen_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+- * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
++ * zen_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+- * As executed from __x86_return_thunk, this is a plain RET.
++ * As executed from zen_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+@@ -213,13 +225,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL,
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+- * evicted, __x86_return_thunk will suffer Straight Line Speculation
++ * evicted, zen_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+-SYM_INNER_LABEL(__ret, SYM_L_GLOBAL)
++SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL)
+ ret
+ int3
+-SYM_CODE_END(__ret)
++SYM_CODE_END(zen_return_thunk)
+
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+@@ -230,7 +242,7 @@ SYM_CODE_END(__ret)
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+- jmp __ret
++ jmp zen_return_thunk
+ int3
+ SYM_FUNC_END(zen_untrain_ret)
+ __EXPORT_THUNK(zen_untrain_ret)
+@@ -251,12 +263,19 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL
+ ANNOTATE_NOENDBR
+ .byte 0x48, 0xb8
+
++/*
++ * This forces the function return instruction to speculate into a trap
++ * (UD2 in srso_return_thunk() below). This RET will then mispredict
++ * and execution will continue at the return site read from the top of
++ * the stack.
++ */
+ SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+ add $8, %_ASM_SP
+ ret
+ int3
+ int3
+ int3
++ /* end of movabs */
+ lfence
+ call srso_safe_ret
+ ud2
+@@ -264,12 +283,19 @@ SYM_CODE_END(srso_safe_ret)
+ SYM_FUNC_END(srso_untrain_ret)
+ __EXPORT_THUNK(srso_untrain_ret)
+
+-SYM_CODE_START(__x86_return_thunk)
++SYM_CODE_START(srso_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+- ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
+- "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
++ call srso_safe_ret
+ ud2
++SYM_CODE_END(srso_return_thunk)
++
++SYM_CODE_START(__x86_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+ SYM_CODE_END(__x86_return_thunk)
+ EXPORT_SYMBOL(__x86_return_thunk)
+
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -829,6 +829,6 @@ bool arch_is_rethunk(struct symbol *sym)
+
+ bool arch_is_embedded_insn(struct symbol *sym)
+ {
+- return !strcmp(sym->name, "__ret") ||
++ return !strcmp(sym->name, "zen_return_thunk") ||
+ !strcmp(sym->name, "srso_safe_ret");
+ }
--- /dev/null
+From e7c25c441e9e0fa75b4c83e0b26306b702cfe90d Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:34 +0200
+Subject: x86/cpu: Cleanup the untrain mess
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit e7c25c441e9e0fa75b4c83e0b26306b702cfe90d upstream.
+
+Since there can only be one active return_thunk, there only needs to be
+one (matching) untrain_ret. It fundamentally doesn't make sense to
+allow multiple untrain_ret at the same time.
+
+Fold all the 3 different untrain methods into a single (temporary)
+helper stub.
+
+Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121149.042774962@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 19 +++++--------------
+ arch/x86/kernel/cpu/bugs.c | 1 +
+ arch/x86/lib/retpoline.S | 7 +++++++
+ 3 files changed, 13 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -268,9 +268,9 @@
+ .endm
+
+ #ifdef CONFIG_CPU_UNRET_ENTRY
+-#define CALL_ZEN_UNTRAIN_RET "call retbleed_untrain_ret"
++#define CALL_UNTRAIN_RET "call entry_untrain_ret"
+ #else
+-#define CALL_ZEN_UNTRAIN_RET ""
++#define CALL_UNTRAIN_RET ""
+ #endif
+
+ /*
+@@ -289,15 +289,10 @@
+ defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+ VALIDATE_UNRET_END
+ ALTERNATIVE_3 "", \
+- CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
++ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
+ __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+ #endif
+-
+-#ifdef CONFIG_CPU_SRSO
+- ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \
+- "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+-#endif
+ .endm
+
+ .macro UNTRAIN_RET_FROM_CALL
+@@ -305,15 +300,10 @@
+ defined(CONFIG_CALL_DEPTH_TRACKING)
+ VALIDATE_UNRET_END
+ ALTERNATIVE_3 "", \
+- CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
++ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
+ __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
+ #endif
+-
+-#ifdef CONFIG_CPU_SRSO
+- ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \
+- "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+-#endif
+ .endm
+
+
+@@ -351,6 +341,7 @@ extern void retbleed_untrain_ret(void);
+ extern void srso_untrain_ret(void);
+ extern void srso_alias_untrain_ret(void);
+
++extern void entry_untrain_ret(void);
+ extern void entry_ibpb(void);
+
+ extern void (*x86_return_thunk)(void);
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -2458,6 +2458,7 @@ static void __init srso_select_mitigatio
+ * like ftrace, static_call, etc.
+ */
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
++ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+ if (boot_cpu_data.x86 == 0x19) {
+ setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -290,6 +290,13 @@ SYM_CODE_START(srso_return_thunk)
+ ud2
+ SYM_CODE_END(srso_return_thunk)
+
++SYM_FUNC_START(entry_untrain_ret)
++ ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
++ "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
++ "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
++SYM_FUNC_END(entry_untrain_ret)
++__EXPORT_THUNK(entry_untrain_ret)
++
+ SYM_CODE_START(__x86_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
--- /dev/null
+From 77f67119004296a9b2503b377d610e08b08afc2a Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:27 +0200
+Subject: x86/cpu: Fix __x86_return_thunk symbol type
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 77f67119004296a9b2503b377d610e08b08afc2a upstream.
+
+Commit
+
+ fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+
+reimplemented __x86_return_thunk with a mix of SYM_FUNC_START and
+SYM_CODE_END, which is not a sane combination.
+
+Since nothing should ever actually 'CALL' this, make it consistently
+CODE.
+
+Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121148.571027074@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -264,7 +264,9 @@ SYM_CODE_END(srso_safe_ret)
+ SYM_FUNC_END(srso_untrain_ret)
+ __EXPORT_THUNK(srso_untrain_ret)
+
+-SYM_FUNC_START(__x86_return_thunk)
++SYM_CODE_START(__x86_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
+ ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
+ "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
+ int3
--- /dev/null
+From af023ef335f13c8b579298fc432daeef609a9e60 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:28 +0200
+Subject: x86/cpu: Fix up srso_safe_ret() and __x86_return_thunk()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit af023ef335f13c8b579298fc432daeef609a9e60 upstream.
+
+ vmlinux.o: warning: objtool: srso_untrain_ret() falls through to next function __x86_return_skl()
+ vmlinux.o: warning: objtool: __x86_return_thunk() falls through to next function __x86_return_skl()
+
+This is because these functions (can) end with CALL, which objtool
+does not consider a terminating instruction. Therefore, replace the
+INT3 instruction (which is a non-fatal trap) with UD2 (which is a
+fatal-trap).
+
+This indicates execution will not continue past this point.
+
+Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121148.637802730@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -259,7 +259,7 @@ SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLO
+ int3
+ lfence
+ call srso_safe_ret
+- int3
++ ud2
+ SYM_CODE_END(srso_safe_ret)
+ SYM_FUNC_END(srso_untrain_ret)
+ __EXPORT_THUNK(srso_untrain_ret)
+@@ -269,7 +269,7 @@ SYM_CODE_START(__x86_return_thunk)
+ ANNOTATE_NOENDBR
+ ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
+ "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
+- int3
++ ud2
+ SYM_CODE_END(__x86_return_thunk)
+ EXPORT_SYMBOL(__x86_return_thunk)
+
--- /dev/null
+From 864bcaa38ee44ec6c0e43f79c2d2997b977e26b2 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:35 +0200
+Subject: x86/cpu/kvm: Provide UNTRAIN_RET_VM
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 864bcaa38ee44ec6c0e43f79c2d2997b977e26b2 upstream.
+
+Similar to how it doesn't make sense to have UNTRAIN_RET have two
+untrain calls, it also doesn't make sense for VMEXIT to have an extra
+IBPB call.
+
+This cures VMEXIT doing potentially unret+IBPB or double IBPB.
+Also, the (SEV) VMEXIT case seems to have been overlooked.
+
+Redefine the meaning of the synthetic IBPB flags to:
+
+ - ENTRY_IBPB -- issue IBPB on entry (was: entry + VMEXIT)
+ - IBPB_ON_VMEXIT -- issue IBPB on VMEXIT
+
+And have 'retbleed=ibpb' set *BOTH* feature flags to ensure it retains
+the previous behaviour and issues IBPB on entry+VMEXIT.
+
+The new 'srso=ibpb_vmexit' option only sets IBPB_ON_VMEXIT.
+
+Create UNTRAIN_RET_VM specifically for the VMEXIT case, and have that
+check IBPB_ON_VMEXIT.
+
+All this avoids having the VMEXIT case having to check both ENTRY_IBPB
+and IBPB_ON_VMEXIT and simplifies the alternatives.
+
+Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121149.109557833@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 11 +++++++++++
+ arch/x86/kernel/cpu/bugs.c | 1 +
+ arch/x86/kvm/svm/vmenter.S | 7 ++-----
+ 3 files changed, 14 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -295,6 +295,17 @@
+ #endif
+ .endm
+
++.macro UNTRAIN_RET_VM
++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
++ defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
++ VALIDATE_UNRET_END
++ ALTERNATIVE_3 "", \
++ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
++ "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \
++ __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
++#endif
++.endm
++
+ .macro UNTRAIN_RET_FROM_CALL
+ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+ defined(CONFIG_CALL_DEPTH_TRACKING)
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1054,6 +1054,7 @@ do_cmd_auto:
+
+ case RETBLEED_MITIGATION_IBPB:
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
+ break;
+
+--- a/arch/x86/kvm/svm/vmenter.S
++++ b/arch/x86/kvm/svm/vmenter.S
+@@ -222,10 +222,7 @@ SYM_FUNC_START(__svm_vcpu_run)
+ * because interrupt handlers won't sanitize 'ret' if the return is
+ * from the kernel.
+ */
+- UNTRAIN_RET
+-
+- /* SRSO */
+- ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT
++ UNTRAIN_RET_VM
+
+ /*
+ * Clear all general purpose registers except RSP and RAX to prevent
+@@ -362,7 +359,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
+ * because interrupt handlers won't sanitize RET if the return is
+ * from the kernel.
+ */
+- UNTRAIN_RET
++ UNTRAIN_RET_VM
+
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
--- /dev/null
+From d025b7bac07a6e90b6b98b487f88854ad9247c39 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:32 +0200
+Subject: x86/cpu: Rename original retbleed methods
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d025b7bac07a6e90b6b98b487f88854ad9247c39 upstream.
+
+Rename the original retbleed return thunk and untrain_ret to
+retbleed_return_thunk() and retbleed_untrain_ret().
+
+No functional changes.
+
+Suggested-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121148.909378169@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 8 ++++----
+ arch/x86/kernel/cpu/bugs.c | 2 +-
+ arch/x86/kernel/vmlinux.lds.S | 2 +-
+ arch/x86/lib/retpoline.S | 30 +++++++++++++++---------------
+ tools/objtool/arch/x86/decode.c | 2 +-
+ tools/objtool/check.c | 2 +-
+ 6 files changed, 23 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -268,7 +268,7 @@
+ .endm
+
+ #ifdef CONFIG_CPU_UNRET_ENTRY
+-#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
++#define CALL_ZEN_UNTRAIN_RET "call retbleed_untrain_ret"
+ #else
+ #define CALL_ZEN_UNTRAIN_RET ""
+ #endif
+@@ -278,7 +278,7 @@
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+- * While zen_untrain_ret() doesn't clobber anything but requires stack,
++ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
+ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+@@ -343,11 +343,11 @@ extern void __x86_return_thunk(void);
+ static inline void __x86_return_thunk(void) {}
+ #endif
+
+-extern void zen_return_thunk(void);
++extern void retbleed_return_thunk(void);
+ extern void srso_return_thunk(void);
+ extern void srso_alias_return_thunk(void);
+
+-extern void zen_untrain_ret(void);
++extern void retbleed_untrain_ret(void);
+ extern void srso_untrain_ret(void);
+ extern void srso_untrain_ret_alias(void);
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1043,7 +1043,7 @@ do_cmd_auto:
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+ if (IS_ENABLED(CONFIG_RETHUNK))
+- x86_return_thunk = zen_return_thunk;
++ x86_return_thunk = retbleed_return_thunk;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -523,7 +523,7 @@ INIT_PER_CPU(irq_stack_backing_store);
+ #endif
+
+ #ifdef CONFIG_RETHUNK
+-. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned");
++. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
+ . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+ #endif
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -188,32 +188,32 @@ SYM_CODE_END(srso_alias_return_thunk)
+
+ /*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+- * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for
++ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+- * 2) The instruction at zen_untrain_ret must contain, and not
++ * 2) The instruction at retbleed_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisioning the BTB prediction.
+ */
+ .align 64
+- .skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc
+-SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
++ .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
++SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ ANNOTATE_NOENDBR
+ /*
+- * As executed from zen_untrain_ret, this is:
++ * As executed from retbleed_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+- * JMP zen_return_thunk
++ * JMP retbleed_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+- * zen_return_thunk + 1 isn't an instruction boundary at the moment.
++ * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+- * As executed from zen_return_thunk, this is a plain RET.
++ * As executed from retbleed_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+@@ -225,13 +225,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL,
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+- * evicted, zen_return_thunk will suffer Straight Line Speculation
++ * evicted, retbleed_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+-SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL)
++SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
+ ret
+ int3
+-SYM_CODE_END(zen_return_thunk)
++SYM_CODE_END(retbleed_return_thunk)
+
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+@@ -242,13 +242,13 @@ SYM_CODE_END(zen_return_thunk)
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+- jmp zen_return_thunk
++ jmp retbleed_return_thunk
+ int3
+-SYM_FUNC_END(zen_untrain_ret)
+-__EXPORT_THUNK(zen_untrain_ret)
++SYM_FUNC_END(retbleed_untrain_ret)
++__EXPORT_THUNK(retbleed_untrain_ret)
+
+ /*
+- * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret()
++ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed which is a
+ *
+ * movabs $0xccccccc308c48348,%rax
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -829,6 +829,6 @@ bool arch_is_rethunk(struct symbol *sym)
+
+ bool arch_is_embedded_insn(struct symbol *sym)
+ {
+- return !strcmp(sym->name, "zen_return_thunk") ||
++ return !strcmp(sym->name, "retbleed_return_thunk") ||
+ !strcmp(sym->name, "srso_safe_ret");
+ }
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1655,7 +1655,7 @@ static int add_jump_destinations(struct
+ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+
+ /*
+- * This is a special case for zen_untrain_ret().
++ * This is a special case for retbleed_untrain_ret().
+ * It jumps to __x86_return_thunk(), but objtool
+ * can't find the thunk's starting RET
+ * instruction, because the RET is also in the
--- /dev/null
+From 42be649dd1f2eee6b1fb185f1a231b9494cf095f Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Aug 2023 13:44:33 +0200
+Subject: x86/cpu: Rename srso_(.*)_alias to srso_alias_\1
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 42be649dd1f2eee6b1fb185f1a231b9494cf095f upstream.
+
+For a more consistent namespace.
+
+ [ bp: Fixup names in the doc too. ]
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814121148.976236447@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/srso.rst | 4 ++--
+ arch/x86/include/asm/nospec-branch.h | 6 +++---
+ arch/x86/kernel/vmlinux.lds.S | 8 ++++----
+ arch/x86/lib/retpoline.S | 26 +++++++++++++-------------
+ 4 files changed, 22 insertions(+), 22 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/srso.rst
++++ b/Documentation/admin-guide/hw-vuln/srso.rst
+@@ -124,8 +124,8 @@ sequence.
+ To ensure the safety of this mitigation, the kernel must ensure that the
+ safe return sequence is itself free from attacker interference. In Zen3
+ and Zen4, this is accomplished by creating a BTB alias between the
+-untraining function srso_untrain_ret_alias() and the safe return
+-function srso_safe_ret_alias() which results in evicting a potentially
++untraining function srso_alias_untrain_ret() and the safe return
++function srso_alias_safe_ret() which results in evicting a potentially
+ poisoned BTB entry and using that safe one for all function returns.
+
+ In older Zen1 and Zen2, this is accomplished using a reinterpretation
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -296,7 +296,7 @@
+
+ #ifdef CONFIG_CPU_SRSO
+ ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \
+- "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS
++ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+ #endif
+ .endm
+
+@@ -312,7 +312,7 @@
+
+ #ifdef CONFIG_CPU_SRSO
+ ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \
+- "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS
++ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+ #endif
+ .endm
+
+@@ -349,7 +349,7 @@ extern void srso_alias_return_thunk(void
+
+ extern void retbleed_untrain_ret(void);
+ extern void srso_untrain_ret(void);
+-extern void srso_untrain_ret_alias(void);
++extern void srso_alias_untrain_ret(void);
+
+ extern void entry_ibpb(void);
+
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -149,10 +149,10 @@ SECTIONS
+
+ #ifdef CONFIG_CPU_SRSO
+ /*
+- * See the comment above srso_untrain_ret_alias()'s
++ * See the comment above srso_alias_untrain_ret()'s
+ * definition.
+ */
+- . = srso_untrain_ret_alias | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
++ . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
+ *(.text.__x86.rethunk_safe)
+ #endif
+ ALIGN_ENTRY_TEXT_END
+@@ -538,8 +538,8 @@ INIT_PER_CPU(irq_stack_backing_store);
+ * Instead do: (A | B) - (A & B) in order to compute the XOR
+ * of the two function addresses:
+ */
+-. = ASSERT(((ABSOLUTE(srso_untrain_ret_alias) | srso_safe_ret_alias) -
+- (ABSOLUTE(srso_untrain_ret_alias) & srso_safe_ret_alias)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
++. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) -
++ (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+ "SRSO function pair won't alias");
+ #endif
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -133,56 +133,56 @@ SYM_CODE_END(__x86_indirect_jump_thunk_a
+ #ifdef CONFIG_RETHUNK
+
+ /*
+- * srso_untrain_ret_alias() and srso_safe_ret_alias() are placed at
++ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
+ * special addresses:
+ *
+- * - srso_untrain_ret_alias() is 2M aligned
+- * - srso_safe_ret_alias() is also in the same 2M page but bits 2, 8, 14
++ * - srso_alias_untrain_ret() is 2M aligned
++ * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
+ * and 20 in its virtual address are set (while those bits in the
+- * srso_untrain_ret_alias() function are cleared).
++ * srso_alias_untrain_ret() function are cleared).
+ *
+ * This guarantees that those two addresses will alias in the branch
+ * target buffer of Zen3/4 generations, leading to any potential
+ * poisoned entries at that BTB slot to get evicted.
+ *
+- * As a result, srso_safe_ret_alias() becomes a safe return.
++ * As a result, srso_alias_safe_ret() becomes a safe return.
+ */
+ #ifdef CONFIG_CPU_SRSO
+ .section .text.__x86.rethunk_untrain
+
+-SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
++SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ASM_NOP2
+ lfence
+ jmp srso_alias_return_thunk
+-SYM_FUNC_END(srso_untrain_ret_alias)
+-__EXPORT_THUNK(srso_untrain_ret_alias)
++SYM_FUNC_END(srso_alias_untrain_ret)
++__EXPORT_THUNK(srso_alias_untrain_ret)
+
+ .section .text.__x86.rethunk_safe
+ #else
+ /* dummy definition for alternatives */
+-SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
++SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+-SYM_FUNC_END(srso_untrain_ret_alias)
++SYM_FUNC_END(srso_alias_untrain_ret)
+ #endif
+
+-SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
++SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ add $8, %_ASM_SP
+ UNWIND_HINT_FUNC
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+-SYM_FUNC_END(srso_safe_ret_alias)
++SYM_FUNC_END(srso_alias_safe_ret)
+
+ .section .text.__x86.return_thunk
+
+ SYM_CODE_START(srso_alias_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+- call srso_safe_ret_alias
++ call srso_alias_safe_ret
+ ud2
+ SYM_CODE_END(srso_alias_return_thunk)
+
--- /dev/null
+From ba5ca5e5e6a1d55923e88b4a83da452166f5560e Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 11 Aug 2023 08:52:55 -0700
+Subject: x86/retpoline: Don't clobber RFLAGS during srso_safe_ret()
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ba5ca5e5e6a1d55923e88b4a83da452166f5560e upstream.
+
+Use LEA instead of ADD when adjusting %rsp in srso_safe_ret{,_alias}()
+so as to avoid clobbering flags. Drop one of the INT3 instructions to
+account for the LEA consuming one more byte than the ADD.
+
+KVM's emulator makes indirect calls into a jump table of sorts, where
+the destination of each call is a small blob of code that performs fast
+emulation by executing the target instruction with fixed operands.
+
+E.g. to emulate ADC, fastop() invokes adcb_al_dl():
+
+ adcb_al_dl:
+ <+0>: adc %dl,%al
+ <+2>: jmp <__x86_return_thunk>
+
+A major motivation for doing fast emulation is to leverage the CPU to
+handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is
+both an input and output to the target of the call. fastop() collects
+the RFLAGS result by pushing RFLAGS onto the stack and popping them back
+into a variable (held in %rdi in this case):
+
+ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
+
+ <+71>: mov 0xc0(%r8),%rdx
+ <+78>: mov 0x100(%r8),%rcx
+ <+85>: push %rdi
+ <+86>: popf
+ <+87>: call *%rsi
+ <+89>: nop
+ <+90>: nop
+ <+91>: nop
+ <+92>: pushf
+ <+93>: pop %rdi
+
+and then propagating the arithmetic flags into the vCPU's emulator state:
+
+ ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+
+ <+64>: and $0xfffffffffffff72a,%r9
+ <+94>: and $0x8d5,%edi
+ <+109>: or %rdi,%r9
+ <+122>: mov %r9,0x10(%r8)
+
+The failures can be most easily reproduced by running the "emulator"
+test in KVM-Unit-Tests.
+
+If you're feeling a bit of deja vu, see commit b63f20a778c8
+("x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386").
+
+In addition, this breaks booting of a clang-compiled guest on
+a gcc-compiled host where the host contains the %rsp-modifying SRSO
+mitigations.
+
+ [ bp: Massage commit message, extend, remove addresses. ]
+
+Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
+Closes: https://lore.kernel.org/all/de474347-122d-54cd-eabf-9dcc95ab9eae@amd.com
+Reported-by: Srikanth Aithal <sraithal@amd.com>
+Reported-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Nathan Chancellor <nathan@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/20230810013334.GA5354@dev-arch.thelio-3990X/
+Link: https://lore.kernel.org/r/20230811155255.250835-1-seanjc@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -170,7 +170,7 @@ SYM_FUNC_END(srso_alias_untrain_ret)
+ #endif
+
+ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+- add $8, %_ASM_SP
++ lea 8(%_ASM_SP), %_ASM_SP
+ UNWIND_HINT_FUNC
+ ANNOTATE_UNRET_SAFE
+ ret
+@@ -270,7 +270,7 @@ __EXPORT_THUNK(retbleed_untrain_ret)
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed which is a
+ *
+- * movabs $0xccccccc308c48348,%rax
++ * movabs $0xccccc30824648d48,%rax
+ *
+ * and when the return thunk executes the inner label srso_safe_ret()
+ * later, it is a stack manipulation and a RET which is mispredicted and
+@@ -289,11 +289,10 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL
+ * the stack.
+ */
+ SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+- add $8, %_ASM_SP
++ lea 8(%_ASM_SP), %_ASM_SP
+ ret
+ int3
+ int3
+- int3
+ /* end of movabs */
+ lfence
+ call srso_safe_ret
--- /dev/null
+From 79cd2a11224eab86d6673fe8a11d2046ae9d2757 Mon Sep 17 00:00:00 2001
+From: Petr Pavlu <petr.pavlu@suse.com>
+Date: Tue, 11 Jul 2023 11:19:51 +0200
+Subject: x86/retpoline,kprobes: Fix position of thunk sections with CONFIG_LTO_CLANG
+
+From: Petr Pavlu <petr.pavlu@suse.com>
+
+commit 79cd2a11224eab86d6673fe8a11d2046ae9d2757 upstream.
+
+The linker script arch/x86/kernel/vmlinux.lds.S matches the thunk
+sections ".text.__x86.*" from arch/x86/lib/retpoline.S as follows:
+
+ .text {
+ [...]
+ TEXT_TEXT
+ [...]
+ __indirect_thunk_start = .;
+ *(.text.__x86.*)
+ __indirect_thunk_end = .;
+ [...]
+ }
+
+Macro TEXT_TEXT references TEXT_MAIN which normally expands to only
+".text". However, with CONFIG_LTO_CLANG, TEXT_MAIN becomes
+".text .text.[0-9a-zA-Z_]*" which wrongly matches also the thunk
+sections. The output layout is then different than expected. For
+instance, the currently defined range [__indirect_thunk_start,
+__indirect_thunk_end] becomes empty.
+
+Prevent the problem by using ".." as the first separator, for example,
+".text..__x86.indirect_thunk". This pattern is utilized by other
+explicit section names which start with one of the standard prefixes,
+such as ".text" or ".data", and that need to be individually selected in
+the linker script.
+
+ [ nathan: Fix conflicts with SRSO and fold in fix issue brought up by
+ Andrew Cooper in post-review:
+ https://lore.kernel.org/20230803230323.1478869-1-andrew.cooper3@citrix.com ]
+
+Fixes: dc5723b02e52 ("kbuild: add support for Clang LTO")
+Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230711091952.27944-2-petr.pavlu@suse.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/vmlinux.lds.S | 8 ++++----
+ arch/x86/lib/retpoline.S | 8 ++++----
+ tools/objtool/check.c | 2 +-
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -134,15 +134,15 @@ SECTIONS
+ SOFTIRQENTRY_TEXT
+ #ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+- *(.text.__x86.indirect_thunk)
+- *(.text.__x86.return_thunk)
++ *(.text..__x86.indirect_thunk)
++ *(.text..__x86.return_thunk)
+ __indirect_thunk_end = .;
+ #endif
+ STATIC_CALL_TEXT
+
+ ALIGN_ENTRY_TEXT_BEGIN
+ #ifdef CONFIG_CPU_SRSO
+- *(.text.__x86.rethunk_untrain)
++ *(.text..__x86.rethunk_untrain)
+ #endif
+
+ ENTRY_TEXT
+@@ -153,7 +153,7 @@ SECTIONS
+ * definition.
+ */
+ . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
+- *(.text.__x86.rethunk_safe)
++ *(.text..__x86.rethunk_safe)
+ #endif
+ ALIGN_ENTRY_TEXT_END
+ *(.gnu.warning)
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -13,7 +13,7 @@
+ #include <asm/frame.h>
+ #include <asm/nops.h>
+
+- .section .text.__x86.indirect_thunk
++ .section .text..__x86.indirect_thunk
+
+
+ .macro POLINE reg
+@@ -148,7 +148,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_a
+ * As a result, srso_alias_safe_ret() becomes a safe return.
+ */
+ #ifdef CONFIG_CPU_SRSO
+- .section .text.__x86.rethunk_untrain
++ .section .text..__x86.rethunk_untrain
+
+ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ UNWIND_HINT_FUNC
+@@ -159,7 +159,7 @@ SYM_START(srso_alias_untrain_ret, SYM_L_
+ SYM_FUNC_END(srso_alias_untrain_ret)
+ __EXPORT_THUNK(srso_alias_untrain_ret)
+
+- .section .text.__x86.rethunk_safe
++ .section .text..__x86.rethunk_safe
+ #else
+ /* dummy definition for alternatives */
+ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+@@ -177,7 +177,7 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLO
+ int3
+ SYM_FUNC_END(srso_alias_safe_ret)
+
+- .section .text.__x86.return_thunk
++ .section .text..__x86.return_thunk
+
+ SYM_CODE_START(srso_alias_return_thunk)
+ UNWIND_HINT_FUNC
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -429,7 +429,7 @@ static int decode_instructions(struct ob
+ if (!strcmp(sec->name, ".noinstr.text") ||
+ !strcmp(sec->name, ".entry.text") ||
+ !strcmp(sec->name, ".cpuidle.text") ||
+- !strncmp(sec->name, ".text.__x86.", 12))
++ !strncmp(sec->name, ".text..__x86.", 13))
+ sec->noinstr = true;
+
+ /*
--- /dev/null
+From 833fd800bf56b74d39d71d3f5936dffb3e0409c6 Mon Sep 17 00:00:00 2001
+From: Petr Pavlu <petr.pavlu@suse.com>
+Date: Tue, 11 Jul 2023 11:19:52 +0200
+Subject: x86/retpoline,kprobes: Skip optprobe check for indirect jumps with retpolines and IBT
+
+From: Petr Pavlu <petr.pavlu@suse.com>
+
+commit 833fd800bf56b74d39d71d3f5936dffb3e0409c6 upstream.
+
+The kprobes optimization check can_optimize() calls
+insn_is_indirect_jump() to detect indirect jump instructions in
+a target function. If any is found, creating an optprobe is disallowed
+in the function because the jump could be from a jump table and could
+potentially land in the middle of the target optprobe.
+
+With retpolines, insn_is_indirect_jump() additionally looks for calls to
+indirect thunks which the compiler potentially used to replace original
+jumps. This extra check is however unnecessary because jump tables are
+disabled when the kernel is built with retpolines. The same is currently
+the case with IBT.
+
+Based on this observation, remove the logic to look for calls to
+indirect thunks and skip the check for indirect jumps altogether if the
+kernel is built with retpolines or IBT. Remove subsequently the symbols
+__indirect_thunk_start and __indirect_thunk_end which are no longer
+needed.
+
+Dropping this logic indirectly fixes a problem where the range
+[__indirect_thunk_start, __indirect_thunk_end] wrongly also included
+the return thunk. As a result, machines that used the return thunk as
+a mitigation and didn't have it patched by any alternative ended up not
+being able to use optprobes in any regular function.
+
+Fixes: 0b53c374b9ef ("x86/retpoline: Use -mfunction-return")
+Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Suggested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/20230711091952.27944-3-petr.pavlu@suse.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 3 --
+ arch/x86/kernel/kprobes/opt.c | 40 ++++++++++++++---------------------
+ arch/x86/kernel/vmlinux.lds.S | 2 -
+ tools/perf/util/thread-stack.c | 4 ---
+ 4 files changed, 17 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -482,9 +482,6 @@ enum ssb_mitigation {
+ SPEC_STORE_BYPASS_SECCOMP,
+ };
+
+-extern char __indirect_thunk_start[];
+-extern char __indirect_thunk_end[];
+-
+ static __always_inline
+ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+ {
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -226,7 +226,7 @@ static int copy_optimized_instructions(u
+ }
+
+ /* Check whether insn is indirect jump */
+-static int __insn_is_indirect_jump(struct insn *insn)
++static int insn_is_indirect_jump(struct insn *insn)
+ {
+ return ((insn->opcode.bytes[0] == 0xff &&
+ (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+@@ -260,26 +260,6 @@ static int insn_jump_into_range(struct i
+ return (start <= target && target <= start + len);
+ }
+
+-static int insn_is_indirect_jump(struct insn *insn)
+-{
+- int ret = __insn_is_indirect_jump(insn);
+-
+-#ifdef CONFIG_RETPOLINE
+- /*
+- * Jump to x86_indirect_thunk_* is treated as an indirect jump.
+- * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
+- * older gcc may use indirect jump. So we add this check instead of
+- * replace indirect-jump check.
+- */
+- if (!ret)
+- ret = insn_jump_into_range(insn,
+- (unsigned long)__indirect_thunk_start,
+- (unsigned long)__indirect_thunk_end -
+- (unsigned long)__indirect_thunk_start);
+-#endif
+- return ret;
+-}
+-
+ /* Decode whole function to ensure any instructions don't jump into target */
+ static int can_optimize(unsigned long paddr)
+ {
+@@ -334,9 +314,21 @@ static int can_optimize(unsigned long pa
+ /* Recover address */
+ insn.kaddr = (void *)addr;
+ insn.next_byte = (void *)(addr + insn.length);
+- /* Check any instructions don't jump into target */
+- if (insn_is_indirect_jump(&insn) ||
+- insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
++ /*
++ * Check any instructions don't jump into target, indirectly or
++ * directly.
++ *
++ * The indirect case is present to handle a code with jump
++ * tables. When the kernel uses retpolines, the check should in
++ * theory additionally look for jumps to indirect thunks.
++ * However, the kernel built with retpolines or IBT has jump
++ * tables disabled so the check can be skipped altogether.
++ */
++ if (!IS_ENABLED(CONFIG_RETPOLINE) &&
++ !IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
++ insn_is_indirect_jump(&insn))
++ return 0;
++ if (insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+ DISP32_SIZE))
+ return 0;
+ addr += insn.length;
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -133,10 +133,8 @@ SECTIONS
+ KPROBES_TEXT
+ SOFTIRQENTRY_TEXT
+ #ifdef CONFIG_RETPOLINE
+- __indirect_thunk_start = .;
+ *(.text..__x86.indirect_thunk)
+ *(.text..__x86.return_thunk)
+- __indirect_thunk_end = .;
+ #endif
+ STATIC_CALL_TEXT
+
+--- a/tools/perf/util/thread-stack.c
++++ b/tools/perf/util/thread-stack.c
+@@ -1037,9 +1037,7 @@ static int thread_stack__trace_end(struc
+
+ static bool is_x86_retpoline(const char *name)
+ {
+- const char *p = strstr(name, "__x86_indirect_thunk_");
+-
+- return p == name || !strcmp(name, "__indirect_thunk_start");
++ return strstr(name, "__x86_indirect_thunk_") == name;
+ }
+
+ /*
--- /dev/null
+From 6405b72e8d17bd1875a56ae52d23ec3cd51b9d66 Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Tue, 15 Aug 2023 11:53:13 +0200
+Subject: x86/srso: Correct the mitigation status when SMT is disabled
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 6405b72e8d17bd1875a56ae52d23ec3cd51b9d66 upstream.
+
+Specify how SRSO is mitigated when SMT is disabled. Also, correct the
+SMT check for that.
+
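+[Editorial illustration, not part of the upstream change: a condensed,
+stand-alone model of the reporting fix. show_srso_state() is a
+hypothetical helper; the "safe RET" string below is only an example
+mitigation value.]
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  /*
+   * SRSO_NO forced on Zen1/2 with SMT off now reports a mitigation
+   * ("SMT disabled") instead of "Not affected".
+   */
+  static void show_srso_state(bool srso_no, const char *mitigation,
+                              bool has_ibpb_brtype_microcode)
+  {
+          if (srso_no) {
+                  puts("Mitigation: SMT disabled");
+                  return;
+          }
+          printf("%s%s\n", mitigation,
+                 has_ibpb_brtype_microcode ? "" : ", no microcode");
+  }
+
+  int main(void)
+  {
+          show_srso_state(true, "", true);                /* Zen1/2, SMT off */
+          show_srso_state(false, "Mitigation: safe RET", false);
+          return 0;
+  }
+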
+Fixes: e9fbc47b818b ("x86/srso: Disable the mitigation on unaffected configurations")
+Suggested-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Link: https://lore.kernel.org/r/20230814200813.p5czl47zssuej7nv@treble
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -2428,8 +2428,7 @@ static void __init srso_select_mitigatio
+ * Zen1/2 with SMT off aren't vulnerable after the right
+ * IBPB microcode has been applied.
+ */
+- if ((boot_cpu_data.x86 < 0x19) &&
+- (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED))) {
++ if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) {
+ setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+ return;
+ }
+@@ -2719,7 +2718,7 @@ static ssize_t gds_show_state(char *buf)
+ static ssize_t srso_show_state(char *buf)
+ {
+ if (boot_cpu_has(X86_FEATURE_SRSO_NO))
+- return sysfs_emit(buf, "Not affected\n");
++ return sysfs_emit(buf, "Mitigation: SMT disabled\n");
+
+ return sysfs_emit(buf, "%s%s\n",
+ srso_strings[srso_mitigation],
--- /dev/null
+From e9fbc47b818b964ddff5df5b2d5c0f5f32f4a147 Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Sun, 13 Aug 2023 12:39:34 +0200
+Subject: x86/srso: Disable the mitigation on unaffected configurations
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit e9fbc47b818b964ddff5df5b2d5c0f5f32f4a147 upstream.
+
+Skip the SRSO command line parsing, which is not needed on Zen1/2 with
+SMT disabled and with the proper microcode applied (the latter should be
+the case anyway), as those configurations are not affected.
+
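+[Editorial illustration, not part of the upstream change: a stand-alone
+sketch of the early return this patch adds. select_srso_mitigation() and
+its parameters are hypothetical stand-ins for the kernel code.]
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  static bool srso_no;
+
+  /*
+   * On Zen1/2 (family < 0x19) with SMT disabled or not possible, SRSO_NO
+   * is forced and the srso= command line handling is skipped entirely.
+   */
+  static void select_srso_mitigation(unsigned int family, bool smt_possible,
+                                     bool smt_disabled)
+  {
+          if (family < 0x19 && (!smt_possible || smt_disabled)) {
+                  srso_no = true;         /* unaffected configuration */
+                  return;
+          }
+          /* ... otherwise parse srso= and pick a mitigation ... */
+  }
+
+  int main(void)
+  {
+          select_srso_mitigation(0x17, true, true);       /* Zen2, SMT disabled */
+          printf("srso_no=%d\n", srso_no);
+          return 0;
+  }
+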
+Fixes: 5a15d8348881 ("x86/srso: Tie SBPB bit setting to microcode patch detection")
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230813104517.3346-1-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -2429,8 +2429,10 @@ static void __init srso_select_mitigatio
+ * IBPB microcode has been applied.
+ */
+ if ((boot_cpu_data.x86 < 0x19) &&
+- (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED)))
++ (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED))) {
+ setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
++ return;
++ }
+ }
+
+ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
+@@ -2716,6 +2718,9 @@ static ssize_t gds_show_state(char *buf)
+
+ static ssize_t srso_show_state(char *buf)
+ {
++ if (boot_cpu_has(X86_FEATURE_SRSO_NO))
++ return sysfs_emit(buf, "Not affected\n");
++
+ return sysfs_emit(buf, "%s%s\n",
+ srso_strings[srso_mitigation],
+ (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode"));
--- /dev/null
+From 9dbd23e42ff0b10c9b02c9e649c76e5228241a8e Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Mon, 14 Aug 2023 21:29:50 +0200
+Subject: x86/srso: Explain the untraining sequences a bit more
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 9dbd23e42ff0b10c9b02c9e649c76e5228241a8e upstream.
+
+The goal is to eventually have proper documentation about all this.
+
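+[Editorial illustration, not part of the upstream change: the cacheline
+alignment rule documented by the new comment can be checked stand-alone
+as below. The addresses are hypothetical; 64 bytes is the Zen1/2 boundary
+already referenced by the BTB alignment notes in this file.]
+
+  #include <assert.h>
+  #include <stdint.h>
+
+  #define ZEN_CACHELINE 64
+
+  /* Untrained sequences must start on a 64-byte cacheline boundary. */
+  static int starts_on_cacheline(uintptr_t addr)
+  {
+          return (addr % ZEN_CACHELINE) == 0;
+  }
+
+  int main(void)
+  {
+          assert(starts_on_cacheline(0xffffffff82000040));   /* OK */
+          assert(!starts_on_cacheline(0xffffffff82000044));  /* misaligned */
+          return 0;
+  }
+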
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230814164447.GFZNpZ/64H4lENIe94@fat_crate.local
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/retpoline.S | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -187,6 +187,25 @@ SYM_CODE_START(srso_alias_return_thunk)
+ SYM_CODE_END(srso_alias_return_thunk)
+
+ /*
++ * Some generic notes on the untraining sequences:
++ *
++ * They are interchangeable when it comes to flushing potentially wrong
++ * RET predictions from the BTB.
++ *
++ * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
++ * Retbleed sequence because the return sequence done there
++ * (srso_safe_ret()) is longer and the return sequence must fully nest
++ * (end before) the untraining sequence. Therefore, the untraining
++ * sequence must fully overlap the return sequence.
++ *
++ * Regarding alignment - the instructions which need to be untrained,
++ * must all start at a cacheline boundary for Zen1/2 generations. That
++ * is, instruction sequences starting at srso_safe_ret() and
++ * the respective instruction sequences at retbleed_return_thunk()
++ * must start at a cacheline boundary.
++ */
++
++/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
--- /dev/null
+From 54097309620ef0dc2d7083783dc521c6a5fef957 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 16 Aug 2023 12:44:19 +0200
+Subject: x86/static_call: Fix __static_call_fixup()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 54097309620ef0dc2d7083783dc521c6a5fef957 upstream.
+
+Christian reported spurious module load crashes after some of Song's
+module memory layout patches.
+
+Turns out that if the very last instruction on the very last page of the
+module is a 'JMP __x86_return_thunk' then __static_call_fixup() will
+trip a fault and die.
+
+And while the module rework made this slightly more likely to happen,
+it's always been possible.
+
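+[Editorial illustration, not part of the upstream change: a stand-alone
+model of the page-crossing guard the fix adds before the 3-byte read past
+the return. PAGE_SHIFT, text_address() and safe_to_inspect() are
+hypothetical stand-ins for the kernel code.]
+
+  #include <stdbool.h>
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define PAGE_SHIFT 12
+
+  /* Stand-in for kernel_text_address(); only one page is "mapped" here. */
+  static bool text_address(uintptr_t addr)
+  {
+          return (addr >> PAGE_SHIFT) == 0x1234;
+  }
+
+  /*
+   * Before reading tramp+5..tramp+7, make sure the read cannot run off
+   * the end of mapped text when it crosses a page boundary.
+   */
+  static bool safe_to_inspect(uintptr_t tramp)
+  {
+          if (((tramp >> PAGE_SHIFT) != ((tramp + 7) >> PAGE_SHIFT)) &&
+              !text_address(tramp + 7))
+                  return false;
+          return true;
+  }
+
+  int main(void)
+  {
+          printf("%d\n", safe_to_inspect(0x1234000));   /* well inside page: 1 */
+          printf("%d\n", safe_to_inspect(0x1234ffa));   /* crosses off text: 0 */
+          return 0;
+  }
+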
+Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding")
+Reported-by: Christian Bricart <christian@bricart.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Link: https://lkml.kernel.org/r/20230816104419.GA982867@hirez.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/static_call.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -186,6 +186,19 @@ EXPORT_SYMBOL_GPL(arch_static_call_trans
+ */
+ bool __static_call_fixup(void *tramp, u8 op, void *dest)
+ {
++ unsigned long addr = (unsigned long)tramp;
++ /*
++ * Not all .return_sites are a static_call trampoline (most are not).
++ * Check if the 3 bytes after the return are still kernel text, if not,
++ * then this definitely is not a trampoline and we need not worry
++ * further.
++ *
++ * This avoids the memcmp() below tripping over pagefaults etc..
++ */
++ if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) &&
++ !kernel_text_address(addr + 7))
++ return false;
++
+ if (memcmp(tramp+5, tramp_ud, 3)) {
+ /* Not a trampoline site, not our problem. */
+ return false;