From 846e055660108c96dc19f142b9c2bfe43ac1d239 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Oct 2023 19:47:03 +0200 Subject: [PATCH] 6.5-stable patches added patches: audit-io_uring-io_uring-openat-triggers-audit-reference-count-underflow.patch fs-ntfs3-fix-deadlock-in-mark_as_free_ex.patch fs-ntfs3-fix-oob-read-in-ntfs_init_from_boot.patch fs-ntfs3-fix-panic-about-slab-out-of-bounds-caused-by-ntfs_list_ea.patch fs-ntfs3-fix-possible-null-pointer-dereference-in-hdr_find_e.patch fs-ntfs3-fix-shift-out-of-bounds-in-ntfs_fill_super.patch kvm-x86-constrain-guest-supported-xfeatures-only-at-kvm_get_xsave-2.patch kvm-x86-pmu-truncate-counter-value-to-allowed-width-on-write.patch mm-slab-do-not-create-kmalloc-caches-smaller-than-arch_slab_minalign.patch mptcp-more-conservative-check-for-zero-probes.patch netfilter-nft_payload-fix-wrong-mac-header-matching.patch revert-net-wwan-iosm-enable-runtime-pm-support-for-7560.patch selftests-mptcp-join-no-rst-when-rm-subflow-addr.patch tcp-check-mptcp-level-constraints-for-backlog-coalescing.patch x86-fpu-allow-caller-to-constrain-xfeatures-when-copying-to-uabi-buffer.patch x86-kvm-svm-add-support-for-invalid-ipi-vector-interception.patch x86-kvm-svm-always-update-the-x2avic-msr-interception.patch x86-kvm-svm-refresh-avic-inhibition-in-svm_leave_nested.patch x86-sev-check-for-user-space-ioio-pointing-to-kernel-space.patch x86-sev-check-iobm-for-ioio-exceptions-from-user-space.patch x86-sev-disable-mmio-emulation-from-user-mode.patch --- ...gers-audit-reference-count-underflow.patch | 165 ++++++++++ ...tfs3-fix-deadlock-in-mark_as_free_ex.patch | 41 +++ ...-fix-oob-read-in-ntfs_init_from_boot.patch | 41 +++ ...out-of-bounds-caused-by-ntfs_list_ea.patch | 64 ++++ ...ll-pointer-dereference-in-hdr_find_e.patch | 53 +++ ...ift-out-of-bounds-in-ntfs_fill_super.patch | 73 +++++ ...ed-xfeatures-only-at-kvm_get_xsave-2.patch | 115 +++++++ ...nter-value-to-allowed-width-on-write.patch | 92 ++++++ 
...ches-smaller-than-arch_slab_minalign.patch | 73 +++++ ...e-conservative-check-for-zero-probes.patch | 93 ++++++ ...ayload-fix-wrong-mac-header-matching.patch | 37 +++ ...m-enable-runtime-pm-support-for-7560.patch | 306 ++++++++++++++++++ ...tcp-join-no-rst-when-rm-subflow-addr.patch | 133 ++++++++ queue-6.5/series | 21 ++ ...l-constraints-for-backlog-coalescing.patch | 47 +++ ...features-when-copying-to-uabi-buffer.patch | 156 +++++++++ ...-for-invalid-ipi-vector-interception.patch | 61 ++++ ...s-update-the-x2avic-msr-interception.patch | 55 ++++ ...-avic-inhibition-in-svm_leave_nested.patch | 40 +++ ...-space-ioio-pointing-to-kernel-space.patch | 95 ++++++ ...-for-ioio-exceptions-from-user-space.patch | 172 ++++++++++ ...isable-mmio-emulation-from-user-mode.patch | 42 +++ 22 files changed, 1975 insertions(+) create mode 100644 queue-6.5/audit-io_uring-io_uring-openat-triggers-audit-reference-count-underflow.patch create mode 100644 queue-6.5/fs-ntfs3-fix-deadlock-in-mark_as_free_ex.patch create mode 100644 queue-6.5/fs-ntfs3-fix-oob-read-in-ntfs_init_from_boot.patch create mode 100644 queue-6.5/fs-ntfs3-fix-panic-about-slab-out-of-bounds-caused-by-ntfs_list_ea.patch create mode 100644 queue-6.5/fs-ntfs3-fix-possible-null-pointer-dereference-in-hdr_find_e.patch create mode 100644 queue-6.5/fs-ntfs3-fix-shift-out-of-bounds-in-ntfs_fill_super.patch create mode 100644 queue-6.5/kvm-x86-constrain-guest-supported-xfeatures-only-at-kvm_get_xsave-2.patch create mode 100644 queue-6.5/kvm-x86-pmu-truncate-counter-value-to-allowed-width-on-write.patch create mode 100644 queue-6.5/mm-slab-do-not-create-kmalloc-caches-smaller-than-arch_slab_minalign.patch create mode 100644 queue-6.5/mptcp-more-conservative-check-for-zero-probes.patch create mode 100644 queue-6.5/netfilter-nft_payload-fix-wrong-mac-header-matching.patch create mode 100644 queue-6.5/revert-net-wwan-iosm-enable-runtime-pm-support-for-7560.patch create mode 100644 
queue-6.5/selftests-mptcp-join-no-rst-when-rm-subflow-addr.patch create mode 100644 queue-6.5/tcp-check-mptcp-level-constraints-for-backlog-coalescing.patch create mode 100644 queue-6.5/x86-fpu-allow-caller-to-constrain-xfeatures-when-copying-to-uabi-buffer.patch create mode 100644 queue-6.5/x86-kvm-svm-add-support-for-invalid-ipi-vector-interception.patch create mode 100644 queue-6.5/x86-kvm-svm-always-update-the-x2avic-msr-interception.patch create mode 100644 queue-6.5/x86-kvm-svm-refresh-avic-inhibition-in-svm_leave_nested.patch create mode 100644 queue-6.5/x86-sev-check-for-user-space-ioio-pointing-to-kernel-space.patch create mode 100644 queue-6.5/x86-sev-check-iobm-for-ioio-exceptions-from-user-space.patch create mode 100644 queue-6.5/x86-sev-disable-mmio-emulation-from-user-mode.patch diff --git a/queue-6.5/audit-io_uring-io_uring-openat-triggers-audit-reference-count-underflow.patch b/queue-6.5/audit-io_uring-io_uring-openat-triggers-audit-reference-count-underflow.patch new file mode 100644 index 00000000000..cfc5eb21928 --- /dev/null +++ b/queue-6.5/audit-io_uring-io_uring-openat-triggers-audit-reference-count-underflow.patch @@ -0,0 +1,165 @@ +From 03adc61edad49e1bbecfb53f7ea5d78f398fe368 Mon Sep 17 00:00:00 2001 +From: Dan Clash +Date: Thu, 12 Oct 2023 14:55:18 -0700 +Subject: audit,io_uring: io_uring openat triggers audit reference count underflow + +From: Dan Clash + +commit 03adc61edad49e1bbecfb53f7ea5d78f398fe368 upstream. + +An io_uring openat operation can update an audit reference count +from multiple threads resulting in the call trace below. + +A call to io_uring_submit() with a single openat op with a flag of +IOSQE_ASYNC results in the following reference count updates. + +These first part of the system call performs two increments that do not race. 
+ +do_syscall_64() + __do_sys_io_uring_enter() + io_submit_sqes() + io_openat_prep() + __io_openat_prep() + getname() + getname_flags() /* update 1 (increment) */ + __audit_getname() /* update 2 (increment) */ + +The openat op is queued to an io_uring worker thread which starts the +opportunity for a race. The system call exit performs one decrement. + +do_syscall_64() + syscall_exit_to_user_mode() + syscall_exit_to_user_mode_prepare() + __audit_syscall_exit() + audit_reset_context() + putname() /* update 3 (decrement) */ + +The io_uring worker thread performs one increment and two decrements. +These updates can race with the system call decrement. + +io_wqe_worker() + io_worker_handle_work() + io_wq_submit_work() + io_issue_sqe() + io_openat() + io_openat2() + do_filp_open() + path_openat() + __audit_inode() /* update 4 (increment) */ + putname() /* update 5 (decrement) */ + __audit_uring_exit() + audit_reset_context() + putname() /* update 6 (decrement) */ + +The fix is to change the refcnt member of struct audit_names +from int to atomic_t. + +kernel BUG at fs/namei.c:262! +Call Trace: +... + ? putname+0x68/0x70 + audit_reset_context.part.0.constprop.0+0xe1/0x300 + __audit_uring_exit+0xda/0x1c0 + io_issue_sqe+0x1f3/0x450 + ? lock_timer_base+0x3b/0xd0 + io_wq_submit_work+0x8d/0x2b0 + ? 
__try_to_del_timer_sync+0x67/0xa0 + io_worker_handle_work+0x17c/0x2b0 + io_wqe_worker+0x10a/0x350 + +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/lkml/MW2PR2101MB1033FFF044A258F84AEAA584F1C9A@MW2PR2101MB1033.namprd21.prod.outlook.com/ +Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") +Signed-off-by: Dan Clash +Link: https://lore.kernel.org/r/20231012215518.GA4048@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net +Reviewed-by: Jens Axboe +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/namei.c | 9 +++++---- + include/linux/fs.h | 2 +- + kernel/auditsc.c | 8 ++++---- + 3 files changed, 10 insertions(+), 9 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -188,7 +188,7 @@ getname_flags(const char __user *filenam + } + } + +- result->refcnt = 1; ++ atomic_set(&result->refcnt, 1); + /* The empty path is special. */ + if (unlikely(!len)) { + if (empty) +@@ -249,7 +249,7 @@ getname_kernel(const char * filename) + memcpy((char *)result->name, filename, len); + result->uptr = NULL; + result->aname = NULL; +- result->refcnt = 1; ++ atomic_set(&result->refcnt, 1); + audit_getname(result); + + return result; +@@ -261,9 +261,10 @@ void putname(struct filename *name) + if (IS_ERR(name)) + return; + +- BUG_ON(name->refcnt <= 0); ++ if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) ++ return; + +- if (--name->refcnt > 0) ++ if (!atomic_dec_and_test(&name->refcnt)) + return; + + if (name->name != name->iname) { +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2318,7 +2318,7 @@ struct audit_names; + struct filename { + const char *name; /* pointer to actual string */ + const __user char *uptr; /* original userland pointer */ +- int refcnt; ++ atomic_t refcnt; + struct audit_names *aname; + const char iname[]; + }; +--- a/kernel/auditsc.c ++++ b/kernel/auditsc.c +@@ -2210,7 +2210,7 @@ __audit_reusename(const __user char *upt + if (!n->name) + continue; + if (n->name->uptr == 
uptr) { +- n->name->refcnt++; ++ atomic_inc(&n->name->refcnt); + return n->name; + } + } +@@ -2239,7 +2239,7 @@ void __audit_getname(struct filename *na + n->name = name; + n->name_len = AUDIT_NAME_FULL; + name->aname = n; +- name->refcnt++; ++ atomic_inc(&name->refcnt); + } + + static inline int audit_copy_fcaps(struct audit_names *name, +@@ -2371,7 +2371,7 @@ out_alloc: + return; + if (name) { + n->name = name; +- name->refcnt++; ++ atomic_inc(&name->refcnt); + } + + out: +@@ -2498,7 +2498,7 @@ void __audit_inode_child(struct inode *p + if (found_parent) { + found_child->name = found_parent->name; + found_child->name_len = AUDIT_NAME_FULL; +- found_child->name->refcnt++; ++ atomic_inc(&found_child->name->refcnt); + } + } + diff --git a/queue-6.5/fs-ntfs3-fix-deadlock-in-mark_as_free_ex.patch b/queue-6.5/fs-ntfs3-fix-deadlock-in-mark_as_free_ex.patch new file mode 100644 index 00000000000..7e7895a03b4 --- /dev/null +++ b/queue-6.5/fs-ntfs3-fix-deadlock-in-mark_as_free_ex.patch @@ -0,0 +1,41 @@ +From bfbe5b31caa74ab97f1784fe9ade5f45e0d3de91 Mon Sep 17 00:00:00 2001 +From: Konstantin Komarov +Date: Fri, 30 Jun 2023 16:22:53 +0400 +Subject: fs/ntfs3: fix deadlock in mark_as_free_ex + +From: Konstantin Komarov + +commit bfbe5b31caa74ab97f1784fe9ade5f45e0d3de91 upstream. 
+ +Reported-by: syzbot+e94d98936a0ed08bde43@syzkaller.appspotmail.com +Signed-off-by: Konstantin Komarov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ntfs3/fsntfs.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/ntfs3/fsntfs.c ++++ b/fs/ntfs3/fsntfs.c +@@ -2461,10 +2461,12 @@ void mark_as_free_ex(struct ntfs_sb_info + { + CLST end, i, zone_len, zlen; + struct wnd_bitmap *wnd = &sbi->used.bitmap; ++ bool dirty = false; + + down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + if (!wnd_is_used(wnd, lcn, len)) { +- ntfs_set_state(sbi, NTFS_DIRTY_ERROR); ++ /* mark volume as dirty out of wnd->rw_lock */ ++ dirty = true; + + end = lcn + len; + len = 0; +@@ -2518,6 +2520,8 @@ void mark_as_free_ex(struct ntfs_sb_info + + out: + up_write(&wnd->rw_lock); ++ if (dirty) ++ ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + } + + /* diff --git a/queue-6.5/fs-ntfs3-fix-oob-read-in-ntfs_init_from_boot.patch b/queue-6.5/fs-ntfs3-fix-oob-read-in-ntfs_init_from_boot.patch new file mode 100644 index 00000000000..0ea4f1a0d45 --- /dev/null +++ b/queue-6.5/fs-ntfs3-fix-oob-read-in-ntfs_init_from_boot.patch @@ -0,0 +1,41 @@ +From 34e6552a442f268eefd408e47f4f2d471aa64829 Mon Sep 17 00:00:00 2001 +From: Pavel Skripkin +Date: Thu, 13 Jul 2023 22:41:46 +0300 +Subject: fs/ntfs3: Fix OOB read in ntfs_init_from_boot + +From: Pavel Skripkin + +commit 34e6552a442f268eefd408e47f4f2d471aa64829 upstream. + +Syzbot was able to create a device which has the last sector of size +512. + +After failing to boot from initial sector, reading from boot info from +offset 511 causes OOB read. 
+ +To prevent such reports add sanity check to validate if size of buffer_head +if big enough to hold ntfs3 bootinfo + +Fixes: 6a4cd3ea7d77 ("fs/ntfs3: Alternative boot if primary boot is corrupted") +Reported-by: syzbot+53ce40c8c0322c06aea5@syzkaller.appspotmail.com +Signed-off-by: Pavel Skripkin +Signed-off-by: Konstantin Komarov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ntfs3/super.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/ntfs3/super.c ++++ b/fs/ntfs3/super.c +@@ -855,6 +855,11 @@ static int ntfs_init_from_boot(struct su + + check_boot: + err = -EINVAL; ++ ++ /* Corrupted image; do not read OOB */ ++ if (bh->b_size - sizeof(*boot) < boot_off) ++ goto out; ++ + boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off); + + if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) { diff --git a/queue-6.5/fs-ntfs3-fix-panic-about-slab-out-of-bounds-caused-by-ntfs_list_ea.patch b/queue-6.5/fs-ntfs3-fix-panic-about-slab-out-of-bounds-caused-by-ntfs_list_ea.patch new file mode 100644 index 00000000000..974d66663d1 --- /dev/null +++ b/queue-6.5/fs-ntfs3-fix-panic-about-slab-out-of-bounds-caused-by-ntfs_list_ea.patch @@ -0,0 +1,64 @@ +From 8e7e27b2ee1e19c4040d4987e345f678a74c0aed Mon Sep 17 00:00:00 2001 +From: Zeng Heng +Date: Thu, 20 Apr 2023 15:46:22 +0800 +Subject: fs/ntfs3: fix panic about slab-out-of-bounds caused by ntfs_list_ea() + +From: Zeng Heng + +commit 8e7e27b2ee1e19c4040d4987e345f678a74c0aed upstream. 
+ +Here is a BUG report about linux-6.1 from syzbot, but it still remains +within upstream: + +BUG: KASAN: slab-out-of-bounds in ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] +BUG: KASAN: slab-out-of-bounds in ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 +Read of size 1 at addr ffff888021acaf3d by task syz-executor128/3632 + +Call Trace: + kasan_report+0x139/0x170 mm/kasan/report.c:495 + ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] + ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 + vfs_listxattr fs/xattr.c:457 [inline] + listxattr+0x293/0x2d0 fs/xattr.c:804 + path_listxattr fs/xattr.c:828 [inline] + __do_sys_llistxattr fs/xattr.c:846 [inline] + +Before derefering field members of `ea` in unpacked_ea_size(), we need to +check whether the EA_FULL struct is located in access validate range. + +Similarly, when derefering `ea->name` field member, we need to check +whethe the ea->name is located in access validate range, too. + +Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") +Reported-by: syzbot+9fcea5ef6dc4dc72d334@syzkaller.appspotmail.com +Signed-off-by: Zeng Heng +[almaz.alexandrovich@paragon-software.com: took the ret variable out of the loop block] +Signed-off-by: Konstantin Komarov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ntfs3/xattr.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/fs/ntfs3/xattr.c ++++ b/fs/ntfs3/xattr.c +@@ -211,7 +211,8 @@ static ssize_t ntfs_list_ea(struct ntfs_ + size = le32_to_cpu(info->size); + + /* Enumerate all xattrs. 
*/ +- for (ret = 0, off = 0; off < size; off += ea_size) { ++ ret = 0; ++ for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) { + ea = Add2Ptr(ea_all, off); + ea_size = unpacked_ea_size(ea); + +@@ -219,6 +220,10 @@ static ssize_t ntfs_list_ea(struct ntfs_ + break; + + if (buffer) { ++ /* Check if we can use field ea->name */ ++ if (off + ea_size > size) ++ break; ++ + if (ret + ea->name_len + 1 > bytes_per_buffer) { + err = -ERANGE; + goto out; diff --git a/queue-6.5/fs-ntfs3-fix-possible-null-pointer-dereference-in-hdr_find_e.patch b/queue-6.5/fs-ntfs3-fix-possible-null-pointer-dereference-in-hdr_find_e.patch new file mode 100644 index 00000000000..a2be8c99fb6 --- /dev/null +++ b/queue-6.5/fs-ntfs3-fix-possible-null-pointer-dereference-in-hdr_find_e.patch @@ -0,0 +1,53 @@ +From 1f9b94af923c88539426ed811ae7e9543834a5c5 Mon Sep 17 00:00:00 2001 +From: Ziqi Zhao +Date: Wed, 9 Aug 2023 12:11:18 -0700 +Subject: fs/ntfs3: Fix possible null-pointer dereference in hdr_find_e() + +From: Ziqi Zhao + +commit 1f9b94af923c88539426ed811ae7e9543834a5c5 upstream. + +Upon investigation of the C reproducer provided by Syzbot, it seemed +the reproducer was trying to mount a corrupted NTFS filesystem, then +issue a rename syscall to some nodes in the filesystem. This can be +shown by modifying the reproducer to only include the mount syscall, +and investigating the filesystem by e.g. `ls` and `rm` commands. As a +result, during the problematic call to `hdr_fine_e`, the `inode` being +supplied did not go through `indx_init`, hence the `cmp` function +pointer was never set. + +The fix is simply to check whether `cmp` is not set, and return NULL +if that's the case, in order to be consistent with other error +scenarios of the `hdr_find_e` method. The rationale behind this patch +is that: + +- We should prevent crashing the kernel even if the mounted filesystem + is corrupted. 
Any syscalls made on the filesystem could return + invalid, but the kernel should be able to sustain these calls. + +- Only very specific corruption would lead to this bug, so it would be + a pretty rare case in actual usage anyways. Therefore, introducing a + check to specifically protect against this bug seems appropriate. + Because of its rarity, an `unlikely` clause is used to wrap around + this nullity check. + +Reported-by: syzbot+60cf892fc31d1f4358fc@syzkaller.appspotmail.com +Signed-off-by: Ziqi Zhao +Signed-off-by: Konstantin Komarov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ntfs3/index.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/ntfs3/index.c ++++ b/fs/ntfs3/index.c +@@ -729,6 +729,9 @@ static struct NTFS_DE *hdr_find_e(const + u32 total = le32_to_cpu(hdr->total); + u16 offs[128]; + ++ if (unlikely(!cmp)) ++ return NULL; ++ + fill_table: + if (end > total) + return NULL; diff --git a/queue-6.5/fs-ntfs3-fix-shift-out-of-bounds-in-ntfs_fill_super.patch b/queue-6.5/fs-ntfs3-fix-shift-out-of-bounds-in-ntfs_fill_super.patch new file mode 100644 index 00000000000..1fd5d7f29d7 --- /dev/null +++ b/queue-6.5/fs-ntfs3-fix-shift-out-of-bounds-in-ntfs_fill_super.patch @@ -0,0 +1,73 @@ +From 91a4b1ee78cb100b19b70f077c247f211110348f Mon Sep 17 00:00:00 2001 +From: Konstantin Komarov +Date: Fri, 30 Jun 2023 16:25:25 +0400 +Subject: fs/ntfs3: Fix shift-out-of-bounds in ntfs_fill_super + +From: Konstantin Komarov + +commit 91a4b1ee78cb100b19b70f077c247f211110348f upstream. 
+ +Reported-by: syzbot+478c1bf0e6bf4a8f3a04@syzkaller.appspotmail.com +Signed-off-by: Konstantin Komarov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ntfs3/ntfs_fs.h | 2 ++ + fs/ntfs3/super.c | 26 ++++++++++++++++++++------ + 2 files changed, 22 insertions(+), 6 deletions(-) + +--- a/fs/ntfs3/ntfs_fs.h ++++ b/fs/ntfs3/ntfs_fs.h +@@ -42,9 +42,11 @@ enum utf16_endian; + #define MINUS_ONE_T ((size_t)(-1)) + /* Biggest MFT / smallest cluster */ + #define MAXIMUM_BYTES_PER_MFT 4096 ++#define MAXIMUM_SHIFT_BYTES_PER_MFT 12 + #define NTFS_BLOCKS_PER_MFT_RECORD (MAXIMUM_BYTES_PER_MFT / 512) + + #define MAXIMUM_BYTES_PER_INDEX 4096 ++#define MAXIMUM_SHIFT_BYTES_PER_INDEX 12 + #define NTFS_BLOCKS_PER_INODE (MAXIMUM_BYTES_PER_INDEX / 512) + + /* NTFS specific error code when fixup failed. */ +--- a/fs/ntfs3/super.c ++++ b/fs/ntfs3/super.c +@@ -906,9 +906,17 @@ check_boot: + goto out; + } + +- sbi->record_size = record_size = +- boot->record_size < 0 ? 1 << (-boot->record_size) : +- (u32)boot->record_size << cluster_bits; ++ if (boot->record_size >= 0) { ++ record_size = (u32)boot->record_size << cluster_bits; ++ } else if (-boot->record_size <= MAXIMUM_SHIFT_BYTES_PER_MFT) { ++ record_size = 1u << (-boot->record_size); ++ } else { ++ ntfs_err(sb, "%s: invalid record size %d.", hint, ++ boot->record_size); ++ goto out; ++ } ++ ++ sbi->record_size = record_size; + sbi->record_bits = blksize_bits(record_size); + sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes + +@@ -925,9 +933,15 @@ check_boot: + goto out; + } + +- sbi->index_size = boot->index_size < 0 ? +- 1u << (-boot->index_size) : +- (u32)boot->index_size << cluster_bits; ++ if (boot->index_size >= 0) { ++ sbi->index_size = (u32)boot->index_size << cluster_bits; ++ } else if (-boot->index_size <= MAXIMUM_SHIFT_BYTES_PER_INDEX) { ++ sbi->index_size = 1u << (-boot->index_size); ++ } else { ++ ntfs_err(sb, "%s: invalid index size %d.", hint, ++ boot->index_size); ++ goto out; ++ } + + /* Check index record size. 
*/ + if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) { diff --git a/queue-6.5/kvm-x86-constrain-guest-supported-xfeatures-only-at-kvm_get_xsave-2.patch b/queue-6.5/kvm-x86-constrain-guest-supported-xfeatures-only-at-kvm_get_xsave-2.patch new file mode 100644 index 00000000000..37338e2960d --- /dev/null +++ b/queue-6.5/kvm-x86-constrain-guest-supported-xfeatures-only-at-kvm_get_xsave-2.patch @@ -0,0 +1,115 @@ +From 8647c52e9504c99752a39f1d44f6268f82c40a5c Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 27 Sep 2023 17:19:53 -0700 +Subject: KVM: x86: Constrain guest-supported xfeatures only at KVM_GET_XSAVE{2} + +From: Sean Christopherson + +commit 8647c52e9504c99752a39f1d44f6268f82c40a5c upstream. + +Mask off xfeatures that aren't exposed to the guest only when saving guest +state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly. +Preserving the maximal set of xfeatures in user_xfeatures restores KVM's +ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu: +Limit guest user_xfeatures to supported bits of XCR0") allowed userspace +to load xfeatures that are supported by the host, irrespective of what +xfeatures are exposed to the guest. + +There is no known use case where userspace *intentionally* loads xfeatures +that aren't exposed to the guest, but the bug fixed by commit ad856280ddea +was specifically that KVM_GET_SAVE{2} would save xfeatures that weren't +exposed to the guest, e.g. would lead to userspace unintentionally loading +guest-unsupported xfeatures when live migrating a VM. + +Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially +problematic for QEMU-based setups, as QEMU has a bug where instead of +terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops +loading guest state, i.e. resumes the guest after live migration with +incomplete guest state, and ultimately results in guest data corruption. 
+ +Note, letting userspace restore all host-supported xfeatures does not fix +setups where a VM is migrated from a host *without* commit ad856280ddea, +to a target with a subset of host-supported xfeatures. However there is +no way to safely address that scenario, e.g. KVM could silently drop the +unsupported features, but that would be a clear violation of KVM's ABI and +so would require userspace to opt-in, at which point userspace could +simply be updated to sanitize the to-be-loaded XSAVE state. + +Reported-by: Tyler Stachecki +Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net +Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") +Cc: stable@vger.kernel.org +Cc: Leonardo Bras +Signed-off-by: Sean Christopherson +Acked-by: Dave Hansen +Message-Id: <20230928001956.924301-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/xstate.c | 5 +---- + arch/x86/kvm/cpuid.c | 8 -------- + arch/x86/kvm/x86.c | 18 ++++++++++++++++-- + 3 files changed, 17 insertions(+), 14 deletions(-) + +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures + fpregs_restore_userregs(); + + newfps->xfeatures = curfps->xfeatures | xfeatures; +- +- if (!guest_fpu) +- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; +- ++ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + newfps->xfd = curfps->xfd & ~xfeatures; + + /* Do the final updates within the locked region */ +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -326,14 +326,6 @@ static void kvm_vcpu_after_set_cpuid(str + vcpu->arch.guest_supported_xcr0 = + cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + +- /* +- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if +- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't +- * supported by the 
host. +- */ +- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | +- XFEATURE_MASK_FPSSE; +- + kvm_update_pv_runtime(vcpu); + + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5389,12 +5389,26 @@ static int kvm_vcpu_ioctl_x86_set_debugr + static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) + { ++ /* ++ * Only copy state for features that are enabled for the guest. The ++ * state itself isn't problematic, but setting bits in the header for ++ * features that are supported in *this* host but not exposed to the ++ * guest can result in KVM_SET_XSAVE failing when live migrating to a ++ * compatible host without the features that are NOT exposed to the ++ * guest. ++ * ++ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if ++ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't ++ * supported by the host. ++ */ ++ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 | ++ XFEATURE_MASK_FPSSE; ++ + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) + return; + + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, +- vcpu->arch.guest_fpu.fpstate->user_xfeatures, +- vcpu->arch.pkru); ++ supported_xcr0, vcpu->arch.pkru); + } + + static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, diff --git a/queue-6.5/kvm-x86-pmu-truncate-counter-value-to-allowed-width-on-write.patch b/queue-6.5/kvm-x86-pmu-truncate-counter-value-to-allowed-width-on-write.patch new file mode 100644 index 00000000000..83c51d5233f --- /dev/null +++ b/queue-6.5/kvm-x86-pmu-truncate-counter-value-to-allowed-width-on-write.patch @@ -0,0 +1,92 @@ +From b29a2acd36dd7a33c63f260df738fb96baa3d4f8 Mon Sep 17 00:00:00 2001 +From: Roman Kagan +Date: Thu, 4 May 2023 14:00:42 +0200 +Subject: KVM: x86/pmu: Truncate counter value to allowed width on write + +From: Roman Kagan + +commit b29a2acd36dd7a33c63f260df738fb96baa3d4f8 
upstream. + +Performance counters are defined to have width less than 64 bits. The +vPMU code maintains the counters in u64 variables but assumes the value +to fit within the defined width. However, for Intel non-full-width +counters (MSR_IA32_PERFCTRx) the value receieved from the guest is +truncated to 32 bits and then sign-extended to full 64 bits. If a +negative value is set, it's sign-extended to 64 bits, but then in +kvm_pmu_incr_counter() it's incremented, truncated, and compared to the +previous value for overflow detection. + +That previous value is not truncated, so it always evaluates bigger than +the truncated new one, and a PMI is injected. If the PMI handler writes +a negative counter value itself, the vCPU never quits the PMI loop. + +Turns out that Linux PMI handler actually does write the counter with +the value just read with RDPMC, so when no full-width support is exposed +via MSR_IA32_PERF_CAPABILITIES, and the guest initializes the counter to +a negative value, it locks up. + +This has been observed in the field, for example, when the guest configures +atop to use perfevents and runs two instances of it simultaneously. + +To address the problem, maintain the invariant that the counter value +always fits in the defined bit width, by truncating the received value +in the respective set_msr methods. For better readability, factor the +out into a helper function, pmc_write_counter(), shared by vmx and svm +parts. 
+ +Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") +Cc: stable@vger.kernel.org +Signed-off-by: Roman Kagan +Link: https://lore.kernel.org/all/20230504120042.785651-1-rkagan@amazon.de +Tested-by: Like Xu +[sean: tweak changelog, s/set/write in the helper] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/pmu.h | 6 ++++++ + arch/x86/kvm/svm/pmu.c | 2 +- + arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- + 3 files changed, 9 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/pmu.h ++++ b/arch/x86/kvm/pmu.h +@@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struc + return counter & pmc_bitmask(pmc); + } + ++static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val) ++{ ++ pmc->counter += val - pmc_read_counter(pmc); ++ pmc->counter &= pmc_bitmask(pmc); ++} ++ + static inline void pmc_release_perf_event(struct kvm_pmc *pmc) + { + if (pmc->perf_event) { +--- a/arch/x86/kvm/svm/pmu.c ++++ b/arch/x86/kvm/svm/pmu.c +@@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vc + /* MSR_PERFCTRn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); + if (pmc) { +- pmc->counter += data - pmc_read_counter(pmc); ++ pmc_write_counter(pmc, data); + pmc_update_sample_period(pmc); + return 0; + } +--- a/arch/x86/kvm/vmx/pmu_intel.c ++++ b/arch/x86/kvm/vmx/pmu_intel.c +@@ -406,11 +406,11 @@ static int intel_pmu_set_msr(struct kvm_ + if (!msr_info->host_initiated && + !(msr & MSR_PMC_FULL_WIDTH_BIT)) + data = (s64)(s32)data; +- pmc->counter += data - pmc_read_counter(pmc); ++ pmc_write_counter(pmc, data); + pmc_update_sample_period(pmc); + break; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { +- pmc->counter += data - pmc_read_counter(pmc); ++ pmc_write_counter(pmc, data); + pmc_update_sample_period(pmc); + break; + } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { diff --git a/queue-6.5/mm-slab-do-not-create-kmalloc-caches-smaller-than-arch_slab_minalign.patch 
b/queue-6.5/mm-slab-do-not-create-kmalloc-caches-smaller-than-arch_slab_minalign.patch new file mode 100644 index 00000000000..3102cf5e484 --- /dev/null +++ b/queue-6.5/mm-slab-do-not-create-kmalloc-caches-smaller-than-arch_slab_minalign.patch @@ -0,0 +1,73 @@ +From c15cdea517414e0b29a11e0a0e2443d127c9109b Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Fri, 6 Oct 2023 17:39:34 +0100 +Subject: mm: slab: Do not create kmalloc caches smaller than arch_slab_minalign() + +From: Catalin Marinas + +commit c15cdea517414e0b29a11e0a0e2443d127c9109b upstream. + +Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment +if DMA bouncing possible") allows architectures with non-coherent DMA to +define a small ARCH_KMALLOC_MINALIGN (e.g. sizeof(unsigned long long)) +and this has been enabled on arm64. With KASAN_HW_TAGS enabled, however, +ARCH_SLAB_MINALIGN becomes 16 on arm64 (arch_slab_minalign() dynamically +selects it since commit d949a8155d13 ("mm: make minimum slab alignment a +runtime property")). This can lead to a situation where kmalloc-8 caches +are attempted to be created with a kmem_caches.size aligned to 16. When +the cache is mergeable, it can lead to kernel warnings like: + +sysfs: cannot create duplicate filename '/kernel/slab/:d-0000016' +CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc1-00001-gda98843cd306-dirty #5 +Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 +Call trace: + dump_backtrace+0x90/0xe8 + show_stack+0x18/0x24 + dump_stack_lvl+0x48/0x60 + dump_stack+0x18/0x24 + sysfs_warn_dup+0x64/0x80 + sysfs_create_dir_ns+0xe8/0x108 + kobject_add_internal+0x98/0x264 + kobject_init_and_add+0x8c/0xd8 + sysfs_slab_add+0x12c/0x248 + slab_sysfs_init+0x98/0x14c + do_one_initcall+0x6c/0x1b0 + kernel_init_freeable+0x1c0/0x288 + kernel_init+0x24/0x1e0 + ret_from_fork+0x10/0x20 +kobject: kobject_add_internal failed for :d-0000016 with -EEXIST, don't try to register things with the same name in the same directory. 
+SLUB: Unable to add boot slab dma-kmalloc-8 to sysfs + +Limit the __kmalloc_minalign() return value (used to create the +kmalloc-* caches) to arch_slab_minalign() so that kmalloc-8 caches are +skipped when KASAN_HW_TAGS is enabled (both config and runtime). + +Reported-by: Mark Rutland +Fixes: b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") +Signed-off-by: Catalin Marinas +Cc: Peter Collingbourne +Cc: stable@vger.kernel.org # 6.5.x +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + mm/slab_common.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -864,11 +864,13 @@ void __init setup_kmalloc_cache_index_ta + + static unsigned int __kmalloc_minalign(void) + { ++ unsigned int minalign = dma_get_cache_alignment(); ++ + #ifdef CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC + if (io_tlb_default_mem.nslabs) +- return ARCH_KMALLOC_MINALIGN; ++ minalign = ARCH_KMALLOC_MINALIGN; + #endif +- return dma_get_cache_alignment(); ++ return max(minalign, arch_slab_minalign()); + } + + void __init diff --git a/queue-6.5/mptcp-more-conservative-check-for-zero-probes.patch b/queue-6.5/mptcp-more-conservative-check-for-zero-probes.patch new file mode 100644 index 00000000000..748d5810213 --- /dev/null +++ b/queue-6.5/mptcp-more-conservative-check-for-zero-probes.patch @@ -0,0 +1,93 @@ +From 72377ab2d671befd6390a1d5677f5cca61235b65 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Wed, 18 Oct 2023 11:23:54 -0700 +Subject: mptcp: more conservative check for zero probes + +From: Paolo Abeni + +commit 72377ab2d671befd6390a1d5677f5cca61235b65 upstream. 
+ +Christoph reported that the MPTCP protocol can find the subflow-level +write queue unexpectedly not empty while crafting a zero-window probe, +hitting a warning: + +------------[ cut here ]------------ +WARNING: CPU: 0 PID: 188 at net/mptcp/protocol.c:1312 mptcp_sendmsg_frag+0xc06/0xe70 +Modules linked in: +CPU: 0 PID: 188 Comm: kworker/0:2 Not tainted 6.6.0-rc2-g1176aa719d7a #47 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 +Workqueue: events mptcp_worker +RIP: 0010:mptcp_sendmsg_frag+0xc06/0xe70 net/mptcp/protocol.c:1312 +RAX: 47d0530de347ff6a RBX: 47d0530de347ff6b RCX: ffff8881015d3c00 +RDX: ffff8881015d3c00 RSI: 47d0530de347ff6b RDI: 47d0530de347ff6b +RBP: 47d0530de347ff6b R08: ffffffff8243c6a8 R09: ffffffff82042d9c +R10: 0000000000000002 R11: ffffffff82056850 R12: ffff88812a13d580 +R13: 0000000000000001 R14: ffff88812b375e50 R15: ffff88812bbf3200 +FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000695118 CR3: 0000000115dfc001 CR4: 0000000000170ef0 +Call Trace: + + __subflow_push_pending+0xa4/0x420 net/mptcp/protocol.c:1545 + __mptcp_push_pending+0x128/0x3b0 net/mptcp/protocol.c:1614 + mptcp_release_cb+0x218/0x5b0 net/mptcp/protocol.c:3391 + release_sock+0xf6/0x100 net/core/sock.c:3521 + mptcp_worker+0x6e8/0x8f0 net/mptcp/protocol.c:2746 + process_scheduled_works+0x341/0x690 kernel/workqueue.c:2630 + worker_thread+0x3a7/0x610 kernel/workqueue.c:2784 + kthread+0x143/0x180 kernel/kthread.c:388 + ret_from_fork+0x4d/0x60 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:304 + + +The root cause of the issue is that expectations are wrong: e.g. due +to MPTCP-level re-injection we can hit the critical condition. + +Explicitly avoid the zero-window probe when the subflow write queue +is not empty and drop the related warnings. 
+ +Reported-by: Christoph Paasch +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/444 +Fixes: f70cad1085d1 ("mptcp: stop relying on tcp_tx_skb_cache") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Paolo Abeni +Signed-off-by: Mat Martineau +Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-3-17ecb002e41d@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1300,7 +1300,7 @@ alloc_skb: + if (copy == 0) { + u64 snd_una = READ_ONCE(msk->snd_una); + +- if (snd_una != msk->snd_nxt) { ++ if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) { + tcp_remove_empty_skb(ssk); + return 0; + } +@@ -1308,11 +1308,6 @@ alloc_skb: + zero_window_probe = true; + data_seq = snd_una - 1; + copy = 1; +- +- /* all mptcp-level data is acked, no skbs should be present into the +- * ssk write queue +- */ +- WARN_ON_ONCE(reuse_skb); + } + + copy = min_t(size_t, copy, info->limit - info->sent); +@@ -1341,7 +1336,6 @@ alloc_skb: + if (reuse_skb) { + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + mpext->data_len += copy; +- WARN_ON_ONCE(zero_window_probe); + goto out; + } + diff --git a/queue-6.5/netfilter-nft_payload-fix-wrong-mac-header-matching.patch b/queue-6.5/netfilter-nft_payload-fix-wrong-mac-header-matching.patch new file mode 100644 index 00000000000..b52c7d26d09 --- /dev/null +++ b/queue-6.5/netfilter-nft_payload-fix-wrong-mac-header-matching.patch @@ -0,0 +1,37 @@ +From d351c1ea2de3e36e608fc355d8ae7d0cc80e6cd6 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Sun, 8 Oct 2023 19:36:53 +0200 +Subject: netfilter: nft_payload: fix wrong mac header matching +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Florian Westphal + +commit d351c1ea2de3e36e608fc355d8ae7d0cc80e6cd6 upstream. 
+ +mcast packets get looped back to the local machine. +Such packets have a 0-length mac header, we should treat +this like "mac header not set" and abort rule evaluation. + +As-is, we just copy data from the network header instead. + +Fixes: 96518518cc41 ("netfilter: add nftables") +Reported-by: Blažej Krajňák +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_payload.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netfilter/nft_payload.c ++++ b/net/netfilter/nft_payload.c +@@ -179,7 +179,7 @@ void nft_payload_eval(const struct nft_e + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: +- if (!skb_mac_header_was_set(skb)) ++ if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0) + goto err; + + if (skb_vlan_tag_present(skb) && diff --git a/queue-6.5/revert-net-wwan-iosm-enable-runtime-pm-support-for-7560.patch b/queue-6.5/revert-net-wwan-iosm-enable-runtime-pm-support-for-7560.patch new file mode 100644 index 00000000000..d5fe59f65c2 --- /dev/null +++ b/queue-6.5/revert-net-wwan-iosm-enable-runtime-pm-support-for-7560.patch @@ -0,0 +1,306 @@ +From 1db34aa58d80988f5ee99d2fd9d8f7489c3b0681 Mon Sep 17 00:00:00 2001 +From: Bagas Sanjaya +Date: Tue, 17 Oct 2023 15:08:12 +0700 +Subject: Revert "net: wwan: iosm: enable runtime pm support for 7560" + +From: Bagas Sanjaya + +commit 1db34aa58d80988f5ee99d2fd9d8f7489c3b0681 upstream. + +Runtime power management support breaks Intel LTE modem where dmesg dump +shows timeout errors: + +``` +[ 72.027442] iosm 0000:01:00.0: msg timeout +[ 72.531638] iosm 0000:01:00.0: msg timeout +[ 73.035414] iosm 0000:01:00.0: msg timeout +[ 73.540359] iosm 0000:01:00.0: msg timeout +``` + +Furthermore, when shutting down with `poweroff` and modem attached, the +system rebooted instead of powering down as expected. The modem works +again only after power cycling. 
+ +Revert runtime power management support for IOSM driver as introduced by +commit e4f5073d53be6c ("net: wwan: iosm: enable runtime pm support for +7560"). + +Fixes: e4f5073d53be ("net: wwan: iosm: enable runtime pm support for 7560") +Reported-by: Martin +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217996 +Link: https://lore.kernel.org/r/267abf02-4b60-4a2e-92cd-709e3da6f7d3@gmail.com/ +Signed-off-by: Bagas Sanjaya +Reviewed-by: Loic Poulain +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wwan/iosm/iosm_ipc_imem.c | 17 ----------------- + drivers/net/wwan/iosm/iosm_ipc_imem.h | 2 -- + drivers/net/wwan/iosm/iosm_ipc_pcie.c | 4 +--- + drivers/net/wwan/iosm/iosm_ipc_port.c | 17 +---------------- + drivers/net/wwan/iosm/iosm_ipc_trace.c | 8 -------- + drivers/net/wwan/iosm/iosm_ipc_wwan.c | 21 ++------------------- + 6 files changed, 4 insertions(+), 65 deletions(-) + +diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c +index 635301d677e1..829515a601b3 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_imem.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c +@@ -4,7 +4,6 @@ + */ + + #include +-#include + + #include "iosm_ipc_chnl_cfg.h" + #include "iosm_ipc_devlink.h" +@@ -632,11 +631,6 @@ static void ipc_imem_run_state_worker(struct work_struct *instance) + /* Complete all memory stores after setting bit */ + smp_mb__after_atomic(); + +- if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7560_ID) { +- pm_runtime_mark_last_busy(ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_imem->dev); +- } +- + return; + + err_ipc_mux_deinit: +@@ -1240,7 +1234,6 @@ void ipc_imem_cleanup(struct iosm_imem *ipc_imem) + + /* forward MDM_NOT_READY to listeners */ + ipc_uevent_send(ipc_imem->dev, UEVENT_MDM_NOT_READY); +- pm_runtime_get_sync(ipc_imem->dev); + + hrtimer_cancel(&ipc_imem->td_alloc_timer); + hrtimer_cancel(&ipc_imem->tdupdate_timer); +@@ -1426,16 +1419,6 @@ struct iosm_imem *ipc_imem_init(struct 
iosm_pcie *pcie, unsigned int device_id, + + set_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag); + } +- +- if (!pm_runtime_enabled(ipc_imem->dev)) +- pm_runtime_enable(ipc_imem->dev); +- +- pm_runtime_set_autosuspend_delay(ipc_imem->dev, +- IPC_MEM_AUTO_SUSPEND_DELAY_MS); +- pm_runtime_use_autosuspend(ipc_imem->dev); +- pm_runtime_allow(ipc_imem->dev); +- pm_runtime_mark_last_busy(ipc_imem->dev); +- + return ipc_imem; + devlink_channel_fail: + ipc_devlink_deinit(ipc_imem->ipc_devlink); +diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h +index 0144b45e2afb..5664ac507c90 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_imem.h ++++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h +@@ -103,8 +103,6 @@ struct ipc_chnl_cfg; + #define FULLY_FUNCTIONAL 0 + #define IOSM_DEVLINK_INIT 1 + +-#define IPC_MEM_AUTO_SUSPEND_DELAY_MS 5000 +- + /* List of the supported UL/DL pipes. */ + enum ipc_mem_pipes { + IPC_MEM_PIPE_0 = 0, +diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c +index 3a259c9abefd..04517bd3325a 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c +@@ -6,7 +6,6 @@ + #include + #include + #include +-#include + #include + + #include "iosm_ipc_imem.h" +@@ -438,8 +437,7 @@ static int __maybe_unused ipc_pcie_resume_cb(struct device *dev) + return 0; + } + +-static DEFINE_RUNTIME_DEV_PM_OPS(iosm_ipc_pm, ipc_pcie_suspend_cb, +- ipc_pcie_resume_cb, NULL); ++static SIMPLE_DEV_PM_OPS(iosm_ipc_pm, ipc_pcie_suspend_cb, ipc_pcie_resume_cb); + + static struct pci_driver iosm_ipc_driver = { + .name = KBUILD_MODNAME, +diff --git a/drivers/net/wwan/iosm/iosm_ipc_port.c b/drivers/net/wwan/iosm/iosm_ipc_port.c +index 2ba1ddca3945..5d5b4183e14a 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_port.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_port.c +@@ -3,8 +3,6 @@ + * Copyright (C) 2020-21 Intel Corporation. 
+ */ + +-#include +- + #include "iosm_ipc_chnl_cfg.h" + #include "iosm_ipc_imem_ops.h" + #include "iosm_ipc_port.h" +@@ -15,16 +13,12 @@ static int ipc_port_ctrl_start(struct wwan_port *port) + struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port); + int ret = 0; + +- pm_runtime_get_sync(ipc_port->ipc_imem->dev); + ipc_port->channel = ipc_imem_sys_port_open(ipc_port->ipc_imem, + ipc_port->chl_id, + IPC_HP_CDEV_OPEN); + if (!ipc_port->channel) + ret = -EIO; + +- pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev); +- + return ret; + } + +@@ -33,24 +27,15 @@ static void ipc_port_ctrl_stop(struct wwan_port *port) + { + struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port); + +- pm_runtime_get_sync(ipc_port->ipc_imem->dev); + ipc_imem_sys_port_close(ipc_port->ipc_imem, ipc_port->channel); +- pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev); + } + + /* transfer control data to modem */ + static int ipc_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb) + { + struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port); +- int ret; + +- pm_runtime_get_sync(ipc_port->ipc_imem->dev); +- ret = ipc_imem_sys_cdev_write(ipc_port, skb); +- pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev); +- +- return ret; ++ return ipc_imem_sys_cdev_write(ipc_port, skb); + } + + static const struct wwan_port_ops ipc_wwan_ctrl_ops = { +diff --git a/drivers/net/wwan/iosm/iosm_ipc_trace.c b/drivers/net/wwan/iosm/iosm_ipc_trace.c +index 4368373797b6..eeecfa3d10c5 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_trace.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_trace.c +@@ -3,9 +3,7 @@ + * Copyright (C) 2020-2021 Intel Corporation. 
+ */ + +-#include + #include +- + #include "iosm_ipc_trace.h" + + /* sub buffer size and number of sub buffer */ +@@ -99,8 +97,6 @@ static ssize_t ipc_trace_ctrl_file_write(struct file *filp, + if (ret) + return ret; + +- pm_runtime_get_sync(ipc_trace->ipc_imem->dev); +- + mutex_lock(&ipc_trace->trc_mutex); + if (val == TRACE_ENABLE && ipc_trace->mode != TRACE_ENABLE) { + ipc_trace->channel = ipc_imem_sys_port_open(ipc_trace->ipc_imem, +@@ -121,10 +117,6 @@ static ssize_t ipc_trace_ctrl_file_write(struct file *filp, + ret = count; + unlock: + mutex_unlock(&ipc_trace->trc_mutex); +- +- pm_runtime_mark_last_busy(ipc_trace->ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_trace->ipc_imem->dev); +- + return ret; + } + +diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c +index 93d17de08786..ff747fc79aaf 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c +@@ -6,7 +6,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -52,13 +51,11 @@ static int ipc_wwan_link_open(struct net_device *netdev) + struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev); + struct iosm_wwan *ipc_wwan = priv->ipc_wwan; + int if_id = priv->if_id; +- int ret = 0; + + if (if_id < IP_MUX_SESSION_START || + if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist)) + return -EINVAL; + +- pm_runtime_get_sync(ipc_wwan->ipc_imem->dev); + /* get channel id */ + priv->ch_id = ipc_imem_sys_wwan_open(ipc_wwan->ipc_imem, if_id); + +@@ -66,8 +63,7 @@ static int ipc_wwan_link_open(struct net_device *netdev) + dev_err(ipc_wwan->dev, + "cannot connect wwan0 & id %d to the IPC mem layer", + if_id); +- ret = -ENODEV; +- goto err_out; ++ return -ENODEV; + } + + /* enable tx path, DL data may follow */ +@@ -76,11 +72,7 @@ static int ipc_wwan_link_open(struct net_device *netdev) + dev_dbg(ipc_wwan->dev, "Channel id %d allocated to if_id %d", + priv->ch_id, priv->if_id); + +-err_out: +- 
pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev); +- +- return ret; ++ return 0; + } + + /* Bring-down the wwan net link */ +@@ -90,12 +82,9 @@ static int ipc_wwan_link_stop(struct net_device *netdev) + + netif_stop_queue(netdev); + +- pm_runtime_get_sync(priv->ipc_wwan->ipc_imem->dev); + ipc_imem_sys_wwan_close(priv->ipc_wwan->ipc_imem, priv->if_id, + priv->ch_id); + priv->ch_id = -1; +- pm_runtime_mark_last_busy(priv->ipc_wwan->ipc_imem->dev); +- pm_runtime_put_autosuspend(priv->ipc_wwan->ipc_imem->dev); + + return 0; + } +@@ -117,7 +106,6 @@ static netdev_tx_t ipc_wwan_link_transmit(struct sk_buff *skb, + if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist)) + return -EINVAL; + +- pm_runtime_get(ipc_wwan->ipc_imem->dev); + /* Send the SKB to device for transmission */ + ret = ipc_imem_sys_wwan_transmit(ipc_wwan->ipc_imem, + if_id, priv->ch_id, skb); +@@ -131,14 +119,9 @@ static netdev_tx_t ipc_wwan_link_transmit(struct sk_buff *skb, + ret = NETDEV_TX_BUSY; + dev_err(ipc_wwan->dev, "unable to push packets"); + } else { +- pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev); + goto exit; + } + +- pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev); +- pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev); +- + return ret; + + exit: +-- +2.42.0 + diff --git a/queue-6.5/selftests-mptcp-join-no-rst-when-rm-subflow-addr.patch b/queue-6.5/selftests-mptcp-join-no-rst-when-rm-subflow-addr.patch new file mode 100644 index 00000000000..fc88726e45e --- /dev/null +++ b/queue-6.5/selftests-mptcp-join-no-rst-when-rm-subflow-addr.patch @@ -0,0 +1,133 @@ +From 2cfaa8b3b7aece3c7b13dd10db20dcea65875692 Mon Sep 17 00:00:00 2001 +From: Matthieu Baerts +Date: Wed, 18 Oct 2023 11:23:56 -0700 +Subject: selftests: mptcp: join: no RST when rm subflow/addr + +From: Matthieu Baerts + +commit 2cfaa8b3b7aece3c7b13dd10db20dcea65875692 upstream. 
+ +Recently, we noticed that some RST were wrongly generated when removing +the initial subflow. + +This patch makes sure RST are not sent when removing any subflows or any +addresses. + +Fixes: c2b2ae3925b6 ("mptcp: handle correctly disconnect() failures") +Cc: stable@vger.kernel.org +Acked-by: Paolo Abeni +Signed-off-by: Matthieu Baerts +Signed-off-by: Mat Martineau +Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-5-17ecb002e41d@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -2282,6 +2282,7 @@ remove_tests() + chk_join_nr 1 1 1 + chk_rm_tx_nr 1 + chk_rm_nr 1 1 ++ chk_rst_nr 0 0 + fi + + # multiple subflows, remove +@@ -2294,6 +2295,7 @@ remove_tests() + run_tests $ns1 $ns2 10.0.1.1 slow + chk_join_nr 2 2 2 + chk_rm_nr 2 2 ++ chk_rst_nr 0 0 + fi + + # single address, remove +@@ -2306,6 +2308,7 @@ remove_tests() + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert ++ chk_rst_nr 0 0 + fi + + # subflow and signal, remove +@@ -2319,6 +2322,7 @@ remove_tests() + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 ++ chk_rst_nr 0 0 + fi + + # subflows and signal, remove +@@ -2333,6 +2337,7 @@ remove_tests() + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 ++ chk_rst_nr 0 0 + fi + + # addresses remove +@@ -2347,6 +2352,7 @@ remove_tests() + chk_join_nr 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert ++ chk_rst_nr 0 0 + fi + + # invalid addresses remove +@@ -2361,6 +2367,7 @@ remove_tests() + chk_join_nr 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert ++ chk_rst_nr 0 0 + fi + + # subflows and signal, flush +@@ -2375,6 +2382,7 @@ remove_tests() + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 1 3 invert simult ++ chk_rst_nr 0 0 + fi + + # subflows flush +@@ -2394,6 +2402,7 @@ remove_tests() + else + 
chk_rm_nr 3 3 + fi ++ chk_rst_nr 0 0 + fi + + # addresses flush +@@ -2408,6 +2417,7 @@ remove_tests() + chk_join_nr 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert simult ++ chk_rst_nr 0 0 + fi + + # invalid addresses flush +@@ -2422,6 +2432,7 @@ remove_tests() + chk_join_nr 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert ++ chk_rst_nr 0 0 + fi + + # remove id 0 subflow +@@ -2433,6 +2444,7 @@ remove_tests() + run_tests $ns1 $ns2 10.0.1.1 slow + chk_join_nr 1 1 1 + chk_rm_nr 1 1 ++ chk_rst_nr 0 0 + fi + + # remove id 0 address +@@ -2445,6 +2457,7 @@ remove_tests() + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert ++ chk_rst_nr 0 0 invert + fi + } + diff --git a/queue-6.5/series b/queue-6.5/series index f1aa1afad5d..22465016e0b 100644 --- a/queue-6.5/series +++ b/queue-6.5/series @@ -13,3 +13,24 @@ btrfs-fix-stripe-length-calculation-for-non-zoned-data-chunk-allocation.patch nfc-nci-fix-possible-null-pointer-dereference-in-send_acknowledge.patch regmap-fix-null-deref-on-lookup.patch kvm-x86-mask-lvtpc-when-handling-a-pmi.patch +x86-sev-disable-mmio-emulation-from-user-mode.patch +x86-sev-check-iobm-for-ioio-exceptions-from-user-space.patch +x86-sev-check-for-user-space-ioio-pointing-to-kernel-space.patch +x86-fpu-allow-caller-to-constrain-xfeatures-when-copying-to-uabi-buffer.patch +kvm-x86-pmu-truncate-counter-value-to-allowed-width-on-write.patch +kvm-x86-constrain-guest-supported-xfeatures-only-at-kvm_get_xsave-2.patch +x86-kvm-svm-always-update-the-x2avic-msr-interception.patch +x86-kvm-svm-add-support-for-invalid-ipi-vector-interception.patch +x86-kvm-svm-refresh-avic-inhibition-in-svm_leave_nested.patch +audit-io_uring-io_uring-openat-triggers-audit-reference-count-underflow.patch +tcp-check-mptcp-level-constraints-for-backlog-coalescing.patch +mptcp-more-conservative-check-for-zero-probes.patch +selftests-mptcp-join-no-rst-when-rm-subflow-addr.patch +mm-slab-do-not-create-kmalloc-caches-smaller-than-arch_slab_minalign.patch 
+fs-ntfs3-fix-oob-read-in-ntfs_init_from_boot.patch +fs-ntfs3-fix-possible-null-pointer-dereference-in-hdr_find_e.patch +fs-ntfs3-fix-panic-about-slab-out-of-bounds-caused-by-ntfs_list_ea.patch +fs-ntfs3-fix-shift-out-of-bounds-in-ntfs_fill_super.patch +fs-ntfs3-fix-deadlock-in-mark_as_free_ex.patch +revert-net-wwan-iosm-enable-runtime-pm-support-for-7560.patch +netfilter-nft_payload-fix-wrong-mac-header-matching.patch diff --git a/queue-6.5/tcp-check-mptcp-level-constraints-for-backlog-coalescing.patch b/queue-6.5/tcp-check-mptcp-level-constraints-for-backlog-coalescing.patch new file mode 100644 index 00000000000..8ed4c4a7415 --- /dev/null +++ b/queue-6.5/tcp-check-mptcp-level-constraints-for-backlog-coalescing.patch @@ -0,0 +1,47 @@ +From 6db8a37dfc541e059851652cfd4f0bb13b8ff6af Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Wed, 18 Oct 2023 11:23:53 -0700 +Subject: tcp: check mptcp-level constraints for backlog coalescing + +From: Paolo Abeni + +commit 6db8a37dfc541e059851652cfd4f0bb13b8ff6af upstream. + +The MPTCP protocol can acquire the subflow-level socket lock and +cause the tcp backlog usage. When inserting new skbs into the +backlog, the stack will try to coalesce them. + +Currently, we have no check in place to ensure that such coalescing +will respect the MPTCP-level DSS, and that may cause data stream +corruption, as reported by Christoph. + +Address the issue by adding the relevant admission check for coalescing +in tcp_add_backlog(). + +Note the issue is not easy to reproduce, as the MPTCP protocol tries +hard to avoid acquiring the subflow-level socket lock. 
+ +Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path") +Cc: stable@vger.kernel.org +Reported-by: Christoph Paasch +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/420 +Reviewed-by: Mat Martineau +Signed-off-by: Paolo Abeni +Signed-off-by: Mat Martineau +Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-2-17ecb002e41d@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1869,6 +1869,7 @@ bool tcp_add_backlog(struct sock *sk, st + #ifdef CONFIG_TLS_DEVICE + tail->decrypted != skb->decrypted || + #endif ++ !mptcp_skb_can_collapse(tail, skb) || + thtail->doff != th->doff || + memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) + goto no_coalesce; diff --git a/queue-6.5/x86-fpu-allow-caller-to-constrain-xfeatures-when-copying-to-uabi-buffer.patch b/queue-6.5/x86-fpu-allow-caller-to-constrain-xfeatures-when-copying-to-uabi-buffer.patch new file mode 100644 index 00000000000..0b2c2355e7c --- /dev/null +++ b/queue-6.5/x86-fpu-allow-caller-to-constrain-xfeatures-when-copying-to-uabi-buffer.patch @@ -0,0 +1,156 @@ +From 18164f66e6c59fda15c198b371fa008431efdb22 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 27 Sep 2023 17:19:52 -0700 +Subject: x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer + +From: Sean Christopherson + +commit 18164f66e6c59fda15c198b371fa008431efdb22 upstream. + +Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can +constrain which xfeatures are saved into the userspace buffer without +having to modify the user_xfeatures field in KVM's guest_fpu state. + +KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to +guest must not show up in the effective xstate_bv field of the buffer. 
+Saving only the guest-supported xfeatures allows userspace to load the +saved state on a different host with fewer xfeatures, so long as the +target host supports the xfeatures that are exposed to the guest. + +KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to +the set of guest-supported xfeatures, but doing so broke KVM's historical +ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that +are supported by the *host*. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20230928001956.924301-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/fpu/api.h | 3 ++- + arch/x86/kernel/fpu/core.c | 5 +++-- + arch/x86/kernel/fpu/xstate.c | 7 +++++-- + arch/x86/kernel/fpu/xstate.h | 3 ++- + arch/x86/kvm/x86.c | 21 +++++++++------------ + 5 files changed, 21 insertions(+), 18 deletions(-) + +--- a/arch/x86/include/asm/fpu/api.h ++++ b/arch/x86/include/asm/fpu/api.h +@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd( + static inline void fpu_sync_guest_vmexit_xfd_state(void) { } + #endif + +-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); ++extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, ++ unsigned int size, u64 xfeatures, u32 pkru); + extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); + + static inline void fpstate_set_confidential(struct fpu_guest *gfpu) +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_gues + EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); + + void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, +- unsigned int size, u32 pkru) ++ unsigned int size, u64 xfeatures, u32 pkru) + { + struct fpstate *kstate = gfpu->fpstate; + union fpregs_state *ustate = buf; + struct membuf mb = { .p = buf, 
.left = size }; + + if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { +- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); ++ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, ++ XSTATE_COPY_XSAVE); + } else { + memcpy(&ustate->fxsave, &kstate->regs.fxsave, + sizeof(ustate->fxsave)); +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstat + * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer + * @to: membuf descriptor + * @fpstate: The fpstate buffer from which to copy ++ * @xfeatures: The mask of xfeatures to save (XSAVE mode only) + * @pkru_val: The PKRU value to store in the PKRU component + * @copy_mode: The requested copy mode + * +@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstat + * It supports partial copy but @to.pos always starts from zero. + */ + void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, +- u32 pkru_val, enum xstate_copy_mode copy_mode) ++ u64 xfeatures, u32 pkru_val, ++ enum xstate_copy_mode copy_mode) + { + const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); + struct xregs_state *xinit = &init_fpstate.regs.xsave; +@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct me + break; + + case XSTATE_COPY_XSAVE: +- header.xfeatures &= fpstate->user_xfeatures; ++ header.xfeatures &= fpstate->user_xfeatures & xfeatures; + break; + } + +@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct memb + enum xstate_copy_mode copy_mode) + { + __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, ++ tsk->thread.fpu.fpstate->user_xfeatures, + tsk->thread.pkru, copy_mode); + } + +--- a/arch/x86/kernel/fpu/xstate.h ++++ b/arch/x86/kernel/fpu/xstate.h +@@ -43,7 +43,8 @@ enum xstate_copy_mode { + + struct membuf; + extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, +- u32 pkru_val, enum xstate_copy_mode copy_mode); ++ u64 xfeatures, u32 pkru_val, ++ enum 
xstate_copy_mode copy_mode); + extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode mode); + extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5385,26 +5385,23 @@ static int kvm_vcpu_ioctl_x86_set_debugr + return 0; + } + +-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, +- struct kvm_xsave *guest_xsave) ++ ++static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, ++ u8 *state, unsigned int size) + { + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) + return; + +- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, +- guest_xsave->region, +- sizeof(guest_xsave->region), ++ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, ++ vcpu->arch.guest_fpu.fpstate->user_xfeatures, + vcpu->arch.pkru); + } + +-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, +- u8 *state, unsigned int size) ++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, ++ struct kvm_xsave *guest_xsave) + { +- if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) +- return; +- +- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, +- state, size, vcpu->arch.pkru); ++ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, ++ sizeof(guest_xsave->region)); + } + + static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, diff --git a/queue-6.5/x86-kvm-svm-add-support-for-invalid-ipi-vector-interception.patch b/queue-6.5/x86-kvm-svm-add-support-for-invalid-ipi-vector-interception.patch new file mode 100644 index 00000000000..56805356219 --- /dev/null +++ b/queue-6.5/x86-kvm-svm-add-support-for-invalid-ipi-vector-interception.patch @@ -0,0 +1,61 @@ +From 2dcf37abf9d3aab7f975002d29fc7c17272def38 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 28 Sep 2023 20:33:52 +0300 +Subject: x86: KVM: SVM: add support for Invalid IPI Vector interception + +From: 
Maxim Levitsky + +commit 2dcf37abf9d3aab7f975002d29fc7c17272def38 upstream. + +In later revisions of AMD's APM, there is a new 'incomplete IPI' exit code: + +"Invalid IPI Vector - The vector for the specified IPI was set to an +illegal value (VEC < 16)" + +Note that tests on Zen2 machine show that this VM exit doesn't happen and +instead AVIC just does nothing. + +Add support for this exit code by doing nothing, instead of filling +the kernel log with errors. + +Also replace an unthrottled 'pr_err()' if another unknown incomplete +IPI exit happens with vcpu_unimpl() + +(e.g. in case AMD adds yet another 'Invalid IPI' exit reason) + +Cc: +Signed-off-by: Maxim Levitsky +Reviewed-by: Sean Christopherson +Message-Id: <20230928173354.217464-3-mlevitsk@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/svm.h | 1 + + arch/x86/kvm/svm/avic.c | 5 ++++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/svm.h ++++ b/arch/x86/include/asm/svm.h +@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause { + AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, + AVIC_IPI_FAILURE_INVALID_TARGET, + AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, ++ AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, + }; + + #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) +--- a/arch/x86/kvm/svm/avic.c ++++ b/arch/x86/kvm/svm/avic.c +@@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(str + case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: + WARN_ONCE(1, "Invalid backing page\n"); + break; ++ case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR: ++ /* Invalid IPI with vector < 16 */ ++ break; + default: +- pr_err("Unknown IPI interception\n"); ++ vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n"); + } + + return 1; diff --git a/queue-6.5/x86-kvm-svm-always-update-the-x2avic-msr-interception.patch b/queue-6.5/x86-kvm-svm-always-update-the-x2avic-msr-interception.patch new file mode 100644 index 00000000000..40d5371e076 --- /dev/null +++ 
b/queue-6.5/x86-kvm-svm-always-update-the-x2avic-msr-interception.patch @@ -0,0 +1,55 @@ +From b65235f6e102354ccafda601eaa1c5bef5284d21 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 28 Sep 2023 20:33:51 +0300 +Subject: x86: KVM: SVM: always update the x2avic msr interception + +From: Maxim Levitsky + +commit b65235f6e102354ccafda601eaa1c5bef5284d21 upstream. + +The following problem exists since x2avic was enabled in the KVM: + +svm_set_x2apic_msr_interception is called to enable the interception of +the x2apic msrs. + +In particular it is called at the moment the guest resets its apic. + +Assuming that the guest's apic was in x2apic mode, the reset will bring +it back to the xapic mode. + +The svm_set_x2apic_msr_interception however has an erroneous check for +'!apic_x2apic_mode()' which prevents it from doing anything in this case. + +As a result of this, all x2apic msrs are left unintercepted, and that +exposes the bare metal x2apic (if enabled) to the guest. +Oops. + +Remove the erroneous '!apic_x2apic_mode()' check to fix that. 
+ +This fixes CVE-2023-5090 + +Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") +Cc: stable@vger.kernel.org +Signed-off-by: Maxim Levitsky +Reviewed-by: Suravee Suthikulpanit +Tested-by: Suravee Suthikulpanit +Reviewed-by: Sean Christopherson +Message-Id: <20230928173354.217464-2-mlevitsk@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -829,8 +829,7 @@ void svm_set_x2apic_msr_interception(str + if (intercept == svm->x2avic_msrs_intercepted) + return; + +- if (!x2avic_enabled || +- !apic_x2apic_mode(svm->vcpu.arch.apic)) ++ if (!x2avic_enabled) + return; + + for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) { diff --git a/queue-6.5/x86-kvm-svm-refresh-avic-inhibition-in-svm_leave_nested.patch b/queue-6.5/x86-kvm-svm-refresh-avic-inhibition-in-svm_leave_nested.patch new file mode 100644 index 00000000000..00850cfb8e5 --- /dev/null +++ b/queue-6.5/x86-kvm-svm-refresh-avic-inhibition-in-svm_leave_nested.patch @@ -0,0 +1,40 @@ +From 3fdc6087df3be73a212a81ce5dd6516638568806 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 28 Sep 2023 20:33:53 +0300 +Subject: x86: KVM: SVM: refresh AVIC inhibition in svm_leave_nested() + +From: Maxim Levitsky + +commit 3fdc6087df3be73a212a81ce5dd6516638568806 upstream. + +svm_leave_nested(), similar to a nested VM exit, gets the vCPU out of nested +mode and thus should end the local inhibition of AVIC on this vCPU. + +Failure to do so can lead to hangs on guest reboot. + +Raise the KVM_REQ_APICV_UPDATE request to refresh the AVIC state of the +current vCPU in this case. 
+ +Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running") +Cc: stable@vger.kernel.org +Signed-off-by: Maxim Levitsky +Reviewed-by: Sean Christopherson +Message-Id: <20230928173354.217464-4-mlevitsk@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1243,6 +1243,9 @@ void svm_leave_nested(struct kvm_vcpu *v + + nested_svm_uninit_mmu_context(vcpu); + vmcb_mark_all_dirty(svm->vmcb); ++ ++ if (kvm_apicv_activated(vcpu->kvm)) ++ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + } + + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); diff --git a/queue-6.5/x86-sev-check-for-user-space-ioio-pointing-to-kernel-space.patch b/queue-6.5/x86-sev-check-for-user-space-ioio-pointing-to-kernel-space.patch new file mode 100644 index 00000000000..69402f3b32a --- /dev/null +++ b/queue-6.5/x86-sev-check-for-user-space-ioio-pointing-to-kernel-space.patch @@ -0,0 +1,95 @@ +From a2e312947cba31a667fc6f953bfbf891861efd30 Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Mon, 16 Oct 2023 14:42:50 +0200 +Subject: x86/sev: Check for user-space IOIO pointing to kernel space + +From: Joerg Roedel + +Upstream commit: 63e44bc52047f182601e7817da969a105aa1f721 + +Check the memory operand of INS/OUTS before emulating the instruction. +The #VC exception can get raised from user-space, but the memory operand +can be manipulated to access kernel memory before the emulation actually +begins and after the exception handler has run. + + [ bp: Massage commit message. 
] + +Fixes: 597cfe48212a ("x86/boot/compressed/64: Setup a GHCB-based VC Exception handler") +Reported-by: Tom Dohrmann +Signed-off-by: Joerg Roedel +Signed-off-by: Borislav Petkov (AMD) +Cc: +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/boot/compressed/sev.c | 5 +++++ + arch/x86/kernel/sev-shared.c | 31 +++++++++++++++++++++++++++++-- + 2 files changed, 34 insertions(+), 2 deletions(-) + +--- a/arch/x86/boot/compressed/sev.c ++++ b/arch/x86/boot/compressed/sev.c +@@ -108,6 +108,11 @@ static enum es_result vc_ioio_check(stru + return ES_OK; + } + ++static bool fault_in_kernel_space(unsigned long address) ++{ ++ return false; ++} ++ + #undef __init + #define __init + +--- a/arch/x86/kernel/sev-shared.c ++++ b/arch/x86/kernel/sev-shared.c +@@ -632,6 +632,23 @@ fail: + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + } + ++static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt, ++ unsigned long address, ++ bool write) ++{ ++ if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) { ++ ctxt->fi.vector = X86_TRAP_PF; ++ ctxt->fi.error_code = X86_PF_USER; ++ ctxt->fi.cr2 = address; ++ if (write) ++ ctxt->fi.error_code |= X86_PF_WRITE; ++ ++ return ES_EXCEPTION; ++ } ++ ++ return ES_OK; ++} ++ + static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, + void *src, char *buf, + unsigned int data_size, +@@ -639,7 +656,12 @@ static enum es_result vc_insn_string_rea + bool backwards) + { + int i, b = backwards ? -1 : 1; +- enum es_result ret = ES_OK; ++ unsigned long address = (unsigned long)src; ++ enum es_result ret; ++ ++ ret = vc_insn_string_check(ctxt, address, false); ++ if (ret != ES_OK) ++ return ret; + + for (i = 0; i < count; i++) { + void *s = src + (i * data_size * b); +@@ -660,7 +682,12 @@ static enum es_result vc_insn_string_wri + bool backwards) + { + int i, s = backwards ? 
-1 : 1; +- enum es_result ret = ES_OK; ++ unsigned long address = (unsigned long)dst; ++ enum es_result ret; ++ ++ ret = vc_insn_string_check(ctxt, address, true); ++ if (ret != ES_OK) ++ return ret; + + for (i = 0; i < count; i++) { + void *d = dst + (i * data_size * s); diff --git a/queue-6.5/x86-sev-check-iobm-for-ioio-exceptions-from-user-space.patch b/queue-6.5/x86-sev-check-iobm-for-ioio-exceptions-from-user-space.patch new file mode 100644 index 00000000000..b74602e5ef9 --- /dev/null +++ b/queue-6.5/x86-sev-check-iobm-for-ioio-exceptions-from-user-space.patch @@ -0,0 +1,172 @@ +From 85465dd77a9e9ecfb18086120600e2361de570da Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Wed, 21 Jun 2023 17:42:42 +0200 +Subject: x86/sev: Check IOBM for IOIO exceptions from user-space + +From: Joerg Roedel + +Upstream commit: b9cb9c45583b911e0db71d09caa6b56469eb2bdf + +Check the IO permission bitmap (if present) before emulating IOIO #VC +exceptions for user-space. These permissions are checked by hardware +already before the #VC is raised, but due to the VC-handler decoding +race it needs to be checked again in software. 
+ +Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions") +Reported-by: Tom Dohrmann +Signed-off-by: Joerg Roedel +Signed-off-by: Borislav Petkov (AMD) +Tested-by: Tom Dohrmann +Cc: +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/boot/compressed/sev.c | 5 +++++ + arch/x86/kernel/sev-shared.c | 22 +++++++++++++++------- + arch/x86/kernel/sev.c | 27 +++++++++++++++++++++++++++ + 3 files changed, 47 insertions(+), 7 deletions(-) + +--- a/arch/x86/boot/compressed/sev.c ++++ b/arch/x86/boot/compressed/sev.c +@@ -103,6 +103,11 @@ static enum es_result vc_read_mem(struct + return ES_OK; + } + ++static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) ++{ ++ return ES_OK; ++} ++ + #undef __init + #define __init + +--- a/arch/x86/kernel/sev-shared.c ++++ b/arch/x86/kernel/sev-shared.c +@@ -696,6 +696,9 @@ static enum es_result vc_insn_string_wri + static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) + { + struct insn *insn = &ctxt->insn; ++ size_t size; ++ u64 port; ++ + *exitinfo = 0; + + switch (insn->opcode.bytes[0]) { +@@ -704,7 +707,7 @@ static enum es_result vc_ioio_exitinfo(s + case 0x6d: + *exitinfo |= IOIO_TYPE_INS; + *exitinfo |= IOIO_SEG_ES; +- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; ++ port = ctxt->regs->dx & 0xffff; + break; + + /* OUTS opcodes */ +@@ -712,41 +715,43 @@ static enum es_result vc_ioio_exitinfo(s + case 0x6f: + *exitinfo |= IOIO_TYPE_OUTS; + *exitinfo |= IOIO_SEG_DS; +- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; ++ port = ctxt->regs->dx & 0xffff; + break; + + /* IN immediate opcodes */ + case 0xe4: + case 0xe5: + *exitinfo |= IOIO_TYPE_IN; +- *exitinfo |= (u8)insn->immediate.value << 16; ++ port = (u8)insn->immediate.value & 0xffff; + break; + + /* OUT immediate opcodes */ + case 0xe6: + case 0xe7: + *exitinfo |= IOIO_TYPE_OUT; +- *exitinfo |= (u8)insn->immediate.value << 16; ++ port = (u8)insn->immediate.value & 0xffff; + break; + + /* IN register opcodes */ + 
case 0xec: + case 0xed: + *exitinfo |= IOIO_TYPE_IN; +- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; ++ port = ctxt->regs->dx & 0xffff; + break; + + /* OUT register opcodes */ + case 0xee: + case 0xef: + *exitinfo |= IOIO_TYPE_OUT; +- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; ++ port = ctxt->regs->dx & 0xffff; + break; + + default: + return ES_DECODE_FAILED; + } + ++ *exitinfo |= port << 16; ++ + switch (insn->opcode.bytes[0]) { + case 0x6c: + case 0x6e: +@@ -756,12 +761,15 @@ static enum es_result vc_ioio_exitinfo(s + case 0xee: + /* Single byte opcodes */ + *exitinfo |= IOIO_DATA_8; ++ size = 1; + break; + default: + /* Length determined by instruction parsing */ + *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 + : IOIO_DATA_32; ++ size = (insn->opnd_bytes == 2) ? 2 : 4; + } ++ + switch (insn->addr_bytes) { + case 2: + *exitinfo |= IOIO_ADDR_16; +@@ -777,7 +785,7 @@ static enum es_result vc_ioio_exitinfo(s + if (insn_has_rep_prefix(insn)) + *exitinfo |= IOIO_REP; + +- return ES_OK; ++ return vc_ioio_check(ctxt, (u16)port, size); + } + + static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_ph + return ES_OK; + } + ++static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) ++{ ++ BUG_ON(size > 4); ++ ++ if (user_mode(ctxt->regs)) { ++ struct thread_struct *t = &current->thread; ++ struct io_bitmap *iobm = t->io_bitmap; ++ size_t idx; ++ ++ if (!iobm) ++ goto fault; ++ ++ for (idx = port; idx < port + size; ++idx) { ++ if (test_bit(idx, iobm->bitmap)) ++ goto fault; ++ } ++ } ++ ++ return ES_OK; ++ ++fault: ++ ctxt->fi.vector = X86_TRAP_GP; ++ ctxt->fi.error_code = 0; ++ ++ return ES_EXCEPTION; ++} ++ + /* Include code shared with pre-decompression boot stage */ + #include "sev-shared.c" + diff --git a/queue-6.5/x86-sev-disable-mmio-emulation-from-user-mode.patch 
b/queue-6.5/x86-sev-disable-mmio-emulation-from-user-mode.patch new file mode 100644 index 00000000000..bd5dd7dee47 --- /dev/null +++ b/queue-6.5/x86-sev-disable-mmio-emulation-from-user-mode.patch @@ -0,0 +1,42 @@ +From ed57a67142cadfb15e8bc47c5a86456298d7d000 Mon Sep 17 00:00:00 2001 +From: "Borislav Petkov (AMD)" +Date: Thu, 5 Oct 2023 11:06:36 +0200 +Subject: x86/sev: Disable MMIO emulation from user mode + +From: "Borislav Petkov (AMD)" + +Upstream commit: a37cd2a59d0cb270b1bba568fd3a3b8668b9d3ba + +A virt scenario can be constructed where MMIO memory can be user memory. +When that happens, a race condition opens between when the hardware +raises the #VC and when the #VC handler gets to emulate the instruction. + +If the MOVS is replaced with a MOVS accessing kernel memory in that +small race window, then write to kernel memory happens as the access +checks are not done at emulation time. + +Disable MMIO emulation in user mode temporarily until a sensible use +case appears and justifies properly handling the race window. + +Fixes: 0118b604c2c9 ("x86/sev-es: Handle MMIO String Instructions") +Reported-by: Tom Dohrmann +Signed-off-by: Borislav Petkov (AMD) +Tested-by: Tom Dohrmann +Cc: +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/sev.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -1508,6 +1508,9 @@ static enum es_result vc_handle_mmio(str + return ES_DECODE_FAILED; + } + ++ if (user_mode(ctxt->regs)) ++ return ES_UNSUPPORTED; ++ + switch (mmio) { + case INSN_MMIO_WRITE: + memcpy(ghcb->shared_buffer, reg_data, bytes); -- 2.47.3