--- /dev/null
+From 03adc61edad49e1bbecfb53f7ea5d78f398fe368 Mon Sep 17 00:00:00 2001
+From: Dan Clash <daclash@linux.microsoft.com>
+Date: Thu, 12 Oct 2023 14:55:18 -0700
+Subject: audit,io_uring: io_uring openat triggers audit reference count underflow
+
+From: Dan Clash <daclash@linux.microsoft.com>
+
+commit 03adc61edad49e1bbecfb53f7ea5d78f398fe368 upstream.
+
+An io_uring openat operation can update an audit reference count
+from multiple threads resulting in the call trace below.
+
+A call to io_uring_submit() with a single openat op with a flag of
+IOSQE_ASYNC results in the following reference count updates.
+
+The first part of the system call performs two increments that do not race.
+
+do_syscall_64()
+ __do_sys_io_uring_enter()
+ io_submit_sqes()
+ io_openat_prep()
+ __io_openat_prep()
+ getname()
+ getname_flags() /* update 1 (increment) */
+ __audit_getname() /* update 2 (increment) */
+
+The openat op is queued to an io_uring worker thread, which opens the
+window for a race. The system call exit performs one decrement.
+
+do_syscall_64()
+ syscall_exit_to_user_mode()
+ syscall_exit_to_user_mode_prepare()
+ __audit_syscall_exit()
+ audit_reset_context()
+ putname() /* update 3 (decrement) */
+
+The io_uring worker thread performs one increment and two decrements.
+These updates can race with the system call decrement.
+
+io_wqe_worker()
+ io_worker_handle_work()
+ io_wq_submit_work()
+ io_issue_sqe()
+ io_openat()
+ io_openat2()
+ do_filp_open()
+ path_openat()
+ __audit_inode() /* update 4 (increment) */
+ putname() /* update 5 (decrement) */
+ __audit_uring_exit()
+ audit_reset_context()
+ putname() /* update 6 (decrement) */
+
+The fix is to change the refcnt member of struct filename
+from int to atomic_t.
+
+kernel BUG at fs/namei.c:262!
+Call Trace:
+...
+ ? putname+0x68/0x70
+ audit_reset_context.part.0.constprop.0+0xe1/0x300
+ __audit_uring_exit+0xda/0x1c0
+ io_issue_sqe+0x1f3/0x450
+ ? lock_timer_base+0x3b/0xd0
+ io_wq_submit_work+0x8d/0x2b0
+ ? __try_to_del_timer_sync+0x67/0xa0
+ io_worker_handle_work+0x17c/0x2b0
+ io_wqe_worker+0x10a/0x350
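+
+As an illustration only, a minimal user-space sketch (C11 atomics,
+not the kernel API) of the decrement-and-test pattern the fix
+switches to; the old plain "--name->refcnt" is a non-atomic
+read-modify-write, so concurrent puts can race and underflow:
+
+  #include <stdatomic.h>
+  #include <stdio.h>
+
+  /* Exactly one of N concurrent callers sees the count hit zero. */
+  static int put_ref(atomic_int *refcnt)
+  {
+          /* atomic_fetch_sub() returns the value before the
+           * decrement, so "== 1" means this caller did the final
+           * put and may free the object. */
+          return atomic_fetch_sub(refcnt, 1) == 1;
+  }
+
+  int main(void)
+  {
+          atomic_int refcnt = 2;
+
+          printf("final put? %d\n", put_ref(&refcnt)); /* 0 */
+          printf("final put? %d\n", put_ref(&refcnt)); /* 1 */
+          return 0;
+  }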
+
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/lkml/MW2PR2101MB1033FFF044A258F84AEAA584F1C9A@MW2PR2101MB1033.namprd21.prod.outlook.com/
+Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring")
+Signed-off-by: Dan Clash <daclash@linux.microsoft.com>
+Link: https://lore.kernel.org/r/20231012215518.GA4048@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/namei.c | 9 +++++----
+ include/linux/fs.h | 2 +-
+ kernel/auditsc.c | 8 ++++----
+ 3 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -188,7 +188,7 @@ getname_flags(const char __user *filenam
+ }
+ }
+
+- result->refcnt = 1;
++ atomic_set(&result->refcnt, 1);
+ /* The empty path is special. */
+ if (unlikely(!len)) {
+ if (empty)
+@@ -249,7 +249,7 @@ getname_kernel(const char * filename)
+ memcpy((char *)result->name, filename, len);
+ result->uptr = NULL;
+ result->aname = NULL;
+- result->refcnt = 1;
++ atomic_set(&result->refcnt, 1);
+ audit_getname(result);
+
+ return result;
+@@ -261,9 +261,10 @@ void putname(struct filename *name)
+ if (IS_ERR(name))
+ return;
+
+- BUG_ON(name->refcnt <= 0);
++ if (WARN_ON_ONCE(!atomic_read(&name->refcnt)))
++ return;
+
+- if (--name->refcnt > 0)
++ if (!atomic_dec_and_test(&name->refcnt))
+ return;
+
+ if (name->name != name->iname) {
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2318,7 +2318,7 @@ struct audit_names;
+ struct filename {
+ const char *name; /* pointer to actual string */
+ const __user char *uptr; /* original userland pointer */
+- int refcnt;
++ atomic_t refcnt;
+ struct audit_names *aname;
+ const char iname[];
+ };
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -2210,7 +2210,7 @@ __audit_reusename(const __user char *upt
+ if (!n->name)
+ continue;
+ if (n->name->uptr == uptr) {
+- n->name->refcnt++;
++ atomic_inc(&n->name->refcnt);
+ return n->name;
+ }
+ }
+@@ -2239,7 +2239,7 @@ void __audit_getname(struct filename *na
+ n->name = name;
+ n->name_len = AUDIT_NAME_FULL;
+ name->aname = n;
+- name->refcnt++;
++ atomic_inc(&name->refcnt);
+ }
+
+ static inline int audit_copy_fcaps(struct audit_names *name,
+@@ -2371,7 +2371,7 @@ out_alloc:
+ return;
+ if (name) {
+ n->name = name;
+- name->refcnt++;
++ atomic_inc(&name->refcnt);
+ }
+
+ out:
+@@ -2498,7 +2498,7 @@ void __audit_inode_child(struct inode *p
+ if (found_parent) {
+ found_child->name = found_parent->name;
+ found_child->name_len = AUDIT_NAME_FULL;
+- found_child->name->refcnt++;
++ atomic_inc(&found_child->name->refcnt);
+ }
+ }
+
--- /dev/null
+From bfbe5b31caa74ab97f1784fe9ade5f45e0d3de91 Mon Sep 17 00:00:00 2001
+From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Date: Fri, 30 Jun 2023 16:22:53 +0400
+Subject: fs/ntfs3: fix deadlock in mark_as_free_ex
+
+From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+
+commit bfbe5b31caa74ab97f1784fe9ade5f45e0d3de91 upstream.
+
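+The hunk below defers the ntfs_set_state() call until after
+wnd->rw_lock has been released; per the comment it adds, marking the
+volume dirty must not happen under the cluster-bitmap lock, presumably
+because that call can end up needing the lock itself, hence the
+reported deadlock. A hedged user-space sketch of the "record under
+the lock, act after unlock" pattern, names hypothetical:
+
+  #include <pthread.h>
+  #include <stdbool.h>
+
+  static pthread_rwlock_t bitmap_lock = PTHREAD_RWLOCK_INITIALIZER;
+
+  /* Stand-in for ntfs_set_state(); assume it may take bitmap_lock. */
+  static void mark_volume_dirty(void) { /* ... */ }
+
+  static void free_clusters(bool range_is_used)
+  {
+          bool dirty = false;
+
+          pthread_rwlock_wrlock(&bitmap_lock);
+          if (!range_is_used)
+                  dirty = true;   /* just record it here */
+          /* ... bitmap updates ... */
+          pthread_rwlock_unlock(&bitmap_lock);
+
+          if (dirty)              /* safe: lock no longer held */
+                  mark_volume_dirty();
+  }
+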
+Reported-by: syzbot+e94d98936a0ed08bde43@syzkaller.appspotmail.com
+Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ntfs3/fsntfs.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/ntfs3/fsntfs.c
++++ b/fs/ntfs3/fsntfs.c
+@@ -2461,10 +2461,12 @@ void mark_as_free_ex(struct ntfs_sb_info
+ {
+ CLST end, i, zone_len, zlen;
+ struct wnd_bitmap *wnd = &sbi->used.bitmap;
++ bool dirty = false;
+
+ down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+ if (!wnd_is_used(wnd, lcn, len)) {
+- ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
++ /* mark volume as dirty out of wnd->rw_lock */
++ dirty = true;
+
+ end = lcn + len;
+ len = 0;
+@@ -2518,6 +2520,8 @@ void mark_as_free_ex(struct ntfs_sb_info
+
+ out:
+ up_write(&wnd->rw_lock);
++ if (dirty)
++ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ }
+
+ /*
--- /dev/null
+From 34e6552a442f268eefd408e47f4f2d471aa64829 Mon Sep 17 00:00:00 2001
+From: Pavel Skripkin <paskripkin@gmail.com>
+Date: Thu, 13 Jul 2023 22:41:46 +0300
+Subject: fs/ntfs3: Fix OOB read in ntfs_init_from_boot
+
+From: Pavel Skripkin <paskripkin@gmail.com>
+
+commit 34e6552a442f268eefd408e47f4f2d471aa64829 upstream.
+
+Syzbot was able to create a device whose last sector is only 512
+bytes in size.
+
+After failing to read boot info from the initial sector, reading it
+again at offset 511 causes an OOB read.
+
+To prevent such reports, add a sanity check that validates whether the
+buffer_head is big enough to hold the ntfs3 boot info.
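+
+A hedged sketch of the bounds check (sizes illustrative; an extra
+guard is added here for generality), comparing in subtraction form so
+that "off + size" cannot overflow for a huge off:
+
+  #include <stdbool.h>
+  #include <stddef.h>
+
+  /* true if a boot record of boot_size bytes at offset off fits
+   * inside a buffer of buf_size bytes */
+  static bool boot_fits(size_t buf_size, size_t boot_size, size_t off)
+  {
+          return boot_size <= buf_size && buf_size - boot_size >= off;
+  }
+
+  int main(void)
+  {
+          /* the syzbot case: a 512-byte buffer with boot info at
+           * offset 511 does not fit and must be rejected */
+          return boot_fits(512, 512, 511) ? 0 : 1;
+  }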
+
+Fixes: 6a4cd3ea7d77 ("fs/ntfs3: Alternative boot if primary boot is corrupted")
+Reported-by: syzbot+53ce40c8c0322c06aea5@syzkaller.appspotmail.com
+Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
+Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ntfs3/super.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/ntfs3/super.c
++++ b/fs/ntfs3/super.c
+@@ -855,6 +855,11 @@ static int ntfs_init_from_boot(struct su
+
+ check_boot:
+ err = -EINVAL;
++
++ /* Corrupted image; do not read OOB */
++ if (bh->b_size - sizeof(*boot) < boot_off)
++ goto out;
++
+ boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off);
+
+ if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) {
--- /dev/null
+From 8e7e27b2ee1e19c4040d4987e345f678a74c0aed Mon Sep 17 00:00:00 2001
+From: Zeng Heng <zengheng4@huawei.com>
+Date: Thu, 20 Apr 2023 15:46:22 +0800
+Subject: fs/ntfs3: fix panic about slab-out-of-bounds caused by ntfs_list_ea()
+
+From: Zeng Heng <zengheng4@huawei.com>
+
+commit 8e7e27b2ee1e19c4040d4987e345f678a74c0aed upstream.
+
+Here is a BUG report against linux-6.1 from syzbot; the bug is still
+present upstream:
+
+BUG: KASAN: slab-out-of-bounds in ntfs_list_ea fs/ntfs3/xattr.c:191 [inline]
+BUG: KASAN: slab-out-of-bounds in ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710
+Read of size 1 at addr ffff888021acaf3d by task syz-executor128/3632
+
+Call Trace:
+ kasan_report+0x139/0x170 mm/kasan/report.c:495
+ ntfs_list_ea fs/ntfs3/xattr.c:191 [inline]
+ ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710
+ vfs_listxattr fs/xattr.c:457 [inline]
+ listxattr+0x293/0x2d0 fs/xattr.c:804
+ path_listxattr fs/xattr.c:828 [inline]
+ __do_sys_llistxattr fs/xattr.c:846 [inline]
+
+Before dereferencing field members of `ea` in unpacked_ea_size(), we
+need to check whether the EA_FULL struct lies within the validated
+access range.
+
+Similarly, when dereferencing the `ea->name` field member, we need to
+check whether ea->name lies within the validated access range, too.
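+
+A hedged sketch of the two-stage validation, with simplified,
+hypothetical types: the fixed-size header must fit before ->size and
+->name_len may be read, and the full entry must fit before ->name may
+be read:
+
+  #include <stddef.h>
+  #include <stdint.h>
+
+  struct ea_hdr {                 /* simplified EA_FULL stand-in */
+          uint32_t size;          /* unpacked size of this entry */
+          uint8_t  name_len;
+          char     name[];
+  };
+
+  static const struct ea_hdr *ea_at(const uint8_t *buf, size_t total,
+                                    size_t off)
+  {
+          const struct ea_hdr *ea;
+
+          if (off + sizeof(*ea) > total)  /* header in range? */
+                  return NULL;
+          ea = (const void *)(buf + off);
+          if (ea->size < sizeof(*ea) || ea->size > total - off)
+                  return NULL;            /* whole entry in range? */
+          return ea;
+  }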
+
+Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations")
+Reported-by: syzbot+9fcea5ef6dc4dc72d334@syzkaller.appspotmail.com
+Signed-off-by: Zeng Heng <zengheng4@huawei.com>
+[almaz.alexandrovich@paragon-software.com: took the ret variable out of the loop block]
+Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ntfs3/xattr.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/ntfs3/xattr.c
++++ b/fs/ntfs3/xattr.c
+@@ -211,7 +211,8 @@ static ssize_t ntfs_list_ea(struct ntfs_
+ size = le32_to_cpu(info->size);
+
+ /* Enumerate all xattrs. */
+- for (ret = 0, off = 0; off < size; off += ea_size) {
++ ret = 0;
++ for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) {
+ ea = Add2Ptr(ea_all, off);
+ ea_size = unpacked_ea_size(ea);
+
+@@ -219,6 +220,10 @@ static ssize_t ntfs_list_ea(struct ntfs_
+ break;
+
+ if (buffer) {
++ /* Check if we can use field ea->name */
++ if (off + ea_size > size)
++ break;
++
+ if (ret + ea->name_len + 1 > bytes_per_buffer) {
+ err = -ERANGE;
+ goto out;
--- /dev/null
+From 1f9b94af923c88539426ed811ae7e9543834a5c5 Mon Sep 17 00:00:00 2001
+From: Ziqi Zhao <astrajoan@yahoo.com>
+Date: Wed, 9 Aug 2023 12:11:18 -0700
+Subject: fs/ntfs3: Fix possible null-pointer dereference in hdr_find_e()
+
+From: Ziqi Zhao <astrajoan@yahoo.com>
+
+commit 1f9b94af923c88539426ed811ae7e9543834a5c5 upstream.
+
+Upon investigation of the C reproducer provided by Syzbot, it seemed
+the reproducer was trying to mount a corrupted NTFS filesystem, then
+issue a rename syscall to some nodes in the filesystem. This can be
+shown by modifying the reproducer to only include the mount syscall,
+and investigating the filesystem by e.g. `ls` and `rm` commands. As a
+result, during the problematic call to `hdr_find_e`, the `inode` being
+supplied did not go through `indx_init`, hence the `cmp` function
+pointer was never set.
+
+The fix is simply to check whether `cmp` is not set, and return NULL
+if that's the case, in order to be consistent with other error
+scenarios of the `hdr_find_e` method. The rationale behind this patch
+is that:
+
+- We should prevent crashing the kernel even if the mounted filesystem
+ is corrupted. Any syscalls made on the filesystem could return
+ invalid results, but the kernel should be able to sustain these calls.
+
+- Only very specific corruption would lead to this bug, so it would be
+ a pretty rare case in actual usage anyway. Therefore, introducing a
+ check to specifically protect against this bug seems appropriate.
+ Because of its rarity, an `unlikely` clause is used to wrap around
+ this nullity check.
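+
+A hedged user-space sketch of the guard (the kernel's unlikely() is
+just a branch-prediction hint and is omitted; names hypothetical):
+
+  #include <stddef.h>
+
+  typedef int (*cmp_fn)(const void *key, const void *ent);
+
+  /* Returns the entry on match, NULL otherwise -- including when the
+   * comparator was never set up because init was skipped. */
+  static const void *find_entry(cmp_fn cmp, const void *key,
+                                const void *ent)
+  {
+          if (!cmp)       /* corrupted fs: index never initialized */
+                  return NULL;
+          return cmp(key, ent) == 0 ? ent : NULL;
+  }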
+
+Reported-by: syzbot+60cf892fc31d1f4358fc@syzkaller.appspotmail.com
+Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>
+Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ntfs3/index.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ntfs3/index.c
++++ b/fs/ntfs3/index.c
+@@ -729,6 +729,9 @@ static struct NTFS_DE *hdr_find_e(const
+ u32 total = le32_to_cpu(hdr->total);
+ u16 offs[128];
+
++ if (unlikely(!cmp))
++ return NULL;
++
+ fill_table:
+ if (end > total)
+ return NULL;
--- /dev/null
+From 91a4b1ee78cb100b19b70f077c247f211110348f Mon Sep 17 00:00:00 2001
+From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Date: Fri, 30 Jun 2023 16:25:25 +0400
+Subject: fs/ntfs3: Fix shift-out-of-bounds in ntfs_fill_super
+
+From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+
+commit 91a4b1ee78cb100b19b70f077c247f211110348f upstream.
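+
+For context (a hedged reconstruction from the diff below): the boot
+sector encodes the MFT record and index sizes either as a cluster
+count (non-negative byte) or as a negative power-of-two exponent.
+With a crafted value such as -32, the old "1u << (-boot->record_size)"
+shifts by the full word width, which is undefined behavior and what
+UBSAN reports as shift-out-of-bounds. A sketch of the bounded decode,
+names hypothetical:
+
+  #include <stdint.h>
+
+  #define MAX_SHIFT 12    /* 1 << 12 == 4096, the maximum size */
+
+  /* Returns 0 for an invalid on-disk size encoding; cluster_bits is
+   * assumed already validated by earlier checks. */
+  static uint32_t decode_size(int8_t raw, uint32_t cluster_bits)
+  {
+          if (raw >= 0)
+                  return (uint32_t)raw << cluster_bits;
+          if (-raw <= MAX_SHIFT)          /* bound the shift first */
+                  return 1u << -raw;
+          return 0;                       /* e.g. raw == -32: UB */
+  }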
+
+Reported-by: syzbot+478c1bf0e6bf4a8f3a04@syzkaller.appspotmail.com
+Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ntfs3/ntfs_fs.h | 2 ++
+ fs/ntfs3/super.c | 26 ++++++++++++++++++++------
+ 2 files changed, 22 insertions(+), 6 deletions(-)
+
+--- a/fs/ntfs3/ntfs_fs.h
++++ b/fs/ntfs3/ntfs_fs.h
+@@ -42,9 +42,11 @@ enum utf16_endian;
+ #define MINUS_ONE_T ((size_t)(-1))
+ /* Biggest MFT / smallest cluster */
+ #define MAXIMUM_BYTES_PER_MFT 4096
++#define MAXIMUM_SHIFT_BYTES_PER_MFT 12
+ #define NTFS_BLOCKS_PER_MFT_RECORD (MAXIMUM_BYTES_PER_MFT / 512)
+
+ #define MAXIMUM_BYTES_PER_INDEX 4096
++#define MAXIMUM_SHIFT_BYTES_PER_INDEX 12
+ #define NTFS_BLOCKS_PER_INODE (MAXIMUM_BYTES_PER_INDEX / 512)
+
+ /* NTFS specific error code when fixup failed. */
+--- a/fs/ntfs3/super.c
++++ b/fs/ntfs3/super.c
+@@ -906,9 +906,17 @@ check_boot:
+ goto out;
+ }
+
+- sbi->record_size = record_size =
+- boot->record_size < 0 ? 1 << (-boot->record_size) :
+- (u32)boot->record_size << cluster_bits;
++ if (boot->record_size >= 0) {
++ record_size = (u32)boot->record_size << cluster_bits;
++ } else if (-boot->record_size <= MAXIMUM_SHIFT_BYTES_PER_MFT) {
++ record_size = 1u << (-boot->record_size);
++ } else {
++ ntfs_err(sb, "%s: invalid record size %d.", hint,
++ boot->record_size);
++ goto out;
++ }
++
++ sbi->record_size = record_size;
+ sbi->record_bits = blksize_bits(record_size);
+ sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes
+
+@@ -925,9 +933,15 @@ check_boot:
+ goto out;
+ }
+
+- sbi->index_size = boot->index_size < 0 ?
+- 1u << (-boot->index_size) :
+- (u32)boot->index_size << cluster_bits;
++ if (boot->index_size >= 0) {
++ sbi->index_size = (u32)boot->index_size << cluster_bits;
++ } else if (-boot->index_size <= MAXIMUM_SHIFT_BYTES_PER_INDEX) {
++ sbi->index_size = 1u << (-boot->index_size);
++ } else {
++ ntfs_err(sb, "%s: invalid index size %d.", hint,
++ boot->index_size);
++ goto out;
++ }
+
+ /* Check index record size. */
+ if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) {
--- /dev/null
+From 8647c52e9504c99752a39f1d44f6268f82c40a5c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:53 -0700
+Subject: KVM: x86: Constrain guest-supported xfeatures only at KVM_GET_XSAVE{2}
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 8647c52e9504c99752a39f1d44f6268f82c40a5c upstream.
+
+Mask off xfeatures that aren't exposed to the guest only when saving guest
+state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly.
+Preserving the maximal set of xfeatures in user_xfeatures restores KVM's
+ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu:
+Limit guest user_xfeatures to supported bits of XCR0") allowed userspace
+to load xfeatures that are supported by the host, irrespective of what
+xfeatures are exposed to the guest.
+
+There is no known use case where userspace *intentionally* loads xfeatures
+that aren't exposed to the guest, but the bug fixed by commit ad856280ddea
+was specifically that KVM_GET_XSAVE{2} would save xfeatures that weren't
+exposed to the guest, e.g. would lead to userspace unintentionally loading
+guest-unsupported xfeatures when live migrating a VM.
+
+Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially
+problematic for QEMU-based setups, as QEMU has a bug where instead of
+terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops
+loading guest state, i.e. resumes the guest after live migration with
+incomplete guest state, and ultimately results in guest data corruption.
+
+Note, letting userspace restore all host-supported xfeatures does not fix
+setups where a VM is migrated from a host *without* commit ad856280ddea,
+to a target with a subset of host-supported xfeatures. However there is
+no way to safely address that scenario, e.g. KVM could silently drop the
+unsupported features, but that would be a clear violation of KVM's ABI and
+so would require userspace to opt-in, at which point userspace could
+simply be updated to sanitize the to-be-loaded XSAVE state.
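+
+For context, a hedged sketch of the userspace flow this ABI serves
+(standard KVM ioctls; error handling elided): state saved via
+KVM_GET_XSAVE2 on the source must be accepted by KVM_SET_XSAVE on the
+destination, so the save side is where guest-unsupported bits get
+masked.
+
+  #include <linux/kvm.h>
+  #include <stdlib.h>
+  #include <sys/ioctl.h>
+
+  /* Migrate one vCPU's XSAVE state between two open vCPU fds. */
+  static void migrate_xsave(int vm_fd, int src_vcpu, int dst_vcpu)
+  {
+          /* KVM_CAP_XSAVE2 reports the required buffer size */
+          int sz = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
+          struct kvm_xsave *xs = calloc(1, sz > 0 ? sz : sizeof(*xs));
+
+          ioctl(src_vcpu, KVM_GET_XSAVE2, xs); /* masked by this fix */
+          ioctl(dst_vcpu, KVM_SET_XSAVE, xs);  /* host-wide set OK */
+          free(xs);
+  }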
+
+Reported-by: Tyler Stachecki <stachecki.tyler@gmail.com>
+Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net
+Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0")
+Cc: stable@vger.kernel.org
+Cc: Leonardo Bras <leobras@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Message-Id: <20230928001956.924301-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/xstate.c | 5 +----
+ arch/x86/kvm/cpuid.c | 8 --------
+ arch/x86/kvm/x86.c | 18 ++++++++++++++++--
+ 3 files changed, 17 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures
+ fpregs_restore_userregs();
+
+ newfps->xfeatures = curfps->xfeatures | xfeatures;
+-
+- if (!guest_fpu)
+- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+-
++ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+ newfps->xfd = curfps->xfd & ~xfeatures;
+
+ /* Do the final updates within the locked region */
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -326,14 +326,6 @@ static void kvm_vcpu_after_set_cpuid(str
+ vcpu->arch.guest_supported_xcr0 =
+ cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+
+- /*
+- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+- * supported by the host.
+- */
+- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
+- XFEATURE_MASK_FPSSE;
+-
+ kvm_update_pv_runtime(vcpu);
+
+ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5389,12 +5389,26 @@ static int kvm_vcpu_ioctl_x86_set_debugr
+ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+ u8 *state, unsigned int size)
+ {
++ /*
++ * Only copy state for features that are enabled for the guest. The
++ * state itself isn't problematic, but setting bits in the header for
++ * features that are supported in *this* host but not exposed to the
++ * guest can result in KVM_SET_XSAVE failing when live migrating to a
++ * compatible host without the features that are NOT exposed to the
++ * guest.
++ *
++ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
++ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
++ * supported by the host.
++ */
++ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
++ XFEATURE_MASK_FPSSE;
++
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return;
+
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+- vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+- vcpu->arch.pkru);
++ supported_xcr0, vcpu->arch.pkru);
+ }
+
+ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
--- /dev/null
+From b29a2acd36dd7a33c63f260df738fb96baa3d4f8 Mon Sep 17 00:00:00 2001
+From: Roman Kagan <rkagan@amazon.de>
+Date: Thu, 4 May 2023 14:00:42 +0200
+Subject: KVM: x86/pmu: Truncate counter value to allowed width on write
+
+From: Roman Kagan <rkagan@amazon.de>
+
+commit b29a2acd36dd7a33c63f260df738fb96baa3d4f8 upstream.
+
+Performance counters are defined to have width less than 64 bits. The
+vPMU code maintains the counters in u64 variables but assumes the value
+to fit within the defined width. However, for Intel non-full-width
+counters (MSR_IA32_PERFCTRx) the value received from the guest is
+truncated to 32 bits and then sign-extended to full 64 bits. If a
+negative value is set, it's sign-extended to 64 bits, but then in
+kvm_pmu_incr_counter() it's incremented, truncated, and compared to the
+previous value for overflow detection.
+
+That previous value is not truncated, so it always evaluates bigger than
+the truncated new one, and a PMI is injected. If the PMI handler writes
+a negative counter value itself, the vCPU never quits the PMI loop.
+
+It turns out that the Linux PMI handler actually does write the counter
+the value just read with RDPMC, so when no full-width support is exposed
+via MSR_IA32_PERF_CAPABILITIES, and the guest initializes the counter to
+a negative value, it locks up.
+
+This has been observed in the field, for example, when the guest configures
+atop to use perfevents and runs two instances of it simultaneously.
+
+To address the problem, maintain the invariant that the counter value
+always fits in the defined bit width, by truncating the received value
+in the respective set_msr methods. For better readability, factor the
+truncation out into a helper function, pmc_write_counter(), shared by
+the vmx and svm parts.
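+
+A user-space sketch of the arithmetic, assuming an illustrative 48-bit
+counter width and mirroring the increment-then-compare overflow logic
+described above:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define MASK ((1ull << 48) - 1)         /* assumed counter width */
+
+  /* overflow check: increment, truncate, compare to previous */
+  static int overflowed(uint64_t prev)
+  {
+          return ((prev + 1) & MASK) < prev;
+  }
+
+  int main(void)
+  {
+          /* guest writes 0x80000000 via a non-full-width MSR; the
+           * value is sign-extended to 64 bits */
+          uint64_t v = (uint64_t)(int64_t)(int32_t)0x80000000u;
+
+          printf("untruncated: %d\n", overflowed(v));        /* 1 */
+          printf("truncated:   %d\n", overflowed(v & MASK)); /* 0 */
+          return 0;
+  }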
+
+Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions")
+Cc: stable@vger.kernel.org
+Signed-off-by: Roman Kagan <rkagan@amazon.de>
+Link: https://lore.kernel.org/all/20230504120042.785651-1-rkagan@amazon.de
+Tested-by: Like Xu <likexu@tencent.com>
+[sean: tweak changelog, s/set/write in the helper]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/pmu.h | 6 ++++++
+ arch/x86/kvm/svm/pmu.c | 2 +-
+ arch/x86/kvm/vmx/pmu_intel.c | 4 ++--
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struc
+ return counter & pmc_bitmask(pmc);
+ }
+
++static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
++{
++ pmc->counter += val - pmc_read_counter(pmc);
++ pmc->counter &= pmc_bitmask(pmc);
++}
++
+ static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
+ {
+ if (pmc->perf_event) {
+--- a/arch/x86/kvm/svm/pmu.c
++++ b/arch/x86/kvm/svm/pmu.c
+@@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vc
+ /* MSR_PERFCTRn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
+ if (pmc) {
+- pmc->counter += data - pmc_read_counter(pmc);
++ pmc_write_counter(pmc, data);
+ pmc_update_sample_period(pmc);
+ return 0;
+ }
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -406,11 +406,11 @@ static int intel_pmu_set_msr(struct kvm_
+ if (!msr_info->host_initiated &&
+ !(msr & MSR_PMC_FULL_WIDTH_BIT))
+ data = (s64)(s32)data;
+- pmc->counter += data - pmc_read_counter(pmc);
++ pmc_write_counter(pmc, data);
+ pmc_update_sample_period(pmc);
+ break;
+ } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+- pmc->counter += data - pmc_read_counter(pmc);
++ pmc_write_counter(pmc, data);
+ pmc_update_sample_period(pmc);
+ break;
+ } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
--- /dev/null
+From c15cdea517414e0b29a11e0a0e2443d127c9109b Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Fri, 6 Oct 2023 17:39:34 +0100
+Subject: mm: slab: Do not create kmalloc caches smaller than arch_slab_minalign()
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit c15cdea517414e0b29a11e0a0e2443d127c9109b upstream.
+
+Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment
+if DMA bouncing possible") allows architectures with non-coherent DMA to
+define a small ARCH_KMALLOC_MINALIGN (e.g. sizeof(unsigned long long))
+and this has been enabled on arm64. With KASAN_HW_TAGS enabled, however,
+ARCH_SLAB_MINALIGN becomes 16 on arm64 (arch_slab_minalign() dynamically
+selects it since commit d949a8155d13 ("mm: make minimum slab alignment a
+runtime property")). This can lead to a situation where the kernel
+attempts to create kmalloc-8 caches with a kmem_caches.size aligned to
+16. When such a cache is mergeable, it can lead to kernel warnings like:
+
+sysfs: cannot create duplicate filename '/kernel/slab/:d-0000016'
+CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc1-00001-gda98843cd306-dirty #5
+Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
+Call trace:
+ dump_backtrace+0x90/0xe8
+ show_stack+0x18/0x24
+ dump_stack_lvl+0x48/0x60
+ dump_stack+0x18/0x24
+ sysfs_warn_dup+0x64/0x80
+ sysfs_create_dir_ns+0xe8/0x108
+ kobject_add_internal+0x98/0x264
+ kobject_init_and_add+0x8c/0xd8
+ sysfs_slab_add+0x12c/0x248
+ slab_sysfs_init+0x98/0x14c
+ do_one_initcall+0x6c/0x1b0
+ kernel_init_freeable+0x1c0/0x288
+ kernel_init+0x24/0x1e0
+ ret_from_fork+0x10/0x20
+kobject: kobject_add_internal failed for :d-0000016 with -EEXIST, don't try to register things with the same name in the same directory.
+SLUB: Unable to add boot slab dma-kmalloc-8 to sysfs
+
+Limit the __kmalloc_minalign() return value (used to create the
+kmalloc-* caches) to arch_slab_minalign() so that kmalloc-8 caches are
+skipped when KASAN_HW_TAGS is enabled (both config and runtime).
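+
+A hedged sketch of the resulting computation (values illustrative:
+ARCH_KMALLOC_MINALIGN of 8 with DMA bouncing available, and a runtime
+arch_slab_minalign() of 16 under KASAN_HW_TAGS):
+
+  #include <stdio.h>
+
+  static unsigned int kmalloc_minalign(unsigned int dma_minalign,
+                                       unsigned int slab_minalign)
+  {
+          /* never go below what the slab layer itself requires */
+          return dma_minalign > slab_minalign ? dma_minalign
+                                              : slab_minalign;
+  }
+
+  int main(void)
+  {
+          /* 16: kmalloc-8 is not created, so no duplicate sysfs
+           * entry for the merged 16-byte cache */
+          printf("%u\n", kmalloc_minalign(8, 16));
+          return 0;
+  }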
+
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Fixes: b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible")
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Peter Collingbourne <pcc@google.com>
+Cc: stable@vger.kernel.org # 6.5.x
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slab_common.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -864,11 +864,13 @@ void __init setup_kmalloc_cache_index_ta
+
+ static unsigned int __kmalloc_minalign(void)
+ {
++ unsigned int minalign = dma_get_cache_alignment();
++
+ #ifdef CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC
+ if (io_tlb_default_mem.nslabs)
+- return ARCH_KMALLOC_MINALIGN;
++ minalign = ARCH_KMALLOC_MINALIGN;
+ #endif
+- return dma_get_cache_alignment();
++ return max(minalign, arch_slab_minalign());
+ }
+
+ void __init
--- /dev/null
+From 72377ab2d671befd6390a1d5677f5cca61235b65 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Wed, 18 Oct 2023 11:23:54 -0700
+Subject: mptcp: more conservative check for zero probes
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 72377ab2d671befd6390a1d5677f5cca61235b65 upstream.
+
+Christoph reported that the MPTCP protocol can find the subflow-level
+write queue unexpectedly not empty while crafting a zero-window probe,
+hitting a warning:
+
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 188 at net/mptcp/protocol.c:1312 mptcp_sendmsg_frag+0xc06/0xe70
+Modules linked in:
+CPU: 0 PID: 188 Comm: kworker/0:2 Not tainted 6.6.0-rc2-g1176aa719d7a #47
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+Workqueue: events mptcp_worker
+RIP: 0010:mptcp_sendmsg_frag+0xc06/0xe70 net/mptcp/protocol.c:1312
+RAX: 47d0530de347ff6a RBX: 47d0530de347ff6b RCX: ffff8881015d3c00
+RDX: ffff8881015d3c00 RSI: 47d0530de347ff6b RDI: 47d0530de347ff6b
+RBP: 47d0530de347ff6b R08: ffffffff8243c6a8 R09: ffffffff82042d9c
+R10: 0000000000000002 R11: ffffffff82056850 R12: ffff88812a13d580
+R13: 0000000000000001 R14: ffff88812b375e50 R15: ffff88812bbf3200
+FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000695118 CR3: 0000000115dfc001 CR4: 0000000000170ef0
+Call Trace:
+ <TASK>
+ __subflow_push_pending+0xa4/0x420 net/mptcp/protocol.c:1545
+ __mptcp_push_pending+0x128/0x3b0 net/mptcp/protocol.c:1614
+ mptcp_release_cb+0x218/0x5b0 net/mptcp/protocol.c:3391
+ release_sock+0xf6/0x100 net/core/sock.c:3521
+ mptcp_worker+0x6e8/0x8f0 net/mptcp/protocol.c:2746
+ process_scheduled_works+0x341/0x690 kernel/workqueue.c:2630
+ worker_thread+0x3a7/0x610 kernel/workqueue.c:2784
+ kthread+0x143/0x180 kernel/kthread.c:388
+ ret_from_fork+0x4d/0x60 arch/x86/kernel/process.c:147
+ ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:304
+ </TASK>
+
+The root cause of the issue is that expectations are wrong: e.g. due
+to MPTCP-level re-injection we can hit the critical condition.
+
+Explicitly avoid the zero-window probe when the subflow write queue
+is not empty and drop the related warnings.
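+
+A hedged sketch of the tightened admission check (hypothetical
+helper; in the hunk below the queue test is tcp_write_queue_tail()):
+
+  #include <stdbool.h>
+  #include <stdint.h>
+
+  /* Probe only when all data is acked AND the subflow write queue is
+   * really empty; MPTCP-level re-injection can break the old
+   * "everything acked implies empty queue" assumption. */
+  static bool can_zero_window_probe(uint64_t snd_una, uint64_t snd_nxt,
+                                    bool ssk_queue_empty)
+  {
+          return snd_una == snd_nxt && ssk_queue_empty;
+  }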
+
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/444
+Fixes: f70cad1085d1 ("mptcp: stop relying on tcp_tx_skb_cache")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <martineau@kernel.org>
+Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-3-17ecb002e41d@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1300,7 +1300,7 @@ alloc_skb:
+ if (copy == 0) {
+ u64 snd_una = READ_ONCE(msk->snd_una);
+
+- if (snd_una != msk->snd_nxt) {
++ if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
+ tcp_remove_empty_skb(ssk);
+ return 0;
+ }
+@@ -1308,11 +1308,6 @@ alloc_skb:
+ zero_window_probe = true;
+ data_seq = snd_una - 1;
+ copy = 1;
+-
+- /* all mptcp-level data is acked, no skbs should be present into the
+- * ssk write queue
+- */
+- WARN_ON_ONCE(reuse_skb);
+ }
+
+ copy = min_t(size_t, copy, info->limit - info->sent);
+@@ -1341,7 +1336,6 @@ alloc_skb:
+ if (reuse_skb) {
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+ mpext->data_len += copy;
+- WARN_ON_ONCE(zero_window_probe);
+ goto out;
+ }
+
--- /dev/null
+From d351c1ea2de3e36e608fc355d8ae7d0cc80e6cd6 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Sun, 8 Oct 2023 19:36:53 +0200
+Subject: netfilter: nft_payload: fix wrong mac header matching
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Florian Westphal <fw@strlen.de>
+
+commit d351c1ea2de3e36e608fc355d8ae7d0cc80e6cd6 upstream.
+
+Multicast packets get looped back to the local machine. Such packets
+have a 0-length mac header; we should treat this like "mac header not
+set" and abort rule evaluation.
+
+As-is, we just copy data from the network header instead.
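+
+A hedged sketch of the condition (helper hypothetical; the hunk below
+uses skb_mac_header_was_set() and skb_mac_header_len()):
+
+  #include <stdbool.h>
+
+  /* A looped-back multicast skb can have a mac header that is "set"
+   * but zero bytes long; both cases must abort LL-header matching,
+   * or the copy silently starts at the network header. */
+  static bool ll_header_usable(bool mac_set, unsigned int mac_len)
+  {
+          return mac_set && mac_len > 0;
+  }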
+
+Fixes: 96518518cc41 ("netfilter: add nftables")
+Reported-by: Blažej Krajňák <krajnak@levonet.sk>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_payload.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nft_payload.c
++++ b/net/netfilter/nft_payload.c
+@@ -179,7 +179,7 @@ void nft_payload_eval(const struct nft_e
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+- if (!skb_mac_header_was_set(skb))
++ if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0)
+ goto err;
+
+ if (skb_vlan_tag_present(skb) &&
--- /dev/null
+From 1db34aa58d80988f5ee99d2fd9d8f7489c3b0681 Mon Sep 17 00:00:00 2001
+From: Bagas Sanjaya <bagasdotme@gmail.com>
+Date: Tue, 17 Oct 2023 15:08:12 +0700
+Subject: Revert "net: wwan: iosm: enable runtime pm support for 7560"
+
+From: Bagas Sanjaya <bagasdotme@gmail.com>
+
+commit 1db34aa58d80988f5ee99d2fd9d8f7489c3b0681 upstream.
+
+Runtime power management support breaks the Intel LTE modem, where the
+dmesg dump shows timeout errors:
+
+```
+[ 72.027442] iosm 0000:01:00.0: msg timeout
+[ 72.531638] iosm 0000:01:00.0: msg timeout
+[ 73.035414] iosm 0000:01:00.0: msg timeout
+[ 73.540359] iosm 0000:01:00.0: msg timeout
+```
+
+Furthermore, when shutting down with `poweroff` and modem attached, the
+system rebooted instead of powering down as expected. The modem works
+again only after power cycling.
+
+Revert runtime power management support for IOSM driver as introduced by
+commit e4f5073d53be6c ("net: wwan: iosm: enable runtime pm support for
+7560").
+
+Fixes: e4f5073d53be ("net: wwan: iosm: enable runtime pm support for 7560")
+Reported-by: Martin <mwolf@adiumentum.com>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217996
+Link: https://lore.kernel.org/r/267abf02-4b60-4a2e-92cd-709e3da6f7d3@gmail.com/
+Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
+Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wwan/iosm/iosm_ipc_imem.c | 17 -----------------
+ drivers/net/wwan/iosm/iosm_ipc_imem.h | 2 --
+ drivers/net/wwan/iosm/iosm_ipc_pcie.c | 4 +---
+ drivers/net/wwan/iosm/iosm_ipc_port.c | 17 +----------------
+ drivers/net/wwan/iosm/iosm_ipc_trace.c | 8 --------
+ drivers/net/wwan/iosm/iosm_ipc_wwan.c | 21 ++-------------------
+ 6 files changed, 4 insertions(+), 65 deletions(-)
+
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c
+index 635301d677e1..829515a601b3 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_imem.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c
+@@ -4,7 +4,6 @@
+ */
+
+ #include <linux/delay.h>
+-#include <linux/pm_runtime.h>
+
+ #include "iosm_ipc_chnl_cfg.h"
+ #include "iosm_ipc_devlink.h"
+@@ -632,11 +631,6 @@ static void ipc_imem_run_state_worker(struct work_struct *instance)
+ /* Complete all memory stores after setting bit */
+ smp_mb__after_atomic();
+
+- if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7560_ID) {
+- pm_runtime_mark_last_busy(ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_imem->dev);
+- }
+-
+ return;
+
+ err_ipc_mux_deinit:
+@@ -1240,7 +1234,6 @@ void ipc_imem_cleanup(struct iosm_imem *ipc_imem)
+
+ /* forward MDM_NOT_READY to listeners */
+ ipc_uevent_send(ipc_imem->dev, UEVENT_MDM_NOT_READY);
+- pm_runtime_get_sync(ipc_imem->dev);
+
+ hrtimer_cancel(&ipc_imem->td_alloc_timer);
+ hrtimer_cancel(&ipc_imem->tdupdate_timer);
+@@ -1426,16 +1419,6 @@ struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id,
+
+ set_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag);
+ }
+-
+- if (!pm_runtime_enabled(ipc_imem->dev))
+- pm_runtime_enable(ipc_imem->dev);
+-
+- pm_runtime_set_autosuspend_delay(ipc_imem->dev,
+- IPC_MEM_AUTO_SUSPEND_DELAY_MS);
+- pm_runtime_use_autosuspend(ipc_imem->dev);
+- pm_runtime_allow(ipc_imem->dev);
+- pm_runtime_mark_last_busy(ipc_imem->dev);
+-
+ return ipc_imem;
+ devlink_channel_fail:
+ ipc_devlink_deinit(ipc_imem->ipc_devlink);
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h
+index 0144b45e2afb..5664ac507c90 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_imem.h
++++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h
+@@ -103,8 +103,6 @@ struct ipc_chnl_cfg;
+ #define FULLY_FUNCTIONAL 0
+ #define IOSM_DEVLINK_INIT 1
+
+-#define IPC_MEM_AUTO_SUSPEND_DELAY_MS 5000
+-
+ /* List of the supported UL/DL pipes. */
+ enum ipc_mem_pipes {
+ IPC_MEM_PIPE_0 = 0,
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+index 3a259c9abefd..04517bd3325a 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+@@ -6,7 +6,6 @@
+ #include <linux/acpi.h>
+ #include <linux/bitfield.h>
+ #include <linux/module.h>
+-#include <linux/pm_runtime.h>
+ #include <net/rtnetlink.h>
+
+ #include "iosm_ipc_imem.h"
+@@ -438,8 +437,7 @@ static int __maybe_unused ipc_pcie_resume_cb(struct device *dev)
+ return 0;
+ }
+
+-static DEFINE_RUNTIME_DEV_PM_OPS(iosm_ipc_pm, ipc_pcie_suspend_cb,
+- ipc_pcie_resume_cb, NULL);
++static SIMPLE_DEV_PM_OPS(iosm_ipc_pm, ipc_pcie_suspend_cb, ipc_pcie_resume_cb);
+
+ static struct pci_driver iosm_ipc_driver = {
+ .name = KBUILD_MODNAME,
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_port.c b/drivers/net/wwan/iosm/iosm_ipc_port.c
+index 2ba1ddca3945..5d5b4183e14a 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_port.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_port.c
+@@ -3,8 +3,6 @@
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+-#include <linux/pm_runtime.h>
+-
+ #include "iosm_ipc_chnl_cfg.h"
+ #include "iosm_ipc_imem_ops.h"
+ #include "iosm_ipc_port.h"
+@@ -15,16 +13,12 @@ static int ipc_port_ctrl_start(struct wwan_port *port)
+ struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port);
+ int ret = 0;
+
+- pm_runtime_get_sync(ipc_port->ipc_imem->dev);
+ ipc_port->channel = ipc_imem_sys_port_open(ipc_port->ipc_imem,
+ ipc_port->chl_id,
+ IPC_HP_CDEV_OPEN);
+ if (!ipc_port->channel)
+ ret = -EIO;
+
+- pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev);
+-
+ return ret;
+ }
+
+@@ -33,24 +27,15 @@ static void ipc_port_ctrl_stop(struct wwan_port *port)
+ {
+ struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port);
+
+- pm_runtime_get_sync(ipc_port->ipc_imem->dev);
+ ipc_imem_sys_port_close(ipc_port->ipc_imem, ipc_port->channel);
+- pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev);
+ }
+
+ /* transfer control data to modem */
+ static int ipc_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
+ {
+ struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port);
+- int ret;
+
+- pm_runtime_get_sync(ipc_port->ipc_imem->dev);
+- ret = ipc_imem_sys_cdev_write(ipc_port, skb);
+- pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev);
+-
+- return ret;
++ return ipc_imem_sys_cdev_write(ipc_port, skb);
+ }
+
+ static const struct wwan_port_ops ipc_wwan_ctrl_ops = {
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_trace.c b/drivers/net/wwan/iosm/iosm_ipc_trace.c
+index 4368373797b6..eeecfa3d10c5 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_trace.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_trace.c
+@@ -3,9 +3,7 @@
+ * Copyright (C) 2020-2021 Intel Corporation.
+ */
+
+-#include <linux/pm_runtime.h>
+ #include <linux/wwan.h>
+-
+ #include "iosm_ipc_trace.h"
+
+ /* sub buffer size and number of sub buffer */
+@@ -99,8 +97,6 @@ static ssize_t ipc_trace_ctrl_file_write(struct file *filp,
+ if (ret)
+ return ret;
+
+- pm_runtime_get_sync(ipc_trace->ipc_imem->dev);
+-
+ mutex_lock(&ipc_trace->trc_mutex);
+ if (val == TRACE_ENABLE && ipc_trace->mode != TRACE_ENABLE) {
+ ipc_trace->channel = ipc_imem_sys_port_open(ipc_trace->ipc_imem,
+@@ -121,10 +117,6 @@ static ssize_t ipc_trace_ctrl_file_write(struct file *filp,
+ ret = count;
+ unlock:
+ mutex_unlock(&ipc_trace->trc_mutex);
+-
+- pm_runtime_mark_last_busy(ipc_trace->ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_trace->ipc_imem->dev);
+-
+ return ret;
+ }
+
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+index 93d17de08786..ff747fc79aaf 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+@@ -6,7 +6,6 @@
+ #include <linux/etherdevice.h>
+ #include <linux/if_arp.h>
+ #include <linux/if_link.h>
+-#include <linux/pm_runtime.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/wwan.h>
+ #include <net/pkt_sched.h>
+@@ -52,13 +51,11 @@ static int ipc_wwan_link_open(struct net_device *netdev)
+ struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev);
+ struct iosm_wwan *ipc_wwan = priv->ipc_wwan;
+ int if_id = priv->if_id;
+- int ret = 0;
+
+ if (if_id < IP_MUX_SESSION_START ||
+ if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist))
+ return -EINVAL;
+
+- pm_runtime_get_sync(ipc_wwan->ipc_imem->dev);
+ /* get channel id */
+ priv->ch_id = ipc_imem_sys_wwan_open(ipc_wwan->ipc_imem, if_id);
+
+@@ -66,8 +63,7 @@ static int ipc_wwan_link_open(struct net_device *netdev)
+ dev_err(ipc_wwan->dev,
+ "cannot connect wwan0 & id %d to the IPC mem layer",
+ if_id);
+- ret = -ENODEV;
+- goto err_out;
++ return -ENODEV;
+ }
+
+ /* enable tx path, DL data may follow */
+@@ -76,11 +72,7 @@ static int ipc_wwan_link_open(struct net_device *netdev)
+ dev_dbg(ipc_wwan->dev, "Channel id %d allocated to if_id %d",
+ priv->ch_id, priv->if_id);
+
+-err_out:
+- pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev);
+-
+- return ret;
++ return 0;
+ }
+
+ /* Bring-down the wwan net link */
+@@ -90,12 +82,9 @@ static int ipc_wwan_link_stop(struct net_device *netdev)
+
+ netif_stop_queue(netdev);
+
+- pm_runtime_get_sync(priv->ipc_wwan->ipc_imem->dev);
+ ipc_imem_sys_wwan_close(priv->ipc_wwan->ipc_imem, priv->if_id,
+ priv->ch_id);
+ priv->ch_id = -1;
+- pm_runtime_mark_last_busy(priv->ipc_wwan->ipc_imem->dev);
+- pm_runtime_put_autosuspend(priv->ipc_wwan->ipc_imem->dev);
+
+ return 0;
+ }
+@@ -117,7 +106,6 @@ static netdev_tx_t ipc_wwan_link_transmit(struct sk_buff *skb,
+ if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist))
+ return -EINVAL;
+
+- pm_runtime_get(ipc_wwan->ipc_imem->dev);
+ /* Send the SKB to device for transmission */
+ ret = ipc_imem_sys_wwan_transmit(ipc_wwan->ipc_imem,
+ if_id, priv->ch_id, skb);
+@@ -131,14 +119,9 @@ static netdev_tx_t ipc_wwan_link_transmit(struct sk_buff *skb,
+ ret = NETDEV_TX_BUSY;
+ dev_err(ipc_wwan->dev, "unable to push packets");
+ } else {
+- pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev);
+ goto exit;
+ }
+
+- pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev);
+- pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev);
+-
+ return ret;
+
+ exit:
+--
+2.42.0
+
--- /dev/null
+From 2cfaa8b3b7aece3c7b13dd10db20dcea65875692 Mon Sep 17 00:00:00 2001
+From: Matthieu Baerts <matttbe@kernel.org>
+Date: Wed, 18 Oct 2023 11:23:56 -0700
+Subject: selftests: mptcp: join: no RST when rm subflow/addr
+
+From: Matthieu Baerts <matttbe@kernel.org>
+
+commit 2cfaa8b3b7aece3c7b13dd10db20dcea65875692 upstream.
+
+Recently, we noticed that some RST were wrongly generated when removing
+the initial subflow.
+
+This patch makes sure RST are not sent when removing any subflows or any
+addresses.
+
+Fixes: c2b2ae3925b6 ("mptcp: handle correctly disconnect() failures")
+Cc: stable@vger.kernel.org
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: Mat Martineau <martineau@kernel.org>
+Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-5-17ecb002e41d@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -2282,6 +2282,7 @@ remove_tests()
+ chk_join_nr 1 1 1
+ chk_rm_tx_nr 1
+ chk_rm_nr 1 1
++ chk_rst_nr 0 0
+ fi
+
+ # multiple subflows, remove
+@@ -2294,6 +2295,7 @@ remove_tests()
+ run_tests $ns1 $ns2 10.0.1.1 slow
+ chk_join_nr 2 2 2
+ chk_rm_nr 2 2
++ chk_rst_nr 0 0
+ fi
+
+ # single address, remove
+@@ -2306,6 +2308,7 @@ remove_tests()
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
++ chk_rst_nr 0 0
+ fi
+
+ # subflow and signal, remove
+@@ -2319,6 +2322,7 @@ remove_tests()
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ chk_rm_nr 1 1
++ chk_rst_nr 0 0
+ fi
+
+ # subflows and signal, remove
+@@ -2333,6 +2337,7 @@ remove_tests()
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ chk_rm_nr 2 2
++ chk_rst_nr 0 0
+ fi
+
+ # addresses remove
+@@ -2347,6 +2352,7 @@ remove_tests()
+ chk_join_nr 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert
++ chk_rst_nr 0 0
+ fi
+
+ # invalid addresses remove
+@@ -2361,6 +2367,7 @@ remove_tests()
+ chk_join_nr 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
++ chk_rst_nr 0 0
+ fi
+
+ # subflows and signal, flush
+@@ -2375,6 +2382,7 @@ remove_tests()
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ chk_rm_nr 1 3 invert simult
++ chk_rst_nr 0 0
+ fi
+
+ # subflows flush
+@@ -2394,6 +2402,7 @@ remove_tests()
+ else
+ chk_rm_nr 3 3
+ fi
++ chk_rst_nr 0 0
+ fi
+
+ # addresses flush
+@@ -2408,6 +2417,7 @@ remove_tests()
+ chk_join_nr 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert simult
++ chk_rst_nr 0 0
+ fi
+
+ # invalid addresses flush
+@@ -2422,6 +2432,7 @@ remove_tests()
+ chk_join_nr 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
++ chk_rst_nr 0 0
+ fi
+
+ # remove id 0 subflow
+@@ -2433,6 +2444,7 @@ remove_tests()
+ run_tests $ns1 $ns2 10.0.1.1 slow
+ chk_join_nr 1 1 1
+ chk_rm_nr 1 1
++ chk_rst_nr 0 0
+ fi
+
+ # remove id 0 address
+@@ -2445,6 +2457,7 @@ remove_tests()
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
++ chk_rst_nr 0 0 invert
+ fi
+ }
+
nfc-nci-fix-possible-null-pointer-dereference-in-send_acknowledge.patch
regmap-fix-null-deref-on-lookup.patch
kvm-x86-mask-lvtpc-when-handling-a-pmi.patch
+x86-sev-disable-mmio-emulation-from-user-mode.patch
+x86-sev-check-iobm-for-ioio-exceptions-from-user-space.patch
+x86-sev-check-for-user-space-ioio-pointing-to-kernel-space.patch
+x86-fpu-allow-caller-to-constrain-xfeatures-when-copying-to-uabi-buffer.patch
+kvm-x86-pmu-truncate-counter-value-to-allowed-width-on-write.patch
+kvm-x86-constrain-guest-supported-xfeatures-only-at-kvm_get_xsave-2.patch
+x86-kvm-svm-always-update-the-x2avic-msr-interception.patch
+x86-kvm-svm-add-support-for-invalid-ipi-vector-interception.patch
+x86-kvm-svm-refresh-avic-inhibition-in-svm_leave_nested.patch
+audit-io_uring-io_uring-openat-triggers-audit-reference-count-underflow.patch
+tcp-check-mptcp-level-constraints-for-backlog-coalescing.patch
+mptcp-more-conservative-check-for-zero-probes.patch
+selftests-mptcp-join-no-rst-when-rm-subflow-addr.patch
+mm-slab-do-not-create-kmalloc-caches-smaller-than-arch_slab_minalign.patch
+fs-ntfs3-fix-oob-read-in-ntfs_init_from_boot.patch
+fs-ntfs3-fix-possible-null-pointer-dereference-in-hdr_find_e.patch
+fs-ntfs3-fix-panic-about-slab-out-of-bounds-caused-by-ntfs_list_ea.patch
+fs-ntfs3-fix-shift-out-of-bounds-in-ntfs_fill_super.patch
+fs-ntfs3-fix-deadlock-in-mark_as_free_ex.patch
+revert-net-wwan-iosm-enable-runtime-pm-support-for-7560.patch
+netfilter-nft_payload-fix-wrong-mac-header-matching.patch
--- /dev/null
+From 6db8a37dfc541e059851652cfd4f0bb13b8ff6af Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Wed, 18 Oct 2023 11:23:53 -0700
+Subject: tcp: check mptcp-level constraints for backlog coalescing
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 6db8a37dfc541e059851652cfd4f0bb13b8ff6af upstream.
+
+The MPTCP protocol can acquire the subflow-level socket lock and
+thereby cause TCP backlog usage. When inserting new skbs into the
+backlog, the stack will try to coalesce them.
+
+Currently, we have no check in place to ensure that such coalescing
+will respect the MPTCP-level DSS, and that may cause data stream
+corruption, as reported by Christoph.
+
+Address the issue by adding the relevant admission check for coalescing
+in tcp_add_backlog().
+
+Note the issue is not easy to reproduce, as the MPTCP protocol tries
+hard to avoid acquiring the subflow-level socket lock.
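+
+A hedged sketch of the extra admission test, with a deliberately
+simplified model of a DSS mapping as (sequence, length); the real
+mptcp_skb_can_collapse() compares the MPTCP extensions of both skbs:
+
+  #include <stdbool.h>
+  #include <stdint.h>
+
+  struct mapping { uint64_t seq; uint32_t len; bool present; };
+
+  /* Only glue two segments into one skb when they carry the same
+   * MPTCP-level mapping; merging across mappings corrupts the
+   * data stream. */
+  static bool can_coalesce(const struct mapping *tail,
+                           const struct mapping *next)
+  {
+          return tail->present == next->present &&
+                 tail->seq == next->seq && tail->len == next->len;
+  }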
+
+Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/420
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <martineau@kernel.org>
+Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-2-17ecb002e41d@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1869,6 +1869,7 @@ bool tcp_add_backlog(struct sock *sk, st
+ #ifdef CONFIG_TLS_DEVICE
+ tail->decrypted != skb->decrypted ||
+ #endif
++ !mptcp_skb_can_collapse(tail, skb) ||
+ thtail->doff != th->doff ||
+ memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
+ goto no_coalesce;
--- /dev/null
+From 18164f66e6c59fda15c198b371fa008431efdb22 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:52 -0700
+Subject: x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 18164f66e6c59fda15c198b371fa008431efdb22 upstream.
+
+Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can
+constrain which xfeatures are saved into the userspace buffer without
+having to modify the user_xfeatures field in KVM's guest_fpu state.
+
+KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to
+the guest must not show up in the effective xstate_bv field of the
+buffer. Saving only the guest-supported xfeatures allows userspace to
+load the saved state on a different host with fewer xfeatures, so long
+as the target host supports the xfeatures that are exposed to the guest.
+
+KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to
+the set of guest-supported xfeatures, but doing so broke KVM's historical
+ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that
+are supported by the *host*.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230928001956.924301-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/fpu/api.h | 3 ++-
+ arch/x86/kernel/fpu/core.c | 5 +++--
+ arch/x86/kernel/fpu/xstate.c | 7 +++++--
+ arch/x86/kernel/fpu/xstate.h | 3 ++-
+ arch/x86/kvm/x86.c | 21 +++++++++------------
+ 5 files changed, 21 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/include/asm/fpu/api.h
++++ b/arch/x86/include/asm/fpu/api.h
+@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(
+ static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+ #endif
+
+-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
++extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
++ unsigned int size, u64 xfeatures, u32 pkru);
+ extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
+
+ static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_gues
+ EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
+
+ void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+- unsigned int size, u32 pkru)
++ unsigned int size, u64 xfeatures, u32 pkru)
+ {
+ struct fpstate *kstate = gfpu->fpstate;
+ union fpregs_state *ustate = buf;
+ struct membuf mb = { .p = buf, .left = size };
+
+ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
++ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
++ XSTATE_COPY_XSAVE);
+ } else {
+ memcpy(&ustate->fxsave, &kstate->regs.fxsave,
+ sizeof(ustate->fxsave));
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstat
+ * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
+ * @to: membuf descriptor
+ * @fpstate: The fpstate buffer from which to copy
++ * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
+ * @pkru_val: The PKRU value to store in the PKRU component
+ * @copy_mode: The requested copy mode
+ *
+@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstat
+ * It supports partial copy but @to.pos always starts from zero.
+ */
+ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+- u32 pkru_val, enum xstate_copy_mode copy_mode)
++ u64 xfeatures, u32 pkru_val,
++ enum xstate_copy_mode copy_mode)
+ {
+ const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+ struct xregs_state *xinit = &init_fpstate.regs.xsave;
+@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct me
+ break;
+
+ case XSTATE_COPY_XSAVE:
+- header.xfeatures &= fpstate->user_xfeatures;
++ header.xfeatures &= fpstate->user_xfeatures & xfeatures;
+ break;
+ }
+
+@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct memb
+ enum xstate_copy_mode copy_mode)
+ {
+ __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
++ tsk->thread.fpu.fpstate->user_xfeatures,
+ tsk->thread.pkru, copy_mode);
+ }
+
+--- a/arch/x86/kernel/fpu/xstate.h
++++ b/arch/x86/kernel/fpu/xstate.h
+@@ -43,7 +43,8 @@ enum xstate_copy_mode {
+
+ struct membuf;
+ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+- u32 pkru_val, enum xstate_copy_mode copy_mode);
++ u64 xfeatures, u32 pkru_val,
++ enum xstate_copy_mode copy_mode);
+ extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+ enum xstate_copy_mode mode);
+ extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5385,26 +5385,23 @@ static int kvm_vcpu_ioctl_x86_set_debugr
+ return 0;
+ }
+
+-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+- struct kvm_xsave *guest_xsave)
++
++static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
++ u8 *state, unsigned int size)
+ {
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return;
+
+- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+- guest_xsave->region,
+- sizeof(guest_xsave->region),
++ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
++ vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+ vcpu->arch.pkru);
+ }
+
+-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+- u8 *state, unsigned int size)
++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
++ struct kvm_xsave *guest_xsave)
+ {
+- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+- return;
+-
+- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+- state, size, vcpu->arch.pkru);
++ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
++ sizeof(guest_xsave->region));
+ }
+
+ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
--- /dev/null
+From 2dcf37abf9d3aab7f975002d29fc7c17272def38 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 28 Sep 2023 20:33:52 +0300
+Subject: x86: KVM: SVM: add support for Invalid IPI Vector interception
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 2dcf37abf9d3aab7f975002d29fc7c17272def38 upstream.
+
+In later revisions of AMD's APM, there is a new 'incomplete IPI' exit code:
+
+"Invalid IPI Vector - The vector for the specified IPI was set to an
+illegal value (VEC < 16)"
+
+Note that tests on a Zen2 machine show that this VM exit doesn't
+happen; instead, AVIC just does nothing.
+
+Add support for this exit code by doing nothing, instead of filling
+the kernel log with errors.
+
+Also, replace the unthrottled 'pr_err()' that fires if another unknown
+incomplete-IPI exit happens with vcpu_unimpl() (e.g. in case AMD adds
+yet another 'Invalid IPI' exit reason).
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230928173354.217464-3-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/svm.h | 1 +
+ arch/x86/kvm/svm/avic.c | 5 ++++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/svm.h
++++ b/arch/x86/include/asm/svm.h
+@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
++ AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
+ };
+
+ #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
+--- a/arch/x86/kvm/svm/avic.c
++++ b/arch/x86/kvm/svm/avic.c
+@@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(str
+ case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+ WARN_ONCE(1, "Invalid backing page\n");
+ break;
++ case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
++ /* Invalid IPI with vector < 16 */
++ break;
+ default:
+- pr_err("Unknown IPI interception\n");
++ vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
+ }
+
+ return 1;
--- /dev/null
+From b65235f6e102354ccafda601eaa1c5bef5284d21 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 28 Sep 2023 20:33:51 +0300
+Subject: x86: KVM: SVM: always update the x2avic msr interception
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit b65235f6e102354ccafda601eaa1c5bef5284d21 upstream.
+
+The following problem has existed since x2avic was enabled in KVM:
+
+svm_set_x2apic_msr_interception() is called to enable interception of
+the x2apic MSRs.
+
+In particular, it is called at the moment the guest resets its apic.
+
+Assuming that the guest's apic was in x2apic mode, the reset will bring
+it back to xapic mode.
+
+svm_set_x2apic_msr_interception(), however, has an erroneous check for
+'!apic_x2apic_mode()' which prevents it from doing anything in this case.
+
+As a result, all x2apic MSRs are left unintercepted, which exposes the
+bare-metal x2apic (if enabled) to the guest. Oops.
+
+Remove the erroneous '!apic_x2apic_mode()' check to fix that.
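+
+For illustration, a toy model of the broken early-return in plain C, not
+kernel code; the boolean parameters stand in for x2avic_enabled and
+apic_x2apic_mode():
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  /* Buggy: also bails out when the guest APIC is *not* in x2apic mode,
+   * which is exactly the state right after an APIC reset. */
+  static bool buggy_skip_update(bool x2avic_enabled, bool in_x2apic_mode)
+  {
+          return !x2avic_enabled || !in_x2apic_mode;
+  }
+
+  /* Fixed: only hardware support decides whether to touch the intercepts. */
+  static bool fixed_skip_update(bool x2avic_enabled)
+  {
+          return !x2avic_enabled;
+  }
+
+  int main(void)
+  {
+          /* Guest resets its APIC back to xapic mode: the update must run. */
+          printf("buggy skips re-intercept: %d\n", buggy_skip_update(true, false));
+          printf("fixed skips re-intercept: %d\n", fixed_skip_update(true));
+          return 0;
+  }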
+
+This fixes CVE-2023-5090
+
+Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
+Cc: stable@vger.kernel.org
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230928173354.217464-2-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -829,8 +829,7 @@ void svm_set_x2apic_msr_interception(str
+ if (intercept == svm->x2avic_msrs_intercepted)
+ return;
+
+- if (!x2avic_enabled ||
+- !apic_x2apic_mode(svm->vcpu.arch.apic))
++ if (!x2avic_enabled)
+ return;
+
+ for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {
--- /dev/null
+From 3fdc6087df3be73a212a81ce5dd6516638568806 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 28 Sep 2023 20:33:53 +0300
+Subject: x86: KVM: SVM: refresh AVIC inhibition in svm_leave_nested()
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 3fdc6087df3be73a212a81ce5dd6516638568806 upstream.
+
+svm_leave_nested(), similar to a nested VM exit, gets the vCPU out of
+nested mode and thus should end the local inhibition of AVIC on this vCPU.
+
+Failure to do so can lead to hangs on guest reboot.
+
+Raise the KVM_REQ_APICV_UPDATE request to refresh the AVIC state of the
+current vCPU in this case.
+
+Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230928173354.217464-4-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1243,6 +1243,9 @@ void svm_leave_nested(struct kvm_vcpu *v
+
+ nested_svm_uninit_mmu_context(vcpu);
+ vmcb_mark_all_dirty(svm->vmcb);
++
++ if (kvm_apicv_activated(vcpu->kvm))
++ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ }
+
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
--- /dev/null
+From a2e312947cba31a667fc6f953bfbf891861efd30 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Mon, 16 Oct 2023 14:42:50 +0200
+Subject: x86/sev: Check for user-space IOIO pointing to kernel space
+
+From: Joerg Roedel <jroedel@suse.de>
+
+Upstream commit: 63e44bc52047f182601e7817da969a105aa1f721
+
+Check the memory operand of INS/OUTS before emulating the instruction.
+The #VC exception can get raised from user-space, but the memory operand
+can be manipulated to access kernel memory before the emulation actually
+begins and after the exception handler has run.
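+
+The essence of the added check, as a stand-alone sketch: when the #VC was
+raised from user mode, refuse to emulate a string-I/O memory operand that
+points into kernel address space. The address cutoff below is
+illustrative; the kernel uses its fault_in_kernel_space() helper instead.
+
+  #include <stdbool.h>
+
+  #define DEMO_USER_ADDR_MAX 0x00007fffffffefffUL  /* illustrative cutoff */
+
+  static bool demo_fault_in_kernel_space(unsigned long address)
+  {
+          return address >= DEMO_USER_ADDR_MAX;
+  }
+
+  /* Reject the emulation (as if a #PF had occurred) instead of touching
+   * kernel memory on behalf of a user-mode instruction. */
+  static bool demo_operand_ok(bool from_user_mode, unsigned long operand)
+  {
+          return !(from_user_mode && demo_fault_in_kernel_space(operand));
+  }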
+
+ [ bp: Massage commit message. ]
+
+Fixes: 597cfe48212a ("x86/boot/compressed/64: Setup a GHCB-based VC Exception handler")
+Reported-by: Tom Dohrmann <erbse.13@gmx.de>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: <stable@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/sev.c | 5 +++++
+ arch/x86/kernel/sev-shared.c | 31 +++++++++++++++++++++++++++++--
+ 2 files changed, 34 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/boot/compressed/sev.c
++++ b/arch/x86/boot/compressed/sev.c
+@@ -108,6 +108,11 @@ static enum es_result vc_ioio_check(stru
+ return ES_OK;
+ }
+
++static bool fault_in_kernel_space(unsigned long address)
++{
++ return false;
++}
++
+ #undef __init
+ #define __init
+
+--- a/arch/x86/kernel/sev-shared.c
++++ b/arch/x86/kernel/sev-shared.c
+@@ -632,6 +632,23 @@ fail:
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+ }
+
++static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
++ unsigned long address,
++ bool write)
++{
++ if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
++ ctxt->fi.vector = X86_TRAP_PF;
++ ctxt->fi.error_code = X86_PF_USER;
++ ctxt->fi.cr2 = address;
++ if (write)
++ ctxt->fi.error_code |= X86_PF_WRITE;
++
++ return ES_EXCEPTION;
++ }
++
++ return ES_OK;
++}
++
+ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
+ void *src, char *buf,
+ unsigned int data_size,
+@@ -639,7 +656,12 @@ static enum es_result vc_insn_string_rea
+ bool backwards)
+ {
+ int i, b = backwards ? -1 : 1;
+- enum es_result ret = ES_OK;
++ unsigned long address = (unsigned long)src;
++ enum es_result ret;
++
++ ret = vc_insn_string_check(ctxt, address, false);
++ if (ret != ES_OK)
++ return ret;
+
+ for (i = 0; i < count; i++) {
+ void *s = src + (i * data_size * b);
+@@ -660,7 +682,12 @@ static enum es_result vc_insn_string_wri
+ bool backwards)
+ {
+ int i, s = backwards ? -1 : 1;
+- enum es_result ret = ES_OK;
++ unsigned long address = (unsigned long)dst;
++ enum es_result ret;
++
++ ret = vc_insn_string_check(ctxt, address, true);
++ if (ret != ES_OK)
++ return ret;
+
+ for (i = 0; i < count; i++) {
+ void *d = dst + (i * data_size * s);
--- /dev/null
+From 85465dd77a9e9ecfb18086120600e2361de570da Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Wed, 21 Jun 2023 17:42:42 +0200
+Subject: x86/sev: Check IOBM for IOIO exceptions from user-space
+
+From: Joerg Roedel <jroedel@suse.de>
+
+Upstream commit: b9cb9c45583b911e0db71d09caa6b56469eb2bdf
+
+Check the I/O permission bitmap (if present) before emulating IOIO #VC
+exceptions for user-space. These permissions are already checked by
+hardware before the #VC is raised, but due to the VC-handler decoding
+race they need to be checked again in software.
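+
+Conceptually, the added check walks the task's I/O permission bitmap for
+every byte of the access, mirroring what the hardware verified before
+raising the #VC. A simplified stand-alone sketch (a set bit means
+"denied", as in the x86 TSS I/O bitmap; the flat byte-array layout is an
+assumption of this demo):
+
+  #include <stdbool.h>
+  #include <stddef.h>
+  #include <stdint.h>
+
+  #define DEMO_IO_BITMAP_BITS 65536
+
+  static bool demo_port_permitted(const uint8_t *bitmap,
+                                  uint16_t port, size_t size)
+  {
+          for (size_t idx = port; idx < (size_t)port + size; ++idx) {
+                  if (idx >= DEMO_IO_BITMAP_BITS)
+                          return false;
+                  if (bitmap[idx / 8] & (1u << (idx % 8)))
+                          return false;   /* set bit: access denied */
+          }
+          return true;
+  }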
+
+Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions")
+Reported-by: Tom Dohrmann <erbse.13@gmx.de>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Tom Dohrmann <erbse.13@gmx.de>
+Cc: <stable@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/sev.c | 5 +++++
+ arch/x86/kernel/sev-shared.c | 22 +++++++++++++++-------
+ arch/x86/kernel/sev.c | 27 +++++++++++++++++++++++++++
+ 3 files changed, 47 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/boot/compressed/sev.c
++++ b/arch/x86/boot/compressed/sev.c
+@@ -103,6 +103,11 @@ static enum es_result vc_read_mem(struct
+ return ES_OK;
+ }
+
++static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
++{
++ return ES_OK;
++}
++
+ #undef __init
+ #define __init
+
+--- a/arch/x86/kernel/sev-shared.c
++++ b/arch/x86/kernel/sev-shared.c
+@@ -696,6 +696,9 @@ static enum es_result vc_insn_string_wri
+ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
+ {
+ struct insn *insn = &ctxt->insn;
++ size_t size;
++ u64 port;
++
+ *exitinfo = 0;
+
+ switch (insn->opcode.bytes[0]) {
+@@ -704,7 +707,7 @@ static enum es_result vc_ioio_exitinfo(s
+ case 0x6d:
+ *exitinfo |= IOIO_TYPE_INS;
+ *exitinfo |= IOIO_SEG_ES;
+- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
++ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* OUTS opcodes */
+@@ -712,41 +715,43 @@ static enum es_result vc_ioio_exitinfo(s
+ case 0x6f:
+ *exitinfo |= IOIO_TYPE_OUTS;
+ *exitinfo |= IOIO_SEG_DS;
+- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
++ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* IN immediate opcodes */
+ case 0xe4:
+ case 0xe5:
+ *exitinfo |= IOIO_TYPE_IN;
+- *exitinfo |= (u8)insn->immediate.value << 16;
++ port = (u8)insn->immediate.value & 0xffff;
+ break;
+
+ /* OUT immediate opcodes */
+ case 0xe6:
+ case 0xe7:
+ *exitinfo |= IOIO_TYPE_OUT;
+- *exitinfo |= (u8)insn->immediate.value << 16;
++ port = (u8)insn->immediate.value & 0xffff;
+ break;
+
+ /* IN register opcodes */
+ case 0xec:
+ case 0xed:
+ *exitinfo |= IOIO_TYPE_IN;
+- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
++ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* OUT register opcodes */
+ case 0xee:
+ case 0xef:
+ *exitinfo |= IOIO_TYPE_OUT;
+- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
++ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ default:
+ return ES_DECODE_FAILED;
+ }
+
++ *exitinfo |= port << 16;
++
+ switch (insn->opcode.bytes[0]) {
+ case 0x6c:
+ case 0x6e:
+@@ -756,12 +761,15 @@ static enum es_result vc_ioio_exitinfo(s
+ case 0xee:
+ /* Single byte opcodes */
+ *exitinfo |= IOIO_DATA_8;
++ size = 1;
+ break;
+ default:
+ /* Length determined by instruction parsing */
+ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
+ : IOIO_DATA_32;
++ size = (insn->opnd_bytes == 2) ? 2 : 4;
+ }
++
+ switch (insn->addr_bytes) {
+ case 2:
+ *exitinfo |= IOIO_ADDR_16;
+@@ -777,7 +785,7 @@ static enum es_result vc_ioio_exitinfo(s
+ if (insn_has_rep_prefix(insn))
+ *exitinfo |= IOIO_REP;
+
+- return ES_OK;
++ return vc_ioio_check(ctxt, (u16)port, size);
+ }
+
+ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+--- a/arch/x86/kernel/sev.c
++++ b/arch/x86/kernel/sev.c
+@@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_ph
+ return ES_OK;
+ }
+
++static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
++{
++ BUG_ON(size > 4);
++
++ if (user_mode(ctxt->regs)) {
++ struct thread_struct *t = &current->thread;
++ struct io_bitmap *iobm = t->io_bitmap;
++ size_t idx;
++
++ if (!iobm)
++ goto fault;
++
++ for (idx = port; idx < port + size; ++idx) {
++ if (test_bit(idx, iobm->bitmap))
++ goto fault;
++ }
++ }
++
++ return ES_OK;
++
++fault:
++ ctxt->fi.vector = X86_TRAP_GP;
++ ctxt->fi.error_code = 0;
++
++ return ES_EXCEPTION;
++}
++
+ /* Include code shared with pre-decompression boot stage */
+ #include "sev-shared.c"
+
--- /dev/null
+From ed57a67142cadfb15e8bc47c5a86456298d7d000 Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Thu, 5 Oct 2023 11:06:36 +0200
+Subject: x86/sev: Disable MMIO emulation from user mode
+
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+
+Upstream commit: a37cd2a59d0cb270b1bba568fd3a3b8668b9d3ba
+
+A virt scenario can be constructed where MMIO memory can be user memory.
+When that happens, a race condition opens between when the hardware
+raises the #VC and when the #VC handler gets to emulate the instruction.
+
+If the MOVS is replaced with a MOVS accessing kernel memory in that
+small race window, then a write to kernel memory happens, as the access
+checks are not done at emulation time.
+
+Disable MMIO emulation in user mode temporarily until a sensible use
+case appears and justifies properly handling the race window.
+
+Fixes: 0118b604c2c9 ("x86/sev-es: Handle MMIO String Instructions")
+Reported-by: Tom Dohrmann <erbse.13@gmx.de>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Tom Dohrmann <erbse.13@gmx.de>
+Cc: <stable@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/sev.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kernel/sev.c
++++ b/arch/x86/kernel/sev.c
+@@ -1508,6 +1508,9 @@ static enum es_result vc_handle_mmio(str
+ return ES_DECODE_FAILED;
+ }
+
++ if (user_mode(ctxt->regs))
++ return ES_UNSUPPORTED;
++
+ switch (mmio) {
+ case INSN_MMIO_WRITE:
+ memcpy(ghcb->shared_buffer, reg_data, bytes);