Fixes for 5.10

author Sasha Levin <sashal@kernel.org>

Fri, 4 Nov 2022 14:54:41 +0000 (10:54 -0400)

committer Sasha Levin <sashal@kernel.org>

Fri, 4 Nov 2022 14:54:41 +0000 (10:54 -0400)
author Sasha Levin <sashal@kernel.org>
Fri, 4 Nov 2022 14:54:41 +0000 (10:54 -0400)
committer Sasha Levin <sashal@kernel.org>
Fri, 4 Nov 2022 14:54:41 +0000 (10:54 -0400)
diff --git a/queue-5.10/kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch b/queue-5.10/kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch

new file mode 100644 (file)

index 0000000..11ca87e
--- /dev/null
+++ b/queue-5.10/kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch
@@ -0,0 +1,54 @@
+From a9675746749792878cf5cd6521d6b5b459e936df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Aug 2022 15:37:21 +0200
+Subject: KVM: nVMX: Don't propagate vmcs12's PERF_GLOBAL_CTRL settings to
+ vmcs02
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit def9d705c05eab3fdedeb10ad67907513b12038e ]
+
+Don't propagate vmcs12's VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL to vmcs02.
+KVM doesn't disallow L1 from using VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL
+even when KVM itself doesn't use the control, e.g. due to the various
+CPU errata where the MSR can be corrupted on VM-Exit.
+
+Preserve KVM's (vmcs01) setting to hopefully avoid having to toggle the
+bit in vmcs02 at a later point.  E.g. if KVM is loading PERF_GLOBAL_CTRL
+when running L1, then odds are good KVM will also load the MSR when
+running L2.
+
+Fixes: 8bf00a529967 ("KVM: VMX: add support for switching of PERF_GLOBAL_CTRL")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://lore.kernel.org/r/20220830133737.1539624-18-vkuznets@redhat.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 2395387945a8..498fed0dda98 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2345,9 +2345,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
+        * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
+        * on the related bits (if supported by the CPU) in the hope that
+        * we can avoid VMWrites during vmx_set_efer().
++       *
++       * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is
++       * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to
++       * do the same for L2.
+        */
+       exec_control = __vm_entry_controls_get(vmcs01);
+-      exec_control |= vmcs12->vm_entry_controls;
++      exec_control |= (vmcs12->vm_entry_controls &
++                       ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
+       exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
+       if (cpu_has_load_ia32_efer()) {
+               if (guest_efer & EFER_LMA)
+-- 
+2.35.1
+
diff --git a/queue-5.10/kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch b/queue-5.10/kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch

new file mode 100644 (file)

index 0000000..567639d
--- /dev/null
+++ b/queue-5.10/kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch
@@ -0,0 +1,151 @@
+From fa34c2a052c62e8f2245b49665951978fbd7dbc4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Aug 2021 10:19:50 -0700
+Subject: KVM: nVMX: Pull KVM L0's desired controls directly from vmcs01
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 389ab25216c9d09e0d335e764eeeb84c2089614f ]
+
+When preparing controls for vmcs02, grab KVM's desired controls from
+vmcs01's shadow state instead of recalculating the controls from scratch,
+or, for the secondary execution controls, instead of using the dedicated
+cache.  Calculating secondary exec controls is eye-poppingly expensive
+due to the guest CPUID checks, hence the dedicated cache, but the other
+calculations aren't exactly free either.
+
+Explicitly clear several bits (x2APIC, DESC exiting, and load EFER on
+exit) as appropriate as they may be set in vmcs01, whereas the previous
+implementation relied on dynamic bits being cleared in the calculator.
+
+Intentionally propagate VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL from
+vmcs01 to vmcs02.  Whether or not PERF_GLOBAL_CTRL is loaded depends on
+whether or not perf itself is active, so unless perf stops between the
+exit from L1 and entry to L2, vmcs01 will hold the desired value.  This
+is purely an optimization as atomic_switch_perf_msrs() will set/clear
+the control as needed at VM-Enter, i.e. it avoids two extra VMWRITEs in
+the case where perf is active (versus starting with the bits clear in
+vmcs02, which was the previous behavior).
+
+Cc: Zeng Guang <guang.zeng@intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210810171952.2758100-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: def9d705c05e ("KVM: nVMX: Don't propagate vmcs12's PERF_GLOBAL_CTRL settings to vmcs02")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++---------
+ arch/x86/kvm/vmx/vmx.h    |  6 +++++-
+ 2 files changed, 21 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 7f15e2b2a0d6..2395387945a8 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2232,7 +2232,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
+       }
+ }
+ 
+-static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
++static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
++                               struct vmcs12 *vmcs12)
+ {
+       u32 exec_control, vmcs12_exec_ctrl;
+       u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
+@@ -2243,7 +2244,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+       /*
+        * PIN CONTROLS
+        */
+-      exec_control = vmx_pin_based_exec_ctrl(vmx);
++      exec_control = __pin_controls_get(vmcs01);
+       exec_control |= (vmcs12->pin_based_vm_exec_control &
+                        ~PIN_BASED_VMX_PREEMPTION_TIMER);
+ 
+@@ -2258,7 +2259,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+       /*
+        * EXEC CONTROLS
+        */
+-      exec_control = vmx_exec_control(vmx); /* L0's desires */
++      exec_control = __exec_controls_get(vmcs01); /* L0's desires */
+       exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
+       exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
+       exec_control &= ~CPU_BASED_TPR_SHADOW;
+@@ -2295,17 +2296,20 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+        * SECONDARY EXEC CONTROLS
+        */
+       if (cpu_has_secondary_exec_ctrls()) {
+-              exec_control = vmx->secondary_exec_control;
++              exec_control = __secondary_exec_controls_get(vmcs01);
+ 
+               /* Take the following fields only from vmcs12 */
+               exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
++                                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+                                 SECONDARY_EXEC_ENABLE_INVPCID |
+                                 SECONDARY_EXEC_ENABLE_RDTSCP |
+                                 SECONDARY_EXEC_XSAVES |
+                                 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
+                                 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+                                 SECONDARY_EXEC_APIC_REGISTER_VIRT |
+-                                SECONDARY_EXEC_ENABLE_VMFUNC);
++                                SECONDARY_EXEC_ENABLE_VMFUNC |
++                                SECONDARY_EXEC_DESC);
++
+               if (nested_cpu_has(vmcs12,
+                                  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
+                       vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
+@@ -2342,8 +2346,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+        * on the related bits (if supported by the CPU) in the hope that
+        * we can avoid VMWrites during vmx_set_efer().
+        */
+-      exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
+-                      ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
++      exec_control = __vm_entry_controls_get(vmcs01);
++      exec_control |= vmcs12->vm_entry_controls;
++      exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
+       if (cpu_has_load_ia32_efer()) {
+               if (guest_efer & EFER_LMA)
+                       exec_control |= VM_ENTRY_IA32E_MODE;
+@@ -2359,9 +2364,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+        * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
+        * bits may be modified by vmx_set_efer() in prepare_vmcs02().
+        */
+-      exec_control = vmx_vmexit_ctrl();
++      exec_control = __vm_exit_controls_get(vmcs01);
+       if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
+               exec_control |= VM_EXIT_LOAD_IA32_EFER;
++      else
++              exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
+       vm_exit_controls_set(vmx, exec_control);
+ 
+       /*
+@@ -3370,7 +3377,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ 
+       vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
+ 
+-      prepare_vmcs02_early(vmx, vmcs12);
++      prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
+ 
+       if (from_vmentry) {
+               if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
+diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
+index 24903f05c204..ed4b6da83aa8 100644
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -386,9 +386,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val)        \
+               vmx->loaded_vmcs->controls_shadow.lname = val;              \
+       }                                                                   \
+ }                                                                         \
++static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs)      \
++{                                                                         \
++      return vmcs->controls_shadow.lname;                                 \
++}                                                                         \
+ static inline u32 lname##_controls_get(struct vcpu_vmx *vmx)              \
+ {                                                                         \
+-      return vmx->loaded_vmcs->controls_shadow.lname;                     \
++      return __##lname##_controls_get(vmx->loaded_vmcs);                  \
+ }                                                                         \
+ static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val)   \
+ {                                                                         \
+-- 
+2.35.1
+
diff --git a/queue-5.10/kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch b/queue-5.10/kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch

new file mode 100644 (file)

index 0000000..56a6ed8
--- /dev/null
+++ b/queue-5.10/kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch
@@ -0,0 +1,99 @@
+From da02dca0fc1b37ce6af610f96a9329668d608320 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Oct 2022 20:45:41 +0200
+Subject: KVM: x86: Add compat handler for KVM_X86_SET_MSR_FILTER
+
+From: Alexander Graf <graf@amazon.com>
+
+[ Upstream commit 1739c7017fb1d759965dcbab925ff5980a5318cb ]
+
+The KVM_X86_SET_MSR_FILTER ioctl contains a pointer in the passed-in
+struct which means it has a different struct size depending on whether
+it gets called from 32bit or 64bit code.
+
+This patch introduces compat code that converts from the 32bit struct to
+its 64bit counterpart which then gets used going forward internally.
+With this applied, 32bit QEMU can successfully set MSR bitmaps when
+running on 64bit kernels.
+
+Reported-by: Andrew Randrianasulu <randrianasulu@gmail.com>
+Fixes: 1a155254ff937 ("KVM: x86: Introduce MSR filtering")
+Signed-off-by: Alexander Graf <graf@amazon.com>
+Message-Id: <20221017184541.2658-4-graf@amazon.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 56 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index be4326b143e1..0ac80b3ff0f5 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5493,6 +5493,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
+       return 0;
+ }
+ 
++#ifdef CONFIG_KVM_COMPAT
++/* for KVM_X86_SET_MSR_FILTER */
++struct kvm_msr_filter_range_compat {
++      __u32 flags;
++      __u32 nmsrs;
++      __u32 base;
++      __u32 bitmap;
++};
++
++struct kvm_msr_filter_compat {
++      __u32 flags;
++      struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES];
++};
++
++#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat)
++
++long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
++                            unsigned long arg)
++{
++      void __user *argp = (void __user *)arg;
++      struct kvm *kvm = filp->private_data;
++      long r = -ENOTTY;
++
++      switch (ioctl) {
++      case KVM_X86_SET_MSR_FILTER_COMPAT: {
++              struct kvm_msr_filter __user *user_msr_filter = argp;
++              struct kvm_msr_filter_compat filter_compat;
++              struct kvm_msr_filter filter;
++              int i;
++
++              if (copy_from_user(&filter_compat, user_msr_filter,
++                                 sizeof(filter_compat)))
++                      return -EFAULT;
++
++              filter.flags = filter_compat.flags;
++              for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
++                      struct kvm_msr_filter_range_compat *cr;
++
++                      cr = &filter_compat.ranges[i];
++                      filter.ranges[i] = (struct kvm_msr_filter_range) {
++                              .flags = cr->flags,
++                              .nmsrs = cr->nmsrs,
++                              .base = cr->base,
++                              .bitmap = (__u8 *)(ulong)cr->bitmap,
++                      };
++              }
++
++              r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
++              break;
++      }
++      }
++
++      return r;
++}
++#endif
++
+ long kvm_arch_vm_ioctl(struct file *filp,
+                      unsigned int ioctl, unsigned long arg)
+ {
+-- 
+2.35.1
+
diff --git a/queue-5.10/kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch b/queue-5.10/kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch

new file mode 100644 (file)

index 0000000..a3854c7
--- /dev/null
+++ b/queue-5.10/kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch
@@ -0,0 +1,93 @@
+From 1854d4fb3789bf2624f95981ef443ad9a7981acd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Oct 2022 20:45:40 +0200
+Subject: KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter()
+
+From: Alexander Graf <graf@amazon.com>
+
+[ Upstream commit 2e3272bc1790825c43d2c39690bf2836b81c6d36 ]
+
+In the next patch we want to introduce a second caller to
+set_msr_filter() which constructs its own filter list on the stack.
+Refactor the original function so it takes it as argument instead of
+reading it through copy_from_user().
+
+Signed-off-by: Alexander Graf <graf@amazon.com>
+Message-Id: <20221017184541.2658-3-graf@amazon.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 31 +++++++++++++++++--------------
+ 1 file changed, 17 insertions(+), 14 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index ed8efd402d05..be4326b143e1 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5446,26 +5446,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
+       return r;
+ }
+ 
+-static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
++static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
++                                     struct kvm_msr_filter *filter)
+ {
+-      struct kvm_msr_filter __user *user_msr_filter = argp;
+       struct kvm_x86_msr_filter *new_filter, *old_filter;
+-      struct kvm_msr_filter filter;
+       bool default_allow;
+       bool empty = true;
+       int r = 0;
+       u32 i;
+ 
+-      if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+-              return -EFAULT;
+-
+-      if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
++      if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+               return -EINVAL;
+ 
+-      for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
+-              empty &= !filter.ranges[i].nmsrs;
++      for (i = 0; i < ARRAY_SIZE(filter->ranges); i++)
++              empty &= !filter->ranges[i].nmsrs;
+ 
+-      default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
++      default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY);
+       if (empty && !default_allow)
+               return -EINVAL;
+ 
+@@ -5473,8 +5469,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+       if (!new_filter)
+               return -ENOMEM;
+ 
+-      for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
+-              r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
++      for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) {
++              r = kvm_add_msr_filter(new_filter, &filter->ranges[i]);
+               if (r) {
+                       kvm_free_msr_filter(new_filter);
+                       return r;
+@@ -5803,9 +5799,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
+       case KVM_SET_PMU_EVENT_FILTER:
+               r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
+               break;
+-      case KVM_X86_SET_MSR_FILTER:
+-              r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
++      case KVM_X86_SET_MSR_FILTER: {
++              struct kvm_msr_filter __user *user_msr_filter = argp;
++              struct kvm_msr_filter filter;
++
++              if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
++                      return -EFAULT;
++
++              r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
+               break;
++      }
+       default:
+               r = -ENOTTY;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch b/queue-5.10/kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch

new file mode 100644 (file)

index 0000000..8da3179
--- /dev/null
+++ b/queue-5.10/kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch
@@ -0,0 +1,52 @@
+From c54ed785f358b2ffd11033e61595168dc667f3da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 16:13:15 +0000
+Subject: KVM: x86: Protect the unused bits in MSR exiting flags
+
+From: Aaron Lewis <aaronlewis@google.com>
+
+[ Upstream commit cf5029d5dd7cb0aaa53250fa9e389abd231606b3 ]
+
+The flags for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER
+have no protection for their unused bits.  Without protection, future
+development for these features will be difficult.  Add the protection
+needed to make it possible to extend these features in the future.
+
+Signed-off-by: Aaron Lewis <aaronlewis@google.com>
+Message-Id: <20220714161314.1715227-1-aaronlewis@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 2e3272bc1790 ("KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index e07607eed35c..ed8efd402d05 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5360,6 +5360,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+               r = 0;
+               break;
+       case KVM_CAP_X86_USER_SPACE_MSR:
++              r = -EINVAL;
++              if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
++                                   KVM_MSR_EXIT_REASON_UNKNOWN |
++                                   KVM_MSR_EXIT_REASON_FILTER))
++                      break;
+               kvm->arch.user_space_msr_mask = cap->args[0];
+               r = 0;
+               break;
+@@ -5454,6 +5459,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+       if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+               return -EFAULT;
+ 
++      if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
++              return -EINVAL;
++
+       for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
+               empty &= !filter.ranges[i].nmsrs;
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.10/kvm-x86-trace-re-injected-exceptions.patch b/queue-5.10/kvm-x86-trace-re-injected-exceptions.patch

new file mode 100644 (file)

index 0000000..2789f82
--- /dev/null
+++ b/queue-5.10/kvm-x86-trace-re-injected-exceptions.patch
@@ -0,0 +1,111 @@
+From 185c8e06922043e0f7c3eda821599b06377e9a7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 00:07:31 +0200
+Subject: KVM: x86: Trace re-injected exceptions
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit a61d7c5432ac5a953bbcec17af031661c2bd201d ]
+
+Trace exceptions that are re-injected, not just those that KVM is
+injecting for the first time.  Debugging re-injection bugs is painful
+enough as is, not having visibility into what KVM is doing only makes
+things worse.
+
+Delay propagating pending=>injected in the non-reinjection path so that
+the tracing can properly identify reinjected exceptions.
+
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+Message-Id: <25470690a38b4d2b32b6204875dd35676c65c9f2.1651440202.git.maciej.szmigiero@oracle.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 5623f751bd9c ("KVM: x86: Treat #DBs from the emulator as fault-like (code and DR7.GD=1)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/trace.h | 12 ++++++++----
+ arch/x86/kvm/x86.c   | 16 +++++++++-------
+ 2 files changed, 17 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
+index a2835d784f4b..3d4988ea8b57 100644
+--- a/arch/x86/kvm/trace.h
++++ b/arch/x86/kvm/trace.h
+@@ -304,25 +304,29 @@ TRACE_EVENT(kvm_inj_virq,
+  * Tracepoint for kvm interrupt injection:
+  */
+ TRACE_EVENT(kvm_inj_exception,
+-      TP_PROTO(unsigned exception, bool has_error, unsigned error_code),
+-      TP_ARGS(exception, has_error, error_code),
++      TP_PROTO(unsigned exception, bool has_error, unsigned error_code,
++               bool reinjected),
++      TP_ARGS(exception, has_error, error_code, reinjected),
+ 
+       TP_STRUCT__entry(
+               __field(        u8,     exception       )
+               __field(        u8,     has_error       )
+               __field(        u32,    error_code      )
++              __field(        bool,   reinjected      )
+       ),
+ 
+       TP_fast_assign(
+               __entry->exception      = exception;
+               __entry->has_error      = has_error;
+               __entry->error_code     = error_code;
++              __entry->reinjected     = reinjected;
+       ),
+ 
+-      TP_printk("%s (0x%x)",
++      TP_printk("%s (0x%x)%s",
+                 __print_symbolic(__entry->exception, kvm_trace_sym_exc),
+                 /* FIXME: don't print error_code if not present */
+-                __entry->has_error ? __entry->error_code : 0)
++                __entry->has_error ? __entry->error_code : 0,
++                __entry->reinjected ? " [reinjected]" : "")
+ );
+ 
+ /*
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index f3473418dcd5..17bb3d0e2d13 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8347,6 +8347,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+ 
+ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
+ {
++      trace_kvm_inj_exception(vcpu->arch.exception.nr,
++                              vcpu->arch.exception.has_error_code,
++                              vcpu->arch.exception.error_code,
++                              vcpu->arch.exception.injected);
++
+       if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
+               vcpu->arch.exception.error_code = false;
+       kvm_x86_ops.queue_exception(vcpu);
+@@ -8404,13 +8409,6 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
+ 
+       /* try to inject new event if pending */
+       if (vcpu->arch.exception.pending) {
+-              trace_kvm_inj_exception(vcpu->arch.exception.nr,
+-                                      vcpu->arch.exception.has_error_code,
+-                                      vcpu->arch.exception.error_code);
+-
+-              vcpu->arch.exception.pending = false;
+-              vcpu->arch.exception.injected = true;
+-
+               if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+                       __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+                                            X86_EFLAGS_RF);
+@@ -8424,6 +8422,10 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
+               }
+ 
+               kvm_inject_exception(vcpu);
++
++              vcpu->arch.exception.pending = false;
++              vcpu->arch.exception.injected = true;
++
+               can_inject = false;
+       }
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.10/kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch b/queue-5.10/kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch

new file mode 100644 (file)

index 0000000..1becdd4
--- /dev/null
+++ b/queue-5.10/kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch
@@ -0,0 +1,101 @@
+From de9834177c5085fbb469373a2193c641f9e5b7f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Aug 2022 23:15:55 +0000
+Subject: KVM: x86: Treat #DBs from the emulator as fault-like (code and
+ DR7.GD=1)
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 5623f751bd9c438ed12840e086f33c4646440d19 ]
+
+Add a dedicated "exception type" for #DBs, as #DBs can be fault-like or
+trap-like depending on the sub-type of #DB, and effectively defer the
+decision of what to do with the #DB to the caller.
+
+For the emulator's two calls to exception_type(), treat the #DB as
+fault-like, as the emulator handles only code breakpoint and general
+detect #DBs, both of which are fault-like.
+
+For event injection, which uses exception_type() to determine whether to
+set EFLAGS.RF=1 on the stack, keep the current behavior of not setting
+RF=1 for #DBs.  Intel and AMD explicitly state RF isn't set on code #DBs,
+so exempting by failing the "== EXCPT_FAULT" check is correct.  The only
+other fault-like #DB is General Detect, and despite Intel and AMD both
+strongly implying (through omission) that General Detect #DBs should set
+RF=1, hardware (multiple generations of both Intel and AMD), in fact does
+not.  Through insider knowledge, extreme foresight, sheer dumb luck, or
+some combination thereof, KVM correctly handled RF for General Detect #DBs.
+
+Fixes: 38827dbd3fb8 ("KVM: x86: Do not update EFLAGS on faulting emulation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20220830231614.3580124-9-seanjc@google.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 27 +++++++++++++++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 17bb3d0e2d13..e07607eed35c 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -459,6 +459,7 @@ static int exception_class(int vector)
+ #define EXCPT_TRAP            1
+ #define EXCPT_ABORT           2
+ #define EXCPT_INTERRUPT               3
++#define EXCPT_DB              4
+ 
+ static int exception_type(int vector)
+ {
+@@ -469,8 +470,14 @@ static int exception_type(int vector)
+ 
+       mask = 1 << vector;
+ 
+-      /* #DB is trap, as instruction watchpoints are handled elsewhere */
+-      if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
++      /*
++       * #DBs can be trap-like or fault-like, the caller must check other CPU
++       * state, e.g. DR6, to determine whether a #DB is a trap or fault.
++       */
++      if (mask & (1 << DB_VECTOR))
++              return EXCPT_DB;
++
++      if (mask & ((1 << BP_VECTOR) | (1 << OF_VECTOR)))
+               return EXCPT_TRAP;
+ 
+       if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
+@@ -7560,6 +7567,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+               unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
+               toggle_interruptibility(vcpu, ctxt->interruptibility);
+               vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
++
++              /*
++               * Note, EXCPT_DB is assumed to be fault-like as the emulator
++               * only supports code breakpoints and general detect #DB, both
++               * of which are fault-like.
++               */
+               if (!ctxt->have_exception ||
+                   exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
+                       kvm_rip_write(vcpu, ctxt->eip);
+@@ -8409,6 +8422,16 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
+ 
+       /* try to inject new event if pending */
+       if (vcpu->arch.exception.pending) {
++              /*
++               * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS
++               * value pushed on the stack.  Trap-like exception and all #DBs
++               * leave RF as-is (KVM follows Intel's behavior in this regard;
+               * AMD states that code breakpoint #DBs explicitly clear RF=0).
++               *
++               * Note, most versions of Intel's SDM and AMD's APM incorrectly
++               * describe the behavior of General Detect #DBs, which are
++               * fault-like.  They do _not_ set RF, a la code breakpoints.
++               */
+               if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+                       __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+                                            X86_EFLAGS_RF);
+-- 
+2.35.1
+
diff --git a/queue-5.10/serial-8250-let-drivers-request-full-16550a-feature-.patch b/queue-5.10/serial-8250-let-drivers-request-full-16550a-feature-.patch

new file mode 100644 (file)

index 0000000..401fb0b
--- /dev/null
+++ b/queue-5.10/serial-8250-let-drivers-request-full-16550a-feature-.patch
@@ -0,0 +1,70 @@
+From 33e8378918fbb59ac2ecf5553d799925bb355b16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Sep 2022 00:35:32 +0100
+Subject: serial: 8250: Let drivers request full 16550A feature probing
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+[ Upstream commit 9906890c89e4dbd900ed87ad3040080339a7f411 ]
+
+A SERIAL_8250_16550A_VARIANTS configuration option has been recently
+defined that lets one request the 8250 driver not to probe for 16550A
+device features so as to reduce the driver's device startup time in
+virtual machines.
+
+Some actual hardware devices require these features to have been fully
+determined however for their driver to work correctly, so define a flag
+to let drivers request full 16550A feature probing on a device-by-device
+basis if required regardless of the SERIAL_8250_16550A_VARIANTS option
+setting chosen.
+
+Fixes: dc56ecb81a0a ("serial: 8250: Support disabling mdelay-filled probes of 16550A variants")
+Cc: stable@vger.kernel.org # v5.6+
+Reported-by: Anders Blomdell <anders.blomdell@control.lth.se>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209202357520.41633@angie.orcam.me.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/8250/8250_port.c | 3 ++-
+ include/linux/serial_core.h         | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
+index 8b3756e4bb05..f648fd1d7548 100644
+--- a/drivers/tty/serial/8250/8250_port.c
++++ b/drivers/tty/serial/8250/8250_port.c
+@@ -1023,7 +1023,8 @@ static void autoconfig_16550a(struct uart_8250_port *up)
+       up->port.type = PORT_16550A;
+       up->capabilities |= UART_CAP_FIFO;
+ 
+-      if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS))
++      if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) &&
++          !(up->port.flags & UPF_FULL_PROBE))
+               return;
+ 
+       /*
+diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
+index 59a8caf3230a..6df4c3356ae6 100644
+--- a/include/linux/serial_core.h
++++ b/include/linux/serial_core.h
+@@ -100,7 +100,7 @@ struct uart_icount {
+       __u32   buf_overrun;
+ };
+ 
+-typedef unsigned int __bitwise upf_t;
++typedef u64 __bitwise upf_t;
+ typedef unsigned int __bitwise upstat_t;
+ 
+ struct uart_port {
+@@ -207,6 +207,7 @@ struct uart_port {
+ #define UPF_FIXED_PORT                ((__force upf_t) (1 << 29))
+ #define UPF_DEAD              ((__force upf_t) (1 << 30))
+ #define UPF_IOREMAP           ((__force upf_t) (1 << 31))
++#define UPF_FULL_PROBE                ((__force upf_t) (1ULL << 32))
+ 
+ #define __UPF_CHANGE_MASK     0x17fff
+ #define UPF_CHANGE_MASK               ((__force upf_t) __UPF_CHANGE_MASK)
+-- 
+2.35.1
+
diff --git a/queue-5.10/serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch b/queue-5.10/serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch

new file mode 100644 (file)

index 0000000..322347c
--- /dev/null
+++ b/queue-5.10/serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch
@@ -0,0 +1,48 @@
+From ed5a30b1b62a514b606e883884676bac9b544c25 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Sep 2022 11:12:15 +0200
+Subject: serial: ar933x: Deassert Transmit Enable on ->rs485_config()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 3a939433ddc1bab98be028903aaa286e5e7461d7 ]
+
+The ar933x_uart driver neglects to deassert Transmit Enable when
+->rs485_config() is invoked.  Fix it.
+
+Fixes: 9be1064fe524 ("serial: ar933x_uart: add RS485 support")
+Cc: stable@vger.kernel.org # v5.7+
+Cc: Daniel Golle <daniel@makrotopia.org>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Link: https://lore.kernel.org/r/5b36af26e57553f084334666e7d24c7fd131a01e.1662887231.git.lukas@wunner.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/ar933x_uart.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
+index be3f5d8f683b..23f2d9937cfc 100644
+--- a/drivers/tty/serial/ar933x_uart.c
++++ b/drivers/tty/serial/ar933x_uart.c
+@@ -585,6 +585,13 @@ static const struct uart_ops ar933x_uart_ops = {
+ static int ar933x_config_rs485(struct uart_port *port,
+                               struct serial_rs485 *rs485conf)
+ {
++      struct ar933x_uart_port *up =
++                      container_of(port, struct ar933x_uart_port, port);
++
++      if (port->rs485.flags & SER_RS485_ENABLED)
++              gpiod_set_value(up->rts_gpiod,
++                      !!(rs485conf->flags & SER_RS485_RTS_AFTER_SEND));
++
+       return 0;
+ }
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.10/serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch b/queue-5.10/serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch

new file mode 100644 (file)

index 0000000..6fcf724
--- /dev/null
+++ b/queue-5.10/serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch
@@ -0,0 +1,50 @@
+From 5f883654fb7bc06350c4f9c8b07b0690bb946879 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Jul 2022 18:44:36 +0200
+Subject: serial: ar933x: Remove superfluous code in ar933x_config_rs485()
+
+From: Lino Sanfilippo <l.sanfilippo@kunbus.com>
+
+[ Upstream commit 184842622c97da2f88f365a981af05432baa5385 ]
+
+In ar933x_config_rs485() the check for the RTS GPIO is not needed since in
+case the GPIO is not available at driver init ar933x_no_rs485 is assigned
+to port->rs485_supported and this function is never called. So remove the
+check.
+
+Also in uart_set_rs485_config() the serial core already assigns the passed
+serial_rs485 struct to the uart port. So remove the assignment in the
+driver's rs485_config() function to avoid redundancy.
+
+Signed-off-by: Lino Sanfilippo <l.sanfilippo@kunbus.com>
+Link: https://lore.kernel.org/r/20220710164442.2958979-3-LinoSanfilippo@gmx.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 3a939433ddc1 ("serial: ar933x: Deassert Transmit Enable on ->rs485_config()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/ar933x_uart.c | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
+index c2be7cf91399..be3f5d8f683b 100644
+--- a/drivers/tty/serial/ar933x_uart.c
++++ b/drivers/tty/serial/ar933x_uart.c
+@@ -585,15 +585,6 @@ static const struct uart_ops ar933x_uart_ops = {
+ static int ar933x_config_rs485(struct uart_port *port,
+                               struct serial_rs485 *rs485conf)
+ {
+-      struct ar933x_uart_port *up =
+-              container_of(port, struct ar933x_uart_port, port);
+-
+-      if ((rs485conf->flags & SER_RS485_ENABLED) &&
+-          !up->rts_gpiod) {
+-              dev_err(port->dev, "RS485 needs rts-gpio\n");
+-              return 1;
+-      }
+-      port->rs485 = *rs485conf;
+       return 0;
+ }
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.10/series b/queue-5.10/series

new file mode 100644 (file)

index 0000000..fc6657c
--- /dev/null
+++ b/queue-5.10/series
@@ -0,0 +1,13 @@
+serial-8250-let-drivers-request-full-16550a-feature-.patch
+serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch
+serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch
+kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch
+kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch
+kvm-x86-trace-re-injected-exceptions.patch
+kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch
+x86-topology-set-cpu_die_id-only-if-die_type-found.patch
+x86-topology-fix-multiple-packages-shown-on-a-single.patch
+x86-topology-fix-duplicated-core-id-within-a-package.patch
+kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch
+kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch
+kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch
diff --git a/queue-5.10/x86-topology-fix-duplicated-core-id-within-a-package.patch b/queue-5.10/x86-topology-fix-duplicated-core-id-within-a-package.patch

new file mode 100644 (file)

index 0000000..3dd9a95
--- /dev/null
+++ b/queue-5.10/x86-topology-fix-duplicated-core-id-within-a-package.patch
@@ -0,0 +1,54 @@
+From 1ae276b910f750a14faa6e9be1d5132886b325d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Oct 2022 17:01:47 +0800
+Subject: x86/topology: Fix duplicated core ID within a package
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+[ Upstream commit 71eac7063698b7d7b8fafb1683ac24a034541141 ]
+
+Today, core ID is assumed to be unique within each package.
+
+But an AlderLake-N platform adds a Module level between core and package.
+Linux excludes the unknown Module bits from the core ID, resulting in
+duplicate core ID's.
+
+To keep core ID unique within a package, Linux must include all APIC-ID
+bits for known or unknown levels above the core and below the package
+in the core ID.
+
+It is important to understand that core ID's have always come directly
+from the APIC-ID encoding, which comes from the BIOS. Thus there is no
+guarantee that they start at 0, or that they are contiguous.
+As such, naively using them for array indexes can be problematic.
+
+[ dhansen: un-known -> unknown ]
+
+Fixes: 7745f03eb395 ("x86/topology: Add CPUID.1F multi-die/package support")
+Suggested-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Len Brown <len.brown@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20221014090147.1836-5-rui.zhang@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/topology.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
+index 696309749d62..37d48ab3d077 100644
+--- a/arch/x86/kernel/cpu/topology.c
++++ b/arch/x86/kernel/cpu/topology.c
+@@ -141,7 +141,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+               sub_index++;
+       }
+ 
+-      core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
++      core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width;
+       die_select_mask = (~(-1 << die_plus_mask_width)) >>
+                               core_plus_mask_width;
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.10/x86-topology-fix-multiple-packages-shown-on-a-single.patch b/queue-5.10/x86-topology-fix-multiple-packages-shown-on-a-single.patch

new file mode 100644 (file)

index 0000000..65436d7
--- /dev/null
+++ b/queue-5.10/x86-topology-fix-multiple-packages-shown-on-a-single.patch
@@ -0,0 +1,92 @@
+From aaeb3fa9575615e7b38d511c254f474efdfbca2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Oct 2022 17:01:46 +0800
+Subject: x86/topology: Fix multiple packages shown on a single-package system
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+[ Upstream commit 2b12a7a126d62bdbd81f4923c21bf6e9a7fbd069 ]
+
+CPUID.1F/B does not enumerate Package level explicitly, instead, all the
+APIC-ID bits above the enumerated levels are assumed to be package ID
+bits.
+
+Current code gets package ID by shifting out all the APIC-ID bits that
+Linux supports, rather than shifting out all the APIC-ID bits that
+CPUID.1F enumerates. This introduces problems when CPUID.1F enumerates a
+level that Linux does not support.
+
+For example, on a single package AlderLake-N, there are 2 Ecore Modules
+with 4 atom cores in each module.  Linux does not support the Module
+level and interprets the Module ID bits as package ID and erroneously
+reports a multi module system as a multi-package system.
+
+Fix this by using APIC-ID bits above all the CPUID.1F enumerated levels
+as package ID.
+
+[ dhansen: spelling fix ]
+
+Fixes: 7745f03eb395 ("x86/topology: Add CPUID.1F multi-die/package support")
+Suggested-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Len Brown <len.brown@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20221014090147.1836-4-rui.zhang@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/topology.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
+index 8678864ce712..696309749d62 100644
+--- a/arch/x86/kernel/cpu/topology.c
++++ b/arch/x86/kernel/cpu/topology.c
+@@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+       unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
+       unsigned int core_select_mask, core_level_siblings;
+       unsigned int die_select_mask, die_level_siblings;
++      unsigned int pkg_mask_width;
+       bool die_level_present = false;
+       int leaf;
+ 
+@@ -111,10 +112,10 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+       core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+       core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+       die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+-      die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
++      pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ 
+       sub_index = 1;
+-      do {
++      while (true) {
+               cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
+ 
+               /*
+@@ -132,8 +133,13 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+                       die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+               }
+ 
++              if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE)
++                      pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
++              else
++                      break;
++
+               sub_index++;
+-      } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
++      }
+ 
+       core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
+       die_select_mask = (~(-1 << die_plus_mask_width)) >>
+@@ -148,7 +154,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+       }
+ 
+       c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
+-                              die_plus_mask_width);
++                              pkg_mask_width);
+       /*
+        * Reinit the apicid, now that we have extended initial_apicid.
+        */
+-- 
+2.35.1
+
diff --git a/queue-5.10/x86-topology-set-cpu_die_id-only-if-die_type-found.patch b/queue-5.10/x86-topology-set-cpu_die_id-only-if-die_type-found.patch

new file mode 100644 (file)

index 0000000..ea2ad87
--- /dev/null
+++ b/queue-5.10/x86-topology-set-cpu_die_id-only-if-die_type-found.patch
@@ -0,0 +1,67 @@
+From 04a4eaf4cc55d9b09cc245ca6f60deba912675fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Nov 2020 21:06:59 +0000
+Subject: x86/topology: Set cpu_die_id only if DIE_TYPE found
+
+From: Yazen Ghannam <yazen.ghannam@amd.com>
+
+[ Upstream commit cb09a379724d299c603a7a79f444f52a9a75b8d2 ]
+
+CPUID Leaf 0x1F defines a DIE_TYPE level (nb: ECX[8:15] level type == 0x5),
+but CPUID Leaf 0xB does not. However, detect_extended_topology() will
+set struct cpuinfo_x86.cpu_die_id regardless of whether a valid Die ID
+was found.
+
+Only set cpu_die_id if a DIE_TYPE level is found. CPU topology code may
+use another value for cpu_die_id, e.g. the AMD NodeId on AMD-based
+systems. Code ordering should be maintained so that the CPUID Leaf 0x1F
+Die ID value will take precedence on systems that may use another value.
+
+Suggested-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20201109210659.754018-5-Yazen.Ghannam@amd.com
+Stable-dep-of: 2b12a7a126d6 ("x86/topology: Fix multiple packages shown on a single-package system")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/topology.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
+index 91288da29599..8678864ce712 100644
+--- a/arch/x86/kernel/cpu/topology.c
++++ b/arch/x86/kernel/cpu/topology.c
+@@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+       unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
+       unsigned int core_select_mask, core_level_siblings;
+       unsigned int die_select_mask, die_level_siblings;
++      bool die_level_present = false;
+       int leaf;
+ 
+       leaf = detect_extended_topology_leaf(c);
+@@ -126,6 +127,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+                       die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+               }
+               if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
++                      die_level_present = true;
+                       die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+                       die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+               }
+@@ -139,8 +141,12 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+ 
+       c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid,
+                               ht_mask_width) & core_select_mask;
+-      c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
+-                              core_plus_mask_width) & die_select_mask;
++
++      if (die_level_present) {
++              c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
++                                      core_plus_mask_width) & die_select_mask;
++      }
++
+       c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
+                               die_plus_mask_width);
+       /*
+-- 
+2.35.1
+
author	Sasha Levin <sashal@kernel.org>
	Fri, 4 Nov 2022 14:54:41 +0000 (10:54 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Fri, 4 Nov 2022 14:54:41 +0000 (10:54 -0400)
queue-5.10/kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/kvm-x86-trace-re-injected-exceptions.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/serial-8250-let-drivers-request-full-16550a-feature-.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/series	[new file with mode: 0644]	patch \| blob
queue-5.10/x86-topology-fix-duplicated-core-id-within-a-package.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/x86-topology-fix-multiple-packages-shown-on-a-single.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/x86-topology-set-cpu_die_id-only-if-die_type-found.patch	[new file with mode: 0644]	patch \| blob