From: Sasha Levin Date: Fri, 4 Nov 2022 14:54:41 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v4.9.333~92 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3251bb1ea16b70ae00e9f4719e690c485a41c41a;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch b/queue-5.10/kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch new file mode 100644 index 00000000000..11ca87ee034 --- /dev/null +++ b/queue-5.10/kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch @@ -0,0 +1,54 @@ +From a9675746749792878cf5cd6521d6b5b459e936df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Aug 2022 15:37:21 +0200 +Subject: KVM: nVMX: Don't propagate vmcs12's PERF_GLOBAL_CTRL settings to + vmcs02 + +From: Sean Christopherson + +[ Upstream commit def9d705c05eab3fdedeb10ad67907513b12038e ] + +Don't propagate vmcs12's VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL to vmcs02. +KVM doesn't disallow L1 from using VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL +even when KVM itself doesn't use the control, e.g. due to the various +CPU errata that where the MSR can be corrupted on VM-Exit. + +Preserve KVM's (vmcs01) setting to hopefully avoid having to toggle the +bit in vmcs02 at a later point. E.g. if KVM is loading PERF_GLOBAL_CTRL +when running L1, then odds are good KVM will also load the MSR when +running L2. 
+ +Fixes: 8bf00a529967 ("KVM: VMX: add support for switching of PERF_GLOBAL_CTRL") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Vitaly Kuznetsov +Link: https://lore.kernel.org/r/20220830133737.1539624-18-vkuznets@redhat.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/vmx/nested.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index 2395387945a8..498fed0dda98 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -2345,9 +2345,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 + * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate + * on the related bits (if supported by the CPU) in the hope that + * we can avoid VMWrites during vmx_set_efer(). ++ * ++ * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is ++ * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to ++ * do the same for L2. 
+ */ + exec_control = __vm_entry_controls_get(vmcs01); +- exec_control |= vmcs12->vm_entry_controls; ++ exec_control |= (vmcs12->vm_entry_controls & ++ ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); + exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); + if (cpu_has_load_ia32_efer()) { + if (guest_efer & EFER_LMA) +-- +2.35.1 + diff --git a/queue-5.10/kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch b/queue-5.10/kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch new file mode 100644 index 00000000000..567639df1a4 --- /dev/null +++ b/queue-5.10/kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch @@ -0,0 +1,151 @@ +From fa34c2a052c62e8f2245b49665951978fbd7dbc4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Aug 2021 10:19:50 -0700 +Subject: KVM: nVMX: Pull KVM L0's desired controls directly from vmcs01 + +From: Sean Christopherson + +[ Upstream commit 389ab25216c9d09e0d335e764eeeb84c2089614f ] + +When preparing controls for vmcs02, grab KVM's desired controls from +vmcs01's shadow state instead of recalculating the controls from scratch, +or in the secondary execution controls, instead of using the dedicated +cache. Calculating secondary exec controls is eye-poppingly expensive +due to the guest CPUID checks, hence the dedicated cache, but the other +calculations aren't exactly free either. + +Explicitly clear several bits (x2APIC, DESC exiting, and load EFER on +exit) as appropriate as they may be set in vmcs01, whereas the previous +implementation relied on dynamic bits being cleared in the calculator. + +Intentionally propagate VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL from +vmcs01 to vmcs02. Whether or not PERF_GLOBAL_CTRL is loaded depends on +whether or not perf itself is active, so unless perf stops between the +exit from L1 and entry to L2, vmcs01 will hold the desired value. This +is purely an optimization as atomic_switch_perf_msrs() will set/clear +the control as needed at VM-Enter, i.e. 
it avoids two extra VMWRITEs in +the case where perf is active (versus starting with the bits clear in +vmcs02, which was the previous behavior). + +Cc: Zeng Guang +Signed-off-by: Sean Christopherson +Message-Id: <20210810171952.2758100-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Stable-dep-of: def9d705c05e ("KVM: nVMX: Don't propagate vmcs12's PERF_GLOBAL_CTRL settings to vmcs02") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++--------- + arch/x86/kvm/vmx/vmx.h | 6 +++++- + 2 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index 7f15e2b2a0d6..2395387945a8 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -2232,7 +2232,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, + } + } + +-static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) ++static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01, ++ struct vmcs12 *vmcs12) + { + u32 exec_control, vmcs12_exec_ctrl; + u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); +@@ -2243,7 +2244,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) + /* + * PIN CONTROLS + */ +- exec_control = vmx_pin_based_exec_ctrl(vmx); ++ exec_control = __pin_controls_get(vmcs01); + exec_control |= (vmcs12->pin_based_vm_exec_control & + ~PIN_BASED_VMX_PREEMPTION_TIMER); + +@@ -2258,7 +2259,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) + /* + * EXEC CONTROLS + */ +- exec_control = vmx_exec_control(vmx); /* L0's desires */ ++ exec_control = __exec_controls_get(vmcs01); /* L0's desires */ + exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; + exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; + exec_control &= ~CPU_BASED_TPR_SHADOW; +@@ -2295,17 +2296,20 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) + * SECONDARY EXEC CONTROLS + */ + if 
(cpu_has_secondary_exec_ctrls()) { +- exec_control = vmx->secondary_exec_control; ++ exec_control = __secondary_exec_controls_get(vmcs01); + + /* Take the following fields only from vmcs12 */ + exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_ENABLE_INVPCID | + SECONDARY_EXEC_ENABLE_RDTSCP | + SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + SECONDARY_EXEC_APIC_REGISTER_VIRT | +- SECONDARY_EXEC_ENABLE_VMFUNC); ++ SECONDARY_EXEC_ENABLE_VMFUNC | ++ SECONDARY_EXEC_DESC); ++ + if (nested_cpu_has(vmcs12, + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { + vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & +@@ -2342,8 +2346,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) + * on the related bits (if supported by the CPU) in the hope that + * we can avoid VMWrites during vmx_set_efer(). + */ +- exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) & +- ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER; ++ exec_control = __vm_entry_controls_get(vmcs01); ++ exec_control |= vmcs12->vm_entry_controls; ++ exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); + if (cpu_has_load_ia32_efer()) { + if (guest_efer & EFER_LMA) + exec_control |= VM_ENTRY_IA32E_MODE; +@@ -2359,9 +2364,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER + * bits may be modified by vmx_set_efer() in prepare_vmcs02(). 
+ */ +- exec_control = vmx_vmexit_ctrl(); ++ exec_control = __vm_exit_controls_get(vmcs01); + if (cpu_has_load_ia32_efer() && guest_efer != host_efer) + exec_control |= VM_EXIT_LOAD_IA32_EFER; ++ else ++ exec_control &= ~VM_EXIT_LOAD_IA32_EFER; + vm_exit_controls_set(vmx, exec_control); + + /* +@@ -3370,7 +3377,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); + +- prepare_vmcs02_early(vmx, vmcs12); ++ prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12); + + if (from_vmentry) { + if (unlikely(!nested_get_vmcs12_pages(vcpu))) { +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index 24903f05c204..ed4b6da83aa8 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -386,9 +386,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ + vmx->loaded_vmcs->controls_shadow.lname = val; \ + } \ + } \ ++static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs) \ ++{ \ ++ return vmcs->controls_shadow.lname; \ ++} \ + static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ + { \ +- return vmx->loaded_vmcs->controls_shadow.lname; \ ++ return __##lname##_controls_get(vmx->loaded_vmcs); \ + } \ + static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ + { \ +-- +2.35.1 + diff --git a/queue-5.10/kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch b/queue-5.10/kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch new file mode 100644 index 00000000000..56a6ed849e3 --- /dev/null +++ b/queue-5.10/kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch @@ -0,0 +1,99 @@ +From da02dca0fc1b37ce6af610f96a9329668d608320 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Oct 2022 20:45:41 +0200 +Subject: KVM: x86: Add compat handler for KVM_X86_SET_MSR_FILTER + +From: Alexander Graf + +[ Upstream commit 1739c7017fb1d759965dcbab925ff5980a5318cb ] + +The KVM_X86_SET_MSR_FILTER ioctls contains a pointer in 
the passed in +struct which means it has a different struct size depending on whether +it gets called from 32bit or 64bit code. + +This patch introduces compat code that converts from the 32bit struct to +its 64bit counterpart which then gets used going forward internally. +With this applied, 32bit QEMU can successfully set MSR bitmaps when +running on 64bit kernels. + +Reported-by: Andrew Randrianasulu +Fixes: 1a155254ff937 ("KVM: x86: Introduce MSR filtering") +Signed-off-by: Alexander Graf +Message-Id: <20221017184541.2658-4-graf@amazon.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 56 insertions(+) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index be4326b143e1..0ac80b3ff0f5 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5493,6 +5493,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, + return 0; + } + ++#ifdef CONFIG_KVM_COMPAT ++/* for KVM_X86_SET_MSR_FILTER */ ++struct kvm_msr_filter_range_compat { ++ __u32 flags; ++ __u32 nmsrs; ++ __u32 base; ++ __u32 bitmap; ++}; ++ ++struct kvm_msr_filter_compat { ++ __u32 flags; ++ struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES]; ++}; ++ ++#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat) ++ ++long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, ++ unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ struct kvm *kvm = filp->private_data; ++ long r = -ENOTTY; ++ ++ switch (ioctl) { ++ case KVM_X86_SET_MSR_FILTER_COMPAT: { ++ struct kvm_msr_filter __user *user_msr_filter = argp; ++ struct kvm_msr_filter_compat filter_compat; ++ struct kvm_msr_filter filter; ++ int i; ++ ++ if (copy_from_user(&filter_compat, user_msr_filter, ++ sizeof(filter_compat))) ++ return -EFAULT; ++ ++ filter.flags = filter_compat.flags; ++ for (i = 0; i < ARRAY_SIZE(filter.ranges); 
i++) { ++ struct kvm_msr_filter_range_compat *cr; ++ ++ cr = &filter_compat.ranges[i]; ++ filter.ranges[i] = (struct kvm_msr_filter_range) { ++ .flags = cr->flags, ++ .nmsrs = cr->nmsrs, ++ .base = cr->base, ++ .bitmap = (__u8 *)(ulong)cr->bitmap, ++ }; ++ } ++ ++ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter); ++ break; ++ } ++ } ++ ++ return r; ++} ++#endif ++ + long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) + { +-- +2.35.1 + diff --git a/queue-5.10/kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch b/queue-5.10/kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch new file mode 100644 index 00000000000..a3854c77333 --- /dev/null +++ b/queue-5.10/kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch @@ -0,0 +1,93 @@ +From 1854d4fb3789bf2624f95981ef443ad9a7981acd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Oct 2022 20:45:40 +0200 +Subject: KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter() + +From: Alexander Graf + +[ Upstream commit 2e3272bc1790825c43d2c39690bf2836b81c6d36 ] + +In the next patch we want to introduce a second caller to +set_msr_filter() which constructs its own filter list on the stack. +Refactor the original function so it takes it as argument instead of +reading it through copy_from_user(). 
+ +Signed-off-by: Alexander Graf +Message-Id: <20221017184541.2658-3-graf@amazon.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/x86.c | 31 +++++++++++++++++-------------- + 1 file changed, 17 insertions(+), 14 deletions(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index ed8efd402d05..be4326b143e1 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5446,26 +5446,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, + return r; + } + +-static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) ++static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, ++ struct kvm_msr_filter *filter) + { +- struct kvm_msr_filter __user *user_msr_filter = argp; + struct kvm_x86_msr_filter *new_filter, *old_filter; +- struct kvm_msr_filter filter; + bool default_allow; + bool empty = true; + int r = 0; + u32 i; + +- if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) +- return -EFAULT; +- +- if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY) ++ if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY) + return -EINVAL; + +- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) +- empty &= !filter.ranges[i].nmsrs; ++ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) ++ empty &= !filter->ranges[i].nmsrs; + +- default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY); ++ default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY); + if (empty && !default_allow) + return -EINVAL; + +@@ -5473,8 +5469,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) + if (!new_filter) + return -ENOMEM; + +- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { +- r = kvm_add_msr_filter(new_filter, &filter.ranges[i]); ++ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) { ++ r = kvm_add_msr_filter(new_filter, &filter->ranges[i]); + if (r) { + kvm_free_msr_filter(new_filter); + return r; +@@ -5803,9 +5799,16 @@ long kvm_arch_vm_ioctl(struct file *filp, + case 
KVM_SET_PMU_EVENT_FILTER: + r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); + break; +- case KVM_X86_SET_MSR_FILTER: +- r = kvm_vm_ioctl_set_msr_filter(kvm, argp); ++ case KVM_X86_SET_MSR_FILTER: { ++ struct kvm_msr_filter __user *user_msr_filter = argp; ++ struct kvm_msr_filter filter; ++ ++ if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) ++ return -EFAULT; ++ ++ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter); + break; ++ } + default: + r = -ENOTTY; + } +-- +2.35.1 + diff --git a/queue-5.10/kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch b/queue-5.10/kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch new file mode 100644 index 00000000000..8da3179715f --- /dev/null +++ b/queue-5.10/kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch @@ -0,0 +1,52 @@ +From c54ed785f358b2ffd11033e61595168dc667f3da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Jul 2022 16:13:15 +0000 +Subject: KVM: x86: Protect the unused bits in MSR exiting flags + +From: Aaron Lewis + +[ Upstream commit cf5029d5dd7cb0aaa53250fa9e389abd231606b3 ] + +The flags for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER +have no protection for their unused bits. Without protection, future +development for these features will be difficult. Add the protection +needed to make it possible to extend these features in the future. 
+ +Signed-off-by: Aaron Lewis +Message-Id: <20220714161314.1715227-1-aaronlewis@google.com> +Signed-off-by: Paolo Bonzini +Stable-dep-of: 2e3272bc1790 ("KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter()") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/x86.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index e07607eed35c..ed8efd402d05 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5360,6 +5360,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + r = 0; + break; + case KVM_CAP_X86_USER_SPACE_MSR: ++ r = -EINVAL; ++ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL | ++ KVM_MSR_EXIT_REASON_UNKNOWN | ++ KVM_MSR_EXIT_REASON_FILTER)) ++ break; + kvm->arch.user_space_msr_mask = cap->args[0]; + r = 0; + break; +@@ -5454,6 +5459,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) + if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) + return -EFAULT; + ++ if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY) ++ return -EINVAL; ++ + for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) + empty &= !filter.ranges[i].nmsrs; + +-- +2.35.1 + diff --git a/queue-5.10/kvm-x86-trace-re-injected-exceptions.patch b/queue-5.10/kvm-x86-trace-re-injected-exceptions.patch new file mode 100644 index 00000000000..2789f82c950 --- /dev/null +++ b/queue-5.10/kvm-x86-trace-re-injected-exceptions.patch @@ -0,0 +1,111 @@ +From 185c8e06922043e0f7c3eda821599b06377e9a7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 00:07:31 +0200 +Subject: KVM: x86: Trace re-injected exceptions + +From: Sean Christopherson + +[ Upstream commit a61d7c5432ac5a953bbcec17af031661c2bd201d ] + +Trace exceptions that are re-injected, not just those that KVM is +injecting for the first time. Debugging re-injection bugs is painful +enough as is, not having visibility into what KVM is doing only makes +things worse. 
+ +Delay propagating pending=>injected in the non-reinjection path so that +the tracing can properly identify reinjected exceptions. + +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Signed-off-by: Maciej S. Szmigiero +Message-Id: <25470690a38b4d2b32b6204875dd35676c65c9f2.1651440202.git.maciej.szmigiero@oracle.com> +Signed-off-by: Paolo Bonzini +Stable-dep-of: 5623f751bd9c ("KVM: x86: Treat #DBs from the emulator as fault-like (code and DR7.GD=1)") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/trace.h | 12 ++++++++---- + arch/x86/kvm/x86.c | 16 +++++++++------- + 2 files changed, 17 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h +index a2835d784f4b..3d4988ea8b57 100644 +--- a/arch/x86/kvm/trace.h ++++ b/arch/x86/kvm/trace.h +@@ -304,25 +304,29 @@ TRACE_EVENT(kvm_inj_virq, + * Tracepoint for kvm interrupt injection: + */ + TRACE_EVENT(kvm_inj_exception, +- TP_PROTO(unsigned exception, bool has_error, unsigned error_code), +- TP_ARGS(exception, has_error, error_code), ++ TP_PROTO(unsigned exception, bool has_error, unsigned error_code, ++ bool reinjected), ++ TP_ARGS(exception, has_error, error_code, reinjected), + + TP_STRUCT__entry( + __field( u8, exception ) + __field( u8, has_error ) + __field( u32, error_code ) ++ __field( bool, reinjected ) + ), + + TP_fast_assign( + __entry->exception = exception; + __entry->has_error = has_error; + __entry->error_code = error_code; ++ __entry->reinjected = reinjected; + ), + +- TP_printk("%s (0x%x)", ++ TP_printk("%s (0x%x)%s", + __print_symbolic(__entry->exception, kvm_trace_sym_exc), + /* FIXME: don't print error_code if not present */ +- __entry->has_error ? __entry->error_code : 0) ++ __entry->has_error ? __entry->error_code : 0, ++ __entry->reinjected ? 
" [reinjected]" : "") + ); + + /* +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index f3473418dcd5..17bb3d0e2d13 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8347,6 +8347,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) + + static void kvm_inject_exception(struct kvm_vcpu *vcpu) + { ++ trace_kvm_inj_exception(vcpu->arch.exception.nr, ++ vcpu->arch.exception.has_error_code, ++ vcpu->arch.exception.error_code, ++ vcpu->arch.exception.injected); ++ + if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) + vcpu->arch.exception.error_code = false; + kvm_x86_ops.queue_exception(vcpu); +@@ -8404,13 +8409,6 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit + + /* try to inject new event if pending */ + if (vcpu->arch.exception.pending) { +- trace_kvm_inj_exception(vcpu->arch.exception.nr, +- vcpu->arch.exception.has_error_code, +- vcpu->arch.exception.error_code); +- +- vcpu->arch.exception.pending = false; +- vcpu->arch.exception.injected = true; +- + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) + __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | + X86_EFLAGS_RF); +@@ -8424,6 +8422,10 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit + } + + kvm_inject_exception(vcpu); ++ ++ vcpu->arch.exception.pending = false; ++ vcpu->arch.exception.injected = true; ++ + can_inject = false; + } + +-- +2.35.1 + diff --git a/queue-5.10/kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch b/queue-5.10/kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch new file mode 100644 index 00000000000..1becdd4d66a --- /dev/null +++ b/queue-5.10/kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch @@ -0,0 +1,101 @@ +From de9834177c5085fbb469373a2193c641f9e5b7f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Aug 2022 23:15:55 +0000 +Subject: KVM: x86: Treat #DBs from the emulator as fault-like (code and + DR7.GD=1) + +From: Sean Christopherson + +[ 
Upstream commit 5623f751bd9c438ed12840e086f33c4646440d19 ] + +Add a dedicated "exception type" for #DBs, as #DBs can be fault-like or +trap-like depending on the sub-type of #DB, and effectively defer the +decision of what to do with the #DB to the caller. + +For the emulator's two calls to exception_type(), treat the #DB as +fault-like, as the emulator handles only code breakpoint and general +detect #DBs, both of which are fault-like. + +For event injection, which uses exception_type() to determine whether to +set EFLAGS.RF=1 on the stack, keep the current behavior of not setting +RF=1 for #DBs. Intel and AMD explicitly state RF isn't set on code #DBs, +so exempting by failing the "== EXCPT_FAULT" check is correct. The only +other fault-like #DB is General Detect, and despite Intel and AMD both +strongly implying (through omission) that General Detect #DBs should set +RF=1, hardware (multiple generations of both Intel and AMD), in fact does +not. Through insider knowledge, extreme foresight, sheer dumb luck, or +some combination thereof, KVM correctly handled RF for General Detect #DBs. 
+ +Fixes: 38827dbd3fb8 ("KVM: x86: Do not update EFLAGS on faulting emulation") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Link: https://lore.kernel.org/r/20220830231614.3580124-9-seanjc@google.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/x86.c | 27 +++++++++++++++++++++++++-- + 1 file changed, 25 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 17bb3d0e2d13..e07607eed35c 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -459,6 +459,7 @@ static int exception_class(int vector) + #define EXCPT_TRAP 1 + #define EXCPT_ABORT 2 + #define EXCPT_INTERRUPT 3 ++#define EXCPT_DB 4 + + static int exception_type(int vector) + { +@@ -469,8 +470,14 @@ static int exception_type(int vector) + + mask = 1 << vector; + +- /* #DB is trap, as instruction watchpoints are handled elsewhere */ +- if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) ++ /* ++ * #DBs can be trap-like or fault-like, the caller must check other CPU ++ * state, e.g. DR6, to determine whether a #DB is a trap or fault. ++ */ ++ if (mask & (1 << DB_VECTOR)) ++ return EXCPT_DB; ++ ++ if (mask & ((1 << BP_VECTOR) | (1 << OF_VECTOR))) + return EXCPT_TRAP; + + if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) +@@ -7560,6 +7567,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long rflags = kvm_x86_ops.get_rflags(vcpu); + toggle_interruptibility(vcpu, ctxt->interruptibility); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ ++ /* ++ * Note, EXCPT_DB is assumed to be fault-like as the emulator ++ * only supports code breakpoints and general detect #DB, both ++ * of which are fault-like. 
++ */ + if (!ctxt->have_exception || + exception_type(ctxt->exception.vector) == EXCPT_TRAP) { + kvm_rip_write(vcpu, ctxt->eip); +@@ -8409,6 +8422,16 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit + + /* try to inject new event if pending */ + if (vcpu->arch.exception.pending) { ++ /* ++ * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS ++ * value pushed on the stack. Trap-like exception and all #DBs ++ * leave RF as-is (KVM follows Intel's behavior in this regard; ++ * AMD states that code breakpoint #DBs explicitly clear RF=0). ++ * ++ * Note, most versions of Intel's SDM and AMD's APM incorrectly ++ * describe the behavior of General Detect #DBs, which are ++ * fault-like. They do _not_ set RF, a la code breakpoints. ++ */ + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) + __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | + X86_EFLAGS_RF); +-- +2.35.1 + diff --git a/queue-5.10/serial-8250-let-drivers-request-full-16550a-feature-.patch b/queue-5.10/serial-8250-let-drivers-request-full-16550a-feature-.patch new file mode 100644 index 00000000000..401fb0bbd2d --- /dev/null +++ b/queue-5.10/serial-8250-let-drivers-request-full-16550a-feature-.patch @@ -0,0 +1,70 @@ +From 33e8378918fbb59ac2ecf5553d799925bb355b16 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Sep 2022 00:35:32 +0100 +Subject: serial: 8250: Let drivers request full 16550A feature probing + +From: Maciej W. Rozycki + +[ Upstream commit 9906890c89e4dbd900ed87ad3040080339a7f411 ] + +A SERIAL_8250_16550A_VARIANTS configuration option has been recently +defined that lets one request the 8250 driver not to probe for 16550A +device features so as to reduce the driver's device startup time in +virtual machines. 
+ +Some actual hardware devices require these features to have been fully +determined however for their driver to work correctly, so define a flag +to let drivers request full 16550A feature probing on a device-by-device +basis if required regardless of the SERIAL_8250_16550A_VARIANTS option +setting chosen. + +Fixes: dc56ecb81a0a ("serial: 8250: Support disabling mdelay-filled probes of 16550A variants") +Cc: stable@vger.kernel.org # v5.6+ +Reported-by: Anders Blomdell +Signed-off-by: Maciej W. Rozycki +Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209202357520.41633@angie.orcam.me.uk +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/8250/8250_port.c | 3 ++- + include/linux/serial_core.h | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c +index 8b3756e4bb05..f648fd1d7548 100644 +--- a/drivers/tty/serial/8250/8250_port.c ++++ b/drivers/tty/serial/8250/8250_port.c +@@ -1023,7 +1023,8 @@ static void autoconfig_16550a(struct uart_8250_port *up) + up->port.type = PORT_16550A; + up->capabilities |= UART_CAP_FIFO; + +- if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS)) ++ if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) && ++ !(up->port.flags & UPF_FULL_PROBE)) + return; + + /* +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index 59a8caf3230a..6df4c3356ae6 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -100,7 +100,7 @@ struct uart_icount { + __u32 buf_overrun; + }; + +-typedef unsigned int __bitwise upf_t; ++typedef u64 __bitwise upf_t; + typedef unsigned int __bitwise upstat_t; + + struct uart_port { +@@ -207,6 +207,7 @@ struct uart_port { + #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) + #define UPF_DEAD ((__force upf_t) (1 << 30)) + #define UPF_IOREMAP ((__force upf_t) (1 << 31)) ++#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32)) + + #define 
__UPF_CHANGE_MASK 0x17fff + #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) +-- +2.35.1 + diff --git a/queue-5.10/serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch b/queue-5.10/serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch new file mode 100644 index 00000000000..322347cb10a --- /dev/null +++ b/queue-5.10/serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch @@ -0,0 +1,48 @@ +From ed5a30b1b62a514b606e883884676bac9b544c25 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 11 Sep 2022 11:12:15 +0200 +Subject: serial: ar933x: Deassert Transmit Enable on ->rs485_config() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lukas Wunner + +[ Upstream commit 3a939433ddc1bab98be028903aaa286e5e7461d7 ] + +The ar933x_uart driver neglects to deassert Transmit Enable when +->rs485_config() is invoked. Fix it. + +Fixes: 9be1064fe524 ("serial: ar933x_uart: add RS485 support") +Cc: stable@vger.kernel.org # v5.7+ +Cc: Daniel Golle +Reviewed-by: Ilpo Järvinen +Signed-off-by: Lukas Wunner +Link: https://lore.kernel.org/r/5b36af26e57553f084334666e7d24c7fd131a01e.1662887231.git.lukas@wunner.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/ar933x_uart.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c +index be3f5d8f683b..23f2d9937cfc 100644 +--- a/drivers/tty/serial/ar933x_uart.c ++++ b/drivers/tty/serial/ar933x_uart.c +@@ -585,6 +585,13 @@ static const struct uart_ops ar933x_uart_ops = { + static int ar933x_config_rs485(struct uart_port *port, + struct serial_rs485 *rs485conf) + { ++ struct ar933x_uart_port *up = ++ container_of(port, struct ar933x_uart_port, port); ++ ++ if (port->rs485.flags & SER_RS485_ENABLED) ++ gpiod_set_value(up->rts_gpiod, ++ !!(rs485conf->flags & SER_RS485_RTS_AFTER_SEND)); ++ + return 0; + } + +-- +2.35.1 + diff --git 
a/queue-5.10/serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch b/queue-5.10/serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch new file mode 100644 index 00000000000..6fcf7240448 --- /dev/null +++ b/queue-5.10/serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch @@ -0,0 +1,50 @@ +From 5f883654fb7bc06350c4f9c8b07b0690bb946879 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 10 Jul 2022 18:44:36 +0200 +Subject: serial: ar933x: Remove superfluous code in ar933x_config_rs485() + +From: Lino Sanfilippo + +[ Upstream commit 184842622c97da2f88f365a981af05432baa5385 ] + +In ar933x_config_rs485() the check for the RTS GPIO is not needed since in +case the GPIO is not available at driver init ar933x_no_rs485 is assigned +to port->rs485_supported and this function is never called. So remove the +check. + +Also in uart_set_rs485_config() the serial core already assigns the passed +serial_rs485 struct to the uart port. So remove the assignment in the +drivers rs485_config() function to avoid redundancy. 
+ +Signed-off-by: Lino Sanfilippo +Link: https://lore.kernel.org/r/20220710164442.2958979-3-LinoSanfilippo@gmx.de +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 3a939433ddc1 ("serial: ar933x: Deassert Transmit Enable on ->rs485_config()") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/ar933x_uart.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c +index c2be7cf91399..be3f5d8f683b 100644 +--- a/drivers/tty/serial/ar933x_uart.c ++++ b/drivers/tty/serial/ar933x_uart.c +@@ -585,15 +585,6 @@ static const struct uart_ops ar933x_uart_ops = { + static int ar933x_config_rs485(struct uart_port *port, + struct serial_rs485 *rs485conf) + { +- struct ar933x_uart_port *up = +- container_of(port, struct ar933x_uart_port, port); +- +- if ((rs485conf->flags & SER_RS485_ENABLED) && +- !up->rts_gpiod) { +- dev_err(port->dev, "RS485 needs rts-gpio\n"); +- return 1; +- } +- port->rs485 = *rs485conf; + return 0; + } + +-- +2.35.1 + diff --git a/queue-5.10/series b/queue-5.10/series new file mode 100644 index 00000000000..fc6657c05cb --- /dev/null +++ b/queue-5.10/series @@ -0,0 +1,13 @@ +serial-8250-let-drivers-request-full-16550a-feature-.patch +serial-ar933x-remove-superfluous-code-in-ar933x_conf.patch +serial-ar933x-deassert-transmit-enable-on-rs485_conf.patch +kvm-nvmx-pull-kvm-l0-s-desired-controls-directly-fro.patch +kvm-nvmx-don-t-propagate-vmcs12-s-perf_global_ctrl-s.patch +kvm-x86-trace-re-injected-exceptions.patch +kvm-x86-treat-dbs-from-the-emulator-as-fault-like-co.patch +x86-topology-set-cpu_die_id-only-if-die_type-found.patch +x86-topology-fix-multiple-packages-shown-on-a-single.patch +x86-topology-fix-duplicated-core-id-within-a-package.patch +kvm-x86-protect-the-unused-bits-in-msr-exiting-flags.patch +kvm-x86-copy-filter-arg-outside-kvm_vm_ioctl_set_msr.patch +kvm-x86-add-compat-handler-for-kvm_x86_set_msr_filte.patch diff --git 
a/queue-5.10/x86-topology-fix-duplicated-core-id-within-a-package.patch b/queue-5.10/x86-topology-fix-duplicated-core-id-within-a-package.patch new file mode 100644 index 00000000000..3dd9a9554c9 --- /dev/null +++ b/queue-5.10/x86-topology-fix-duplicated-core-id-within-a-package.patch @@ -0,0 +1,54 @@ +From 1ae276b910f750a14faa6e9be1d5132886b325d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Oct 2022 17:01:47 +0800 +Subject: x86/topology: Fix duplicated core ID within a package + +From: Zhang Rui + +[ Upstream commit 71eac7063698b7d7b8fafb1683ac24a034541141 ] + +Today, core ID is assumed to be unique within each package. + +But an AlderLake-N platform adds a Module level between core and package, and +Linux excludes the unknown module bits from the core ID, resulting in +duplicate core IDs. + +To keep core ID unique within a package, Linux must include all APIC-ID +bits for known or unknown levels above the core and below the package +in the core ID. + +It is important to understand that core IDs have always come directly +from the APIC-ID encoding, which comes from the BIOS. Thus there is no +guarantee that they start at 0, or that they are contiguous. +As such, naively using them for array indexes can be problematic.
+ +[ dhansen: un-known -> unknown ] + +Fixes: 7745f03eb395 ("x86/topology: Add CPUID.1F multi-die/package support") +Suggested-by: Len Brown +Signed-off-by: Zhang Rui +Signed-off-by: Dave Hansen +Reviewed-by: Len Brown +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20221014090147.1836-5-rui.zhang@intel.com +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/cpu/topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c +index 696309749d62..37d48ab3d077 100644 +--- a/arch/x86/kernel/cpu/topology.c ++++ b/arch/x86/kernel/cpu/topology.c +@@ -141,7 +141,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + sub_index++; + } + +- core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; ++ core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width; + die_select_mask = (~(-1 << die_plus_mask_width)) >> + core_plus_mask_width; + +-- +2.35.1 + diff --git a/queue-5.10/x86-topology-fix-multiple-packages-shown-on-a-single.patch b/queue-5.10/x86-topology-fix-multiple-packages-shown-on-a-single.patch new file mode 100644 index 00000000000..65436d70218 --- /dev/null +++ b/queue-5.10/x86-topology-fix-multiple-packages-shown-on-a-single.patch @@ -0,0 +1,92 @@ +From aaeb3fa9575615e7b38d511c254f474efdfbca2f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Oct 2022 17:01:46 +0800 +Subject: x86/topology: Fix multiple packages shown on a single-package system + +From: Zhang Rui + +[ Upstream commit 2b12a7a126d62bdbd81f4923c21bf6e9a7fbd069 ] + +CPUID.1F/B does not enumerate Package level explicitly, instead, all the +APIC-ID bits above the enumerated levels are assumed to be package ID +bits. + +Current code gets package ID by shifting out all the APIC-ID bits that +Linux supports, rather than shifting out all the APIC-ID bits that +CPUID.1F enumerates. This introduces problems when CPUID.1F enumerates a +level that Linux does not support. 
+ +For example, on a single package AlderLake-N, there are 2 Ecore Modules +with 4 atom cores in each module. Linux does not support the Module +level and interprets the Module ID bits as package ID and erroneously +reports a multi module system as a multi-package system. + +Fix this by using APIC-ID bits above all the CPUID.1F enumerated levels +as package ID. + +[ dhansen: spelling fix ] + +Fixes: 7745f03eb395 ("x86/topology: Add CPUID.1F multi-die/package support") +Suggested-by: Len Brown +Signed-off-by: Zhang Rui +Signed-off-by: Dave Hansen +Reviewed-by: Len Brown +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20221014090147.1836-4-rui.zhang@intel.com +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/cpu/topology.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c +index 8678864ce712..696309749d62 100644 +--- a/arch/x86/kernel/cpu/topology.c ++++ b/arch/x86/kernel/cpu/topology.c +@@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + unsigned int die_select_mask, die_level_siblings; ++ unsigned int pkg_mask_width; + bool die_level_present = false; + int leaf; + +@@ -111,10 +112,10 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); +- die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); ++ pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + + sub_index = 1; +- do { ++ while (true) { + cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); + + /* +@@ -132,8 +133,13 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + } + ++ if (LEAFB_SUBTYPE(ecx) != 
INVALID_TYPE) ++ pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); ++ else ++ break; ++ + sub_index++; +- } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); ++ } + + core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + die_select_mask = (~(-1 << die_plus_mask_width)) >> +@@ -148,7 +154,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + } + + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, +- die_plus_mask_width); ++ pkg_mask_width); + /* + * Reinit the apicid, now that we have extended initial_apicid. + */ +-- +2.35.1 + diff --git a/queue-5.10/x86-topology-set-cpu_die_id-only-if-die_type-found.patch b/queue-5.10/x86-topology-set-cpu_die_id-only-if-die_type-found.patch new file mode 100644 index 00000000000..ea2ad877929 --- /dev/null +++ b/queue-5.10/x86-topology-set-cpu_die_id-only-if-die_type-found.patch @@ -0,0 +1,67 @@ +From 04a4eaf4cc55d9b09cc245ca6f60deba912675fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Nov 2020 21:06:59 +0000 +Subject: x86/topology: Set cpu_die_id only if DIE_TYPE found + +From: Yazen Ghannam + +[ Upstream commit cb09a379724d299c603a7a79f444f52a9a75b8d2 ] + +CPUID Leaf 0x1F defines a DIE_TYPE level (nb: ECX[8:15] level type == 0x5), +but CPUID Leaf 0xB does not. However, detect_extended_topology() will +set struct cpuinfo_x86.cpu_die_id regardless of whether a valid Die ID +was found. + +Only set cpu_die_id if a DIE_TYPE level is found. CPU topology code may +use another value for cpu_die_id, e.g. the AMD NodeId on AMD-based +systems. Code ordering should be maintained so that the CPUID Leaf 0x1F +Die ID value will take precedence on systems that may use another value. 
+ +Suggested-by: Borislav Petkov +Signed-off-by: Yazen Ghannam +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20201109210659.754018-5-Yazen.Ghannam@amd.com +Stable-dep-of: 2b12a7a126d6 ("x86/topology: Fix multiple packages shown on a single-package system") +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/cpu/topology.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c +index 91288da29599..8678864ce712 100644 +--- a/arch/x86/kernel/cpu/topology.c ++++ b/arch/x86/kernel/cpu/topology.c +@@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + unsigned int die_select_mask, die_level_siblings; ++ bool die_level_present = false; + int leaf; + + leaf = detect_extended_topology_leaf(c); +@@ -126,6 +127,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + } + if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { ++ die_level_present = true; + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + } +@@ -139,8 +141,12 @@ int detect_extended_topology(struct cpuinfo_x86 *c) + + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, + ht_mask_width) & core_select_mask; +- c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, +- core_plus_mask_width) & die_select_mask; ++ ++ if (die_level_present) { ++ c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, ++ core_plus_mask_width) & die_select_mask; ++ } ++ + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, + die_plus_mask_width); + /* +-- +2.35.1 +