git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.2-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Mar 2023 17:48:56 +0000 (18:48 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Mar 2023 17:48:56 +0000 (18:48 +0100)
added patches:
documentation-hw-vuln-document-the-interaction-between-ibrs-and-stibp.patch
kvm-svm-don-t-put-load-avic-when-setting-virtual-apic-mode.patch
kvm-svm-fix-potential-overflow-in-sev-s-send-receive_update_data.patch
kvm-svm-flush-the-current-tlb-when-activating-avic.patch
kvm-svm-hyper-v-placate-modpost-section-mismatch-error.patch
kvm-svm-process-icr-on-avic-ipi-delivery-failure-due-to-invalid-target.patch
kvm-x86-blindly-get-current-x2apic-reg-value-on-nodecode-write-traps.patch
kvm-x86-don-t-inhibit-apicv-avic-if-xapic-id-mismatch-is-due-to-32-bit-id.patch
kvm-x86-don-t-inhibit-apicv-avic-on-xapic-id-change-if-apic-is-disabled.patch
kvm-x86-inject-gp-if-wrmsr-sets-reserved-bits-in-apic-self-ipi.patch
kvm-x86-inject-gp-on-x2apic-wrmsr-that-sets-reserved-bits-63-32.patch
kvm-x86-purge-highest-isr-cache-when-updating-apicv-state.patch
selftests-x86-fix-incorrect-kernel-headers-search-path.patch
virt-sev-guest-return-eio-if-certificate-buffer-is-not-large-enough.patch
x86-crash-disable-virt-in-core-nmi-crash-handler-to-avoid-double-shootdown.patch
x86-kprobes-fix-__recover_optprobed_insn-check-optimizing-logic.patch
x86-kprobes-fix-arch_check_optimized_kprobe-check-within-optimized_kprobe-range.patch
x86-microcode-amd-add-a-cpu-parameter-to-the-reloading-functions.patch
x86-microcode-amd-fix-mixed-steppings-support.patch
x86-microcode-amd-remove-load_microcode_amd-s-bsp-parameter.patch
x86-reboot-disable-svm-not-just-vmx-when-stopping-cpus.patch
x86-reboot-disable-virtualization-in-an-emergency-if-svm-is-supported.patch
x86-speculation-allow-enabling-stibp-with-legacy-ibrs.patch
x86-virt-force-gif-1-prior-to-disabling-svm-for-reboot-flows.patch

25 files changed:
queue-6.2/documentation-hw-vuln-document-the-interaction-between-ibrs-and-stibp.patch [new file with mode: 0644]
queue-6.2/kvm-svm-don-t-put-load-avic-when-setting-virtual-apic-mode.patch [new file with mode: 0644]
queue-6.2/kvm-svm-fix-potential-overflow-in-sev-s-send-receive_update_data.patch [new file with mode: 0644]
queue-6.2/kvm-svm-flush-the-current-tlb-when-activating-avic.patch [new file with mode: 0644]
queue-6.2/kvm-svm-hyper-v-placate-modpost-section-mismatch-error.patch [new file with mode: 0644]
queue-6.2/kvm-svm-process-icr-on-avic-ipi-delivery-failure-due-to-invalid-target.patch [new file with mode: 0644]
queue-6.2/kvm-x86-blindly-get-current-x2apic-reg-value-on-nodecode-write-traps.patch [new file with mode: 0644]
queue-6.2/kvm-x86-don-t-inhibit-apicv-avic-if-xapic-id-mismatch-is-due-to-32-bit-id.patch [new file with mode: 0644]
queue-6.2/kvm-x86-don-t-inhibit-apicv-avic-on-xapic-id-change-if-apic-is-disabled.patch [new file with mode: 0644]
queue-6.2/kvm-x86-inject-gp-if-wrmsr-sets-reserved-bits-in-apic-self-ipi.patch [new file with mode: 0644]
queue-6.2/kvm-x86-inject-gp-on-x2apic-wrmsr-that-sets-reserved-bits-63-32.patch [new file with mode: 0644]
queue-6.2/kvm-x86-purge-highest-isr-cache-when-updating-apicv-state.patch [new file with mode: 0644]
queue-6.2/selftests-x86-fix-incorrect-kernel-headers-search-path.patch [new file with mode: 0644]
queue-6.2/series
queue-6.2/virt-sev-guest-return-eio-if-certificate-buffer-is-not-large-enough.patch [new file with mode: 0644]
queue-6.2/x86-crash-disable-virt-in-core-nmi-crash-handler-to-avoid-double-shootdown.patch [new file with mode: 0644]
queue-6.2/x86-kprobes-fix-__recover_optprobed_insn-check-optimizing-logic.patch [new file with mode: 0644]
queue-6.2/x86-kprobes-fix-arch_check_optimized_kprobe-check-within-optimized_kprobe-range.patch [new file with mode: 0644]
queue-6.2/x86-microcode-amd-add-a-cpu-parameter-to-the-reloading-functions.patch [new file with mode: 0644]
queue-6.2/x86-microcode-amd-fix-mixed-steppings-support.patch [new file with mode: 0644]
queue-6.2/x86-microcode-amd-remove-load_microcode_amd-s-bsp-parameter.patch [new file with mode: 0644]
queue-6.2/x86-reboot-disable-svm-not-just-vmx-when-stopping-cpus.patch [new file with mode: 0644]
queue-6.2/x86-reboot-disable-virtualization-in-an-emergency-if-svm-is-supported.patch [new file with mode: 0644]
queue-6.2/x86-speculation-allow-enabling-stibp-with-legacy-ibrs.patch [new file with mode: 0644]
queue-6.2/x86-virt-force-gif-1-prior-to-disabling-svm-for-reboot-flows.patch [new file with mode: 0644]

diff --git a/queue-6.2/documentation-hw-vuln-document-the-interaction-between-ibrs-and-stibp.patch b/queue-6.2/documentation-hw-vuln-document-the-interaction-between-ibrs-and-stibp.patch
new file mode 100644 (file)
index 0000000..5fc0dcd
--- /dev/null
@@ -0,0 +1,58 @@
+From e02b50ca442e88122e1302d4dbc1b71a4808c13f Mon Sep 17 00:00:00 2001
+From: KP Singh <kpsingh@kernel.org>
+Date: Mon, 27 Feb 2023 07:05:41 +0100
+Subject: Documentation/hw-vuln: Document the interaction between IBRS and STIBP
+
+From: KP Singh <kpsingh@kernel.org>
+
+commit e02b50ca442e88122e1302d4dbc1b71a4808c13f upstream.
+
+Explain why STIBP is needed with legacy IBRS as currently implemented
+(KERNEL_IBRS) and why STIBP is not needed when enhanced IBRS is enabled.
+
+Fixes: 7c693f54c873 ("x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS")
+Signed-off-by: KP Singh <kpsingh@kernel.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230227060541.1939092-2-kpsingh@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/spectre.rst |   21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/spectre.rst
++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
+@@ -479,8 +479,16 @@ Spectre variant 2
+    On Intel Skylake-era systems the mitigation covers most, but not all,
+    cases. See :ref:`[3] <spec_ref3>` for more details.
+-   On CPUs with hardware mitigation for Spectre variant 2 (e.g. Enhanced
+-   IBRS on x86), retpoline is automatically disabled at run time.
++   On CPUs with hardware mitigation for Spectre variant 2 (e.g. IBRS
++   or enhanced IBRS on x86), retpoline is automatically disabled at run time.
++
++   Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
++   boot, by setting the IBRS bit, and they're automatically protected against
++   Spectre v2 variant attacks, including cross-thread branch target injections
++   on SMT systems (STIBP). In other words, eIBRS enables STIBP too.
++
++   Legacy IBRS systems clear the IBRS bit on exit to userspace and
++   therefore explicitly enable STIBP for that
+    The retpoline mitigation is turned on by default on vulnerable
+    CPUs. It can be forced on or off by the administrator
+@@ -504,9 +512,12 @@ Spectre variant 2
+    For Spectre variant 2 mitigation, individual user programs
+    can be compiled with return trampolines for indirect branches.
+    This protects them from consuming poisoned entries in the branch
+-   target buffer left by malicious software.  Alternatively, the
+-   programs can disable their indirect branch speculation via prctl()
+-   (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
++   target buffer left by malicious software.
++
++   On legacy IBRS systems, at return to userspace, implicit STIBP is disabled
++   because the kernel clears the IBRS bit. In this case, the userspace programs
++   can disable indirect branch speculation via prctl() (See
++   :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+    On x86, this will turn on STIBP to guard against attacks from the
+    sibling thread when the user program is running, and use IBPB to
+    flush the branch target buffer when switching to/from the program.
diff --git a/queue-6.2/kvm-svm-don-t-put-load-avic-when-setting-virtual-apic-mode.patch b/queue-6.2/kvm-svm-don-t-put-load-avic-when-setting-virtual-apic-mode.patch
new file mode 100644 (file)
index 0000000..ea928c5
--- /dev/null
@@ -0,0 +1,156 @@
+From e0bead97e7590da888148feb9e9133bc278c534b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 6 Jan 2023 01:12:40 +0000
+Subject: KVM: SVM: Don't put/load AVIC when setting virtual APIC mode
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit e0bead97e7590da888148feb9e9133bc278c534b upstream.
+
+Move the VMCB updates from avic_refresh_apicv_exec_ctrl() into
+avic_set_virtual_apic_mode() and invert the dependency being said
+functions to avoid calling avic_vcpu_{load,put}() and
+avic_set_pi_irte_mode() when "only" setting the virtual APIC mode.
+
+avic_set_virtual_apic_mode() is invoked from common x86 with preemption
+enabled, which makes avic_vcpu_{load,put}() unhappy.  Luckily, calling
+those and updating IRTE stuff is unnecessary as the only reason
+avic_set_virtual_apic_mode() is called is to handle transitions between
+xAPIC and x2APIC that don't also toggle APICv activation.  And if
+activation doesn't change, there's no need to fiddle with the physical
+APIC ID table or update IRTE.
+
+The "full" refresh is guaranteed to be called if activation changes in
+this case as the only call to the "set" path is:
+
+       kvm_vcpu_update_apicv(vcpu);
+       static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
+
+and kvm_vcpu_update_apicv() invokes the refresh if activation changes:
+
+       if (apic->apicv_active == activate)
+               goto out;
+
+       apic->apicv_active = activate;
+       kvm_apic_update_apicv(vcpu);
+       static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
+
+Rename the helper to reflect that it is also called during "refresh".
+
+  WARNING: CPU: 183 PID: 49186 at arch/x86/kvm/svm/avic.c:1081 avic_vcpu_put+0xde/0xf0 [kvm_amd]
+  CPU: 183 PID: 49186 Comm: stable Tainted: G           O       6.0.0-smp--fcddbca45f0a-sink #34
+  Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 10.48.0 01/27/2022
+  RIP: 0010:avic_vcpu_put+0xde/0xf0 [kvm_amd]
+   avic_refresh_apicv_exec_ctrl+0x142/0x1c0 [kvm_amd]
+   avic_set_virtual_apic_mode+0x5a/0x70 [kvm_amd]
+   kvm_lapic_set_base+0x149/0x1a0 [kvm]
+   kvm_set_apic_base+0x8f/0xd0 [kvm]
+   kvm_set_msr_common+0xa3a/0xdc0 [kvm]
+   svm_set_msr+0x364/0x6b0 [kvm_amd]
+   __kvm_set_msr+0xb8/0x1c0 [kvm]
+   kvm_emulate_wrmsr+0x58/0x1d0 [kvm]
+   msr_interception+0x1c/0x30 [kvm_amd]
+   svm_invoke_exit_handler+0x31/0x100 [kvm_amd]
+   svm_handle_exit+0xfc/0x160 [kvm_amd]
+   vcpu_enter_guest+0x21bb/0x23e0 [kvm]
+   vcpu_run+0x92/0x450 [kvm]
+   kvm_arch_vcpu_ioctl_run+0x43e/0x6e0 [kvm]
+   kvm_vcpu_ioctl+0x559/0x620 [kvm]
+
+Fixes: 05c4fe8c1bd9 ("KVM: SVM: Refresh AVIC configuration when changing APIC mode")
+Cc: stable@vger.kernel.org
+Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230106011306.85230-8-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/avic.c |   31 +++++++++++++++----------------
+ arch/x86/kvm/svm/svm.c  |    2 +-
+ arch/x86/kvm/svm/svm.h  |    2 +-
+ 3 files changed, 17 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/kvm/svm/avic.c
++++ b/arch/x86/kvm/svm/avic.c
+@@ -747,18 +747,6 @@ void avic_apicv_post_state_restore(struc
+       avic_handle_ldr_update(vcpu);
+ }
+-void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
+-{
+-      if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
+-              return;
+-
+-      if (kvm_get_apic_mode(vcpu) == LAPIC_MODE_INVALID) {
+-              WARN_ONCE(true, "Invalid local APIC state (vcpu_id=%d)", vcpu->vcpu_id);
+-              return;
+-      }
+-      avic_refresh_apicv_exec_ctrl(vcpu);
+-}
+-
+ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
+ {
+       int ret = 0;
+@@ -1100,17 +1088,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu
+       WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+ }
+-
+-void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
++void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct vmcb *vmcb = svm->vmcb01.ptr;
+-      bool activated = kvm_vcpu_apicv_active(vcpu);
++
++      if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
++              return;
+       if (!enable_apicv)
+               return;
+-      if (activated) {
++      if (kvm_vcpu_apicv_active(vcpu)) {
+               /**
+                * During AVIC temporary deactivation, guest could update
+                * APIC ID, DFR and LDR registers, which would not be trapped
+@@ -1124,6 +1113,16 @@ void avic_refresh_apicv_exec_ctrl(struct
+               avic_deactivate_vmcb(svm);
+       }
+       vmcb_mark_dirty(vmcb, VMCB_AVIC);
++}
++
++void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
++{
++      bool activated = kvm_vcpu_apicv_active(vcpu);
++
++      if (!enable_apicv)
++              return;
++
++      avic_refresh_virtual_apic_mode(vcpu);
+       if (activated)
+               avic_vcpu_load(vcpu, vcpu->cpu);
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4771,7 +4771,7 @@ static struct kvm_x86_ops svm_x86_ops __
+       .enable_nmi_window = svm_enable_nmi_window,
+       .enable_irq_window = svm_enable_irq_window,
+       .update_cr8_intercept = svm_update_cr8_intercept,
+-      .set_virtual_apic_mode = avic_set_virtual_apic_mode,
++      .set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
+       .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
+       .check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
+       .apicv_post_state_restore = avic_apicv_post_state_restore,
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -648,7 +648,7 @@ void avic_vcpu_blocking(struct kvm_vcpu
+ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
+ void avic_ring_doorbell(struct kvm_vcpu *vcpu);
+ unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
+-void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
++void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
+ /* sev.c */
diff --git a/queue-6.2/kvm-svm-fix-potential-overflow-in-sev-s-send-receive_update_data.patch b/queue-6.2/kvm-svm-fix-potential-overflow-in-sev-s-send-receive_update_data.patch
new file mode 100644 (file)
index 0000000..5570173
--- /dev/null
@@ -0,0 +1,65 @@
+From f94f053aa3a5d6ff17951870483d9eb9e13de2e2 Mon Sep 17 00:00:00 2001
+From: Peter Gonda <pgonda@google.com>
+Date: Tue, 7 Feb 2023 09:13:54 -0800
+Subject: KVM: SVM: Fix potential overflow in SEV's send|receive_update_data()
+
+From: Peter Gonda <pgonda@google.com>
+
+commit f94f053aa3a5d6ff17951870483d9eb9e13de2e2 upstream.
+
+KVM_SEV_SEND_UPDATE_DATA and KVM_SEV_RECEIVE_UPDATE_DATA have an integer
+overflow issue. Params.guest_len and offset are both 32 bits wide, with a
+large params.guest_len the check to confirm a page boundary is not
+crossed can falsely pass:
+
+    /* Check if we are crossing the page boundary *
+    offset = params.guest_uaddr & (PAGE_SIZE - 1);
+    if ((params.guest_len + offset > PAGE_SIZE))
+
+Add an additional check to confirm that params.guest_len itself is not
+greater than PAGE_SIZE.
+
+Note, this isn't a security concern as overflow can happen if and only if
+params.guest_len is greater than 0xfffff000, and the FW spec says these
+commands fail with lengths greater than 16KB, i.e. the PSP will detect
+KVM's goof.
+
+Fixes: 15fb7de1a7f5 ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command")
+Fixes: d3d1af85e2c7 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command")
+Reported-by: Andy Nguyen <theflow@google.com>
+Suggested-by: Thomas Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Peter Gonda <pgonda@google.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: kvm@vger.kernel.org
+Cc: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20230207171354.4012821-1-pgonda@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -1293,7 +1293,7 @@ static int sev_send_update_data(struct k
+       /* Check if we are crossing the page boundary */
+       offset = params.guest_uaddr & (PAGE_SIZE - 1);
+-      if ((params.guest_len + offset > PAGE_SIZE))
++      if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE)
+               return -EINVAL;
+       /* Pin guest memory */
+@@ -1473,7 +1473,7 @@ static int sev_receive_update_data(struc
+       /* Check if we are crossing the page boundary */
+       offset = params.guest_uaddr & (PAGE_SIZE - 1);
+-      if ((params.guest_len + offset > PAGE_SIZE))
++      if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE)
+               return -EINVAL;
+       hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
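
The commit message above notes that the overflow can only happen when guest_len exceeds 0xfffff000. A minimal userspace sketch of that 32-bit wrap-around, assuming 32-bit operands as the commit message states (all names and constants are illustrative placeholders, not part of the queued patch or KVM's actual code):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u      /* placeholder standing in for the kernel's PAGE_SIZE */

int main(void)
{
	uint32_t offset    = 0xfffu;       /* guest_uaddr & (PAGE_SIZE - 1) is at most 0xfff */
	uint32_t guest_len = 0xfffff001u;  /* anything above 0xfffff000 can wrap the sum */
	uint32_t sum       = guest_len + offset;  /* wraps modulo 2^32 to 0 */

	/* old check alone: 0 > 4096 is false, so a ~4 GiB length slips through */
	printf("old check rejects:      %d\n", sum > PAGE_SIZE);
	/* hardened check from the patch rejects the oversized length up front */
	printf("hardened check rejects: %d\n", guest_len > PAGE_SIZE || sum > PAGE_SIZE);
	return 0;
}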
diff --git a/queue-6.2/kvm-svm-flush-the-current-tlb-when-activating-avic.patch b/queue-6.2/kvm-svm-flush-the-current-tlb-when-activating-avic.patch
new file mode 100644 (file)
index 0000000..3ada7eb
--- /dev/null
@@ -0,0 +1,44 @@
+From 0ccf3e7cb95a2db8ddb2a44812037ffba8166dc9 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 6 Jan 2023 01:12:36 +0000
+Subject: KVM: SVM: Flush the "current" TLB when activating AVIC
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 0ccf3e7cb95a2db8ddb2a44812037ffba8166dc9 upstream.
+
+Flush the TLB when activating AVIC as the CPU can insert into the TLB
+while AVIC is "locally" disabled.  KVM doesn't treat "APIC hardware
+disabled" as VM-wide AVIC inhibition, and so when a vCPU has its APIC
+hardware disabled, AVIC is not guaranteed to be inhibited.  As a result,
+KVM may create a valid NPT mapping for the APIC base, which the CPU can
+cache as a non-AVIC translation.
+
+Note, Intel handles this in vmx_set_virtual_apic_mode().
+
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20230106011306.85230-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/avic.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm/avic.c
++++ b/arch/x86/kvm/svm/avic.c
+@@ -86,6 +86,12 @@ static void avic_activate_vmcb(struct vc
+               /* Disabling MSR intercept for x2APIC registers */
+               svm_set_x2apic_msr_interception(svm, false);
+       } else {
++              /*
++               * Flush the TLB, the guest may have inserted a non-APIC
++               * mapping into the TLB while AVIC was disabled.
++               */
++              kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
++
+               /* For xAVIC and hybrid-xAVIC modes */
+               vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
+               /* Enabling MSR intercept for x2APIC registers */
diff --git a/queue-6.2/kvm-svm-hyper-v-placate-modpost-section-mismatch-error.patch b/queue-6.2/kvm-svm-hyper-v-placate-modpost-section-mismatch-error.patch
new file mode 100644 (file)
index 0000000..134be57
--- /dev/null
@@ -0,0 +1,57 @@
+From 45dd9bc75d9adc9483f0c7d662ba6e73ed698a0b Mon Sep 17 00:00:00 2001
+From: Randy Dunlap <rdunlap@infradead.org>
+Date: Tue, 21 Feb 2023 23:33:15 -0800
+Subject: KVM: SVM: hyper-v: placate modpost section mismatch error
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+commit 45dd9bc75d9adc9483f0c7d662ba6e73ed698a0b upstream.
+
+modpost reports section mismatch errors/warnings:
+WARNING: modpost: vmlinux.o: section mismatch in reference: svm_hv_hardware_setup (section: .text) -> (unknown) (section: .init.data)
+WARNING: modpost: vmlinux.o: section mismatch in reference: svm_hv_hardware_setup (section: .text) -> (unknown) (section: .init.data)
+WARNING: modpost: vmlinux.o: section mismatch in reference: svm_hv_hardware_setup (section: .text) -> (unknown) (section: .init.data)
+
+This "(unknown) (section: .init.data)" all refer to svm_x86_ops.
+
+Tag svm_hv_hardware_setup() with __init to fix a modpost warning as the
+non-stub implementation accesses __initdata (svm_x86_ops), i.e. would
+generate a use-after-free if svm_hv_hardware_setup() were actually invoked
+post-init.  The helper is only called from svm_hardware_setup(), which is
+also __init, i.e. lack of __init is benign other than the modpost warning.
+
+Fixes: 1e0c7d40758b ("KVM: SVM: hyper-v: Remote TLB flush for SVM")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Cc: Vineeth Pillai <viremana@linux.microsoft.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: kvm@vger.kernel.org
+Cc: stable@vger.kernel.org
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20230222073315.9081-1-rdunlap@infradead.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm_onhyperv.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm_onhyperv.h
++++ b/arch/x86/kvm/svm/svm_onhyperv.h
+@@ -30,7 +30,7 @@ static inline void svm_hv_init_vmcb(stru
+               hve->hv_enlightenments_control.msr_bitmap = 1;
+ }
+-static inline void svm_hv_hardware_setup(void)
++static inline __init void svm_hv_hardware_setup(void)
+ {
+       if (npt_enabled &&
+           ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
+@@ -84,7 +84,7 @@ static inline void svm_hv_init_vmcb(stru
+ {
+ }
+-static inline void svm_hv_hardware_setup(void)
++static inline __init void svm_hv_hardware_setup(void)
+ {
+ }
diff --git a/queue-6.2/kvm-svm-process-icr-on-avic-ipi-delivery-failure-due-to-invalid-target.patch b/queue-6.2/kvm-svm-process-icr-on-avic-ipi-delivery-failure-due-to-invalid-target.patch
new file mode 100644 (file)
index 0000000..fc759f7
--- /dev/null
@@ -0,0 +1,64 @@
+From 5aede752a839904059c2b5d68be0dc4501c6c15f Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 6 Jan 2023 01:12:37 +0000
+Subject: KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid target
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 5aede752a839904059c2b5d68be0dc4501c6c15f upstream.
+
+Emulate ICR writes on AVIC IPI failures due to invalid targets using the
+same logic as failures due to invalid types.  AVIC acceleration fails if
+_any_ of the targets are invalid, and crucially VM-Exits before sending
+IPIs to targets that _are_ valid.  In logical mode, the destination is a
+bitmap, i.e. a single IPI can target multiple logical IDs.  Doing nothing
+causes KVM to drop IPIs if at least one target is valid and at least one
+target is invalid.
+
+Fixes: 18f40c53e10f ("svm: Add VMEXIT handlers for AVIC")
+Cc: stable@vger.kernel.org
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230106011306.85230-5-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/avic.c |   16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/svm/avic.c
++++ b/arch/x86/kvm/svm/avic.c
+@@ -502,14 +502,18 @@ int avic_incomplete_ipi_interception(str
+       trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
+       switch (id) {
++      case AVIC_IPI_FAILURE_INVALID_TARGET:
+       case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
+               /*
+                * Emulate IPIs that are not handled by AVIC hardware, which
+-               * only virtualizes Fixed, Edge-Triggered INTRs.  The exit is
+-               * a trap, e.g. ICR holds the correct value and RIP has been
+-               * advanced, KVM is responsible only for emulating the IPI.
+-               * Sadly, hardware may sometimes leave the BUSY flag set, in
+-               * which case KVM needs to emulate the ICR write as well in
++               * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
++               * if _any_ targets are invalid, e.g. if the logical mode mask
++               * is a superset of running vCPUs.
++               *
++               * The exit is a trap, e.g. ICR holds the correct value and RIP
++               * has been advanced, KVM is responsible only for emulating the
++               * IPI.  Sadly, hardware may sometimes leave the BUSY flag set,
++               * in which case KVM needs to emulate the ICR write as well in
+                * order to clear the BUSY flag.
+                */
+               if (icrl & APIC_ICR_BUSY)
+@@ -525,8 +529,6 @@ int avic_incomplete_ipi_interception(str
+                */
+               avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
+               break;
+-      case AVIC_IPI_FAILURE_INVALID_TARGET:
+-              break;
+       case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+               WARN_ONCE(1, "Invalid backing page\n");
+               break;
diff --git a/queue-6.2/kvm-x86-blindly-get-current-x2apic-reg-value-on-nodecode-write-traps.patch b/queue-6.2/kvm-x86-blindly-get-current-x2apic-reg-value-on-nodecode-write-traps.patch
new file mode 100644 (file)
index 0000000..d485465
--- /dev/null
@@ -0,0 +1,62 @@
+From 0a19807b464fb10aa79b9dd7f494bc317438fada Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 6 Jan 2023 01:12:34 +0000
+Subject: KVM: x86: Blindly get current x2APIC reg value on "nodecode write" traps
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 0a19807b464fb10aa79b9dd7f494bc317438fada upstream.
+
+When emulating a x2APIC write in response to an APICv/AVIC trap, get the
+the written value from the vAPIC page without checking that reads are
+allowed for the target register.  AVIC can generate trap-like VM-Exits on
+writes to EOI, and so KVM needs to get the written value from the backing
+page without running afoul of EOI's write-only behavior.
+
+Alternatively, EOI could be special cased to always write '0', e.g. so
+that the sanity check could be preserved, but x2APIC on AMD is actually
+supposed to disallow non-zero writes (not emulated by KVM), and the
+sanity check was a byproduct of how the KVM code was written, i.e. wasn't
+added to guard against anything in particular.
+
+Fixes: 70c8327c11c6 ("KVM: x86: Bug the VM if an accelerated x2APIC trap occurs on a "bad" reg")
+Fixes: 1bd9dfec9fd4 ("KVM: x86: Do not block APIC write for non ICR registers")
+Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230106011306.85230-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c |    9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2284,23 +2284,18 @@ void kvm_apic_write_nodecode(struct kvm_
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       u64 val;
+-      if (apic_x2apic_mode(apic)) {
+-              if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
+-                      return;
+-      } else {
+-              val = kvm_lapic_get_reg(apic, offset);
+-      }
+-
+       /*
+        * ICR is a single 64-bit register when x2APIC is enabled.  For legacy
+        * xAPIC, ICR writes need to go down the common (slightly slower) path
+        * to get the upper half from ICR2.
+        */
+       if (apic_x2apic_mode(apic) && offset == APIC_ICR) {
++              val = kvm_lapic_get_reg64(apic, APIC_ICR);
+               kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
+               trace_kvm_apic_write(APIC_ICR, val);
+       } else {
+               /* TODO: optimize to just emulate side effect w/o one more write */
++              val = kvm_lapic_get_reg(apic, offset);
+               kvm_lapic_reg_write(apic, offset, (u32)val);
+       }
+ }
diff --git a/queue-6.2/kvm-x86-don-t-inhibit-apicv-avic-if-xapic-id-mismatch-is-due-to-32-bit-id.patch b/queue-6.2/kvm-x86-don-t-inhibit-apicv-avic-if-xapic-id-mismatch-is-due-to-32-bit-id.patch
new file mode 100644 (file)
index 0000000..6f8e6cf
--- /dev/null
@@ -0,0 +1,60 @@
+From f651a008954803d7bb2d85b7042d0fd46133d782 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 6 Jan 2023 01:12:39 +0000
+Subject: KVM: x86: Don't inhibit APICv/AVIC if xAPIC ID mismatch is due to 32-bit ID
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f651a008954803d7bb2d85b7042d0fd46133d782 upstream.
+
+Truncate the vcpu_id, a.k.a. x2APIC ID, to an 8-bit value when comparing
+it against the xAPIC ID to avoid false positives (sort of) on systems
+with >255 CPUs, i.e. with IDs that don't fit into a u8.  The intent of
+APIC_ID_MODIFIED is to inhibit APICv/AVIC when the xAPIC is changed from
+it's original value,
+
+The mismatch isn't technically a false positive, as architecturally the
+xAPIC IDs do end up being aliased in this scenario, and neither APICv
+nor AVIC correctly handles IPI virtualization when there is aliasing.
+However, KVM already deliberately does not honor the aliasing behavior
+that results when an x2APIC ID gets truncated to an xAPIC ID.  I.e. the
+resulting APICv/AVIC behavior is aligned with KVM's existing behavior
+when KVM's x2APIC hotplug hack is effectively enabled.
+
+If/when KVM provides a way to disable the hotplug hack, APICv/AVIC can
+piggyback whatever logic disables the optimized APIC map (which is what
+provides the hotplug hack), i.e. so that KVM's optimized map and APIC
+virtualization yield the same behavior.
+
+For now, fix the immediate problem of APIC virtualization being disabled
+for large VMs, which is a much more pressing issue than ensuring KVM
+honors architectural behavior for APIC ID aliasing.
+
+Fixes: 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base")
+Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230106011306.85230-7-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2078,7 +2078,12 @@ static void kvm_lapic_xapic_id_updated(s
+       if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
+               return;
+-      if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
++      /*
++       * Deliberately truncate the vCPU ID when detecting a modified APIC ID
++       * to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a 32-bit
++       * value.
++       */
++      if (kvm_xapic_id(apic) == (u8)apic->vcpu->vcpu_id)
+               return;
+       kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
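
The effect of the truncation applied above can be seen in a standalone sketch: on a VM with more than 255 vCPUs, an unmodified xAPIC ID still differs numerically from the full 32-bit vCPU ID, so only the 8-bit comparison avoids a spurious inhibit. A hedged illustration with placeholder names, not KVM code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t vcpu_id  = 256;               /* x2APIC ID of a vCPU beyond the 8-bit xAPIC range */
	uint8_t  xapic_id = (uint8_t)vcpu_id;  /* the 8-bit xAPIC ID register aliases to 0 */

	/* comparing against the full 32-bit ID reports a mismatch even though the
	 * guest never touched the register, which wrongly inhibited APICv/AVIC */
	printf("mismatch without truncation: %d\n", xapic_id != vcpu_id);
	/* truncating the vCPU ID first, as the patch does, sees no mismatch */
	printf("mismatch with (u8) cast:     %d\n", xapic_id != (uint8_t)vcpu_id);
	return 0;
}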
diff --git a/queue-6.2/kvm-x86-don-t-inhibit-apicv-avic-on-xapic-id-change-if-apic-is-disabled.patch b/queue-6.2/kvm-x86-don-t-inhibit-apicv-avic-on-xapic-id-change-if-apic-is-disabled.patch
new file mode 100644 (file)
index 0000000..d517b87
--- /dev/null
@@ -0,0 +1,37 @@
+From a58a66afc464d6d2ec294cd3102f36f3652e7ce4 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 6 Jan 2023 01:12:38 +0000
+Subject: KVM: x86: Don't inhibit APICv/AVIC on xAPIC ID "change" if APIC is disabled
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit a58a66afc464d6d2ec294cd3102f36f3652e7ce4 upstream.
+
+Don't inhibit APICv/AVIC due to an xAPIC ID mismatch if the APIC is
+hardware disabled.  The ID cannot be consumed while the APIC is disabled,
+and the ID is guaranteed to be set back to the vcpu_id when the APIC is
+hardware enabled (architectural behavior correctly emulated by KVM).
+
+Fixes: 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base")
+Cc: stable@vger.kernel.org
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230106011306.85230-6-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2072,6 +2072,9 @@ static void kvm_lapic_xapic_id_updated(s
+ {
+       struct kvm *kvm = apic->vcpu->kvm;
++      if (!kvm_apic_hw_enabled(apic))
++              return;
++
+       if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
+               return;
diff --git a/queue-6.2/kvm-x86-inject-gp-if-wrmsr-sets-reserved-bits-in-apic-self-ipi.patch b/queue-6.2/kvm-x86-inject-gp-if-wrmsr-sets-reserved-bits-in-apic-self-ipi.patch
new file mode 100644 (file)
index 0000000..aa10566
--- /dev/null
@@ -0,0 +1,44 @@
+From ba5838abb05334e4abfdff1490585c7f365e0424 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Sat, 7 Jan 2023 01:10:20 +0000
+Subject: KVM: x86: Inject #GP if WRMSR sets reserved bits in APIC Self-IPI
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ba5838abb05334e4abfdff1490585c7f365e0424 upstream.
+
+Inject a #GP if the guest attempts to set reserved bits in the x2APIC-only
+Self-IPI register.  Bits 7:0 hold the vector, all other bits are reserved.
+
+Reported-by: Marc Orr <marcorr@google.com>
+Cc: Ben Gardon <bgardon@google.com>
+Cc: Venkatesh Srinivas <venkateshs@chromium.org>
+Cc: stable@vger.kernel.org
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20230107011025.565472-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2227,10 +2227,14 @@ static int kvm_lapic_reg_write(struct kv
+               break;
+       case APIC_SELF_IPI:
+-              if (apic_x2apic_mode(apic))
+-                      kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
+-              else
++              /*
++               * Self-IPI exists only when x2APIC is enabled.  Bits 7:0 hold
++               * the vector, everything else is reserved.
++               */
++              if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
+                       ret = 1;
++              else
++                      kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
+               break;
+       default:
+               ret = 1;
diff --git a/queue-6.2/kvm-x86-inject-gp-on-x2apic-wrmsr-that-sets-reserved-bits-63-32.patch b/queue-6.2/kvm-x86-inject-gp-on-x2apic-wrmsr-that-sets-reserved-bits-63-32.patch
new file mode 100644 (file)
index 0000000..b92a015
--- /dev/null
@@ -0,0 +1,48 @@
+From ab52be1b310bcb39e6745d34a8f0e8475d67381a Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Sat, 7 Jan 2023 01:10:21 +0000
+Subject: KVM: x86: Inject #GP on x2APIC WRMSR that sets reserved bits 63:32
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ab52be1b310bcb39e6745d34a8f0e8475d67381a upstream.
+
+Reject attempts to set bits 63:32 for 32-bit x2APIC registers, i.e. all
+x2APIC registers except ICR.  Per Intel's SDM:
+
+  Non-zero writes (by WRMSR instruction) to reserved bits to these
+  registers will raise a general protection fault exception
+
+Opportunistically fix a typo in a nearby comment.
+
+Reported-by: Marc Orr <marcorr@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20230107011025.565472-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2949,13 +2949,17 @@ static int kvm_lapic_msr_read(struct kvm
+ static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
+ {
+       /*
+-       * ICR is a 64-bit register in x2APIC mode (and Hyper'v PV vAPIC) and
++       * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
+        * can be written as such, all other registers remain accessible only
+        * through 32-bit reads/writes.
+        */
+       if (reg == APIC_ICR)
+               return kvm_x2apic_icr_write(apic, data);
++      /* Bits 63:32 are reserved in all other registers. */
++      if (data >> 32)
++              return 1;
++
+       return kvm_lapic_reg_write(apic, reg, (u32)data);
+ }
diff --git a/queue-6.2/kvm-x86-purge-highest-isr-cache-when-updating-apicv-state.patch b/queue-6.2/kvm-x86-purge-highest-isr-cache-when-updating-apicv-state.patch
new file mode 100644 (file)
index 0000000..00b3906
--- /dev/null
@@ -0,0 +1,58 @@
+From 97a71c444a147ae41c7d0ab5b3d855d7f762f3ed Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 6 Jan 2023 01:12:35 +0000
+Subject: KVM: x86: Purge "highest ISR" cache when updating APICv state
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 97a71c444a147ae41c7d0ab5b3d855d7f762f3ed upstream.
+
+Purge the "highest ISR" cache when updating APICv state on a vCPU.  The
+cache must not be used when APICv is active as hardware may emulate EOIs
+(and other operations) without exiting to KVM.
+
+This fixes a bug where KVM will effectively block IRQs in perpetuity due
+to the "highest ISR" never getting reset if APICv is activated on a vCPU
+while an IRQ is in-service.  Hardware emulates the EOI and KVM never gets
+a chance to update its cache.
+
+Fixes: b26a695a1d78 ("kvm: lapic: Introduce APICv update helper function")
+Cc: stable@vger.kernel.org
+Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Cc: Maxim Levitsky <mlevitsk@redhat.com>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230106011306.85230-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2429,6 +2429,7 @@ void kvm_apic_update_apicv(struct kvm_vc
+                */
+               apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+       }
++      apic->highest_isr_cache = -1;
+ }
+ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
+@@ -2484,7 +2485,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vc
+               kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
+       }
+       kvm_apic_update_apicv(vcpu);
+-      apic->highest_isr_cache = -1;
+       update_divide_count(apic);
+       atomic_set(&apic->lapic_timer.pending, 0);
+@@ -2772,7 +2772,6 @@ int kvm_apic_set_state(struct kvm_vcpu *
+       __start_apic_timer(apic, APIC_TMCCT);
+       kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
+       kvm_apic_update_apicv(vcpu);
+-      apic->highest_isr_cache = -1;
+       if (apic->apicv_active) {
+               static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
+               static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
diff --git a/queue-6.2/selftests-x86-fix-incorrect-kernel-headers-search-path.patch b/queue-6.2/selftests-x86-fix-incorrect-kernel-headers-search-path.patch
new file mode 100644 (file)
index 0000000..74e0001
--- /dev/null
@@ -0,0 +1,36 @@
+From ac5ec90e94fe8eddb4499e51398640fa6a89d657 Mon Sep 17 00:00:00 2001
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Date: Fri, 27 Jan 2023 08:57:50 -0500
+Subject: selftests: x86: Fix incorrect kernel headers search path
+
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+commit ac5ec90e94fe8eddb4499e51398640fa6a89d657 upstream.
+
+Use $(KHDR_INCLUDES) as lookup path for kernel headers. This prevents
+building against kernel headers from the build environment in scenarios
+where kernel headers are installed into a specific output directory
+(O=...).
+
+Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: linux-kselftest@vger.kernel.org
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: <stable@vger.kernel.org>  # 5.18+
+Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/x86/Makefile |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/x86/Makefile
++++ b/tools/testing/selftests/x86/Makefile
+@@ -34,7 +34,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%
+ BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+-CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
++CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES)
+ # call32_from_64 in thunks.S uses absolute addresses.
+ ifeq ($(CAN_BUILD_WITH_NOPIE),1)
diff --git a/queue-6.2/series b/queue-6.2/series
index cfae825aec27f73f2697477d321143e158636f2d..a7c554196122b578706c539e193e3ba80f95b654 100644 (file)
@@ -822,3 +822,27 @@ md-don-t-update-recovery_cp-when-curr_resync-is-active.patch
 kvm-destroy-target-device-if-coalesced-mmio-unregistration-fails.patch
 kvm-vmx-fix-crash-due-to-uninitialized-current_vmcs.patch
 kvm-register-dev-kvm-as-the-_very_-last-thing-during-initialization.patch
+kvm-x86-purge-highest-isr-cache-when-updating-apicv-state.patch
+kvm-x86-blindly-get-current-x2apic-reg-value-on-nodecode-write-traps.patch
+kvm-x86-don-t-inhibit-apicv-avic-on-xapic-id-change-if-apic-is-disabled.patch
+kvm-x86-don-t-inhibit-apicv-avic-if-xapic-id-mismatch-is-due-to-32-bit-id.patch
+kvm-svm-flush-the-current-tlb-when-activating-avic.patch
+kvm-svm-process-icr-on-avic-ipi-delivery-failure-due-to-invalid-target.patch
+kvm-svm-don-t-put-load-avic-when-setting-virtual-apic-mode.patch
+kvm-x86-inject-gp-if-wrmsr-sets-reserved-bits-in-apic-self-ipi.patch
+kvm-x86-inject-gp-on-x2apic-wrmsr-that-sets-reserved-bits-63-32.patch
+kvm-svm-fix-potential-overflow-in-sev-s-send-receive_update_data.patch
+kvm-svm-hyper-v-placate-modpost-section-mismatch-error.patch
+selftests-x86-fix-incorrect-kernel-headers-search-path.patch
+x86-virt-force-gif-1-prior-to-disabling-svm-for-reboot-flows.patch
+x86-crash-disable-virt-in-core-nmi-crash-handler-to-avoid-double-shootdown.patch
+x86-reboot-disable-virtualization-in-an-emergency-if-svm-is-supported.patch
+x86-reboot-disable-svm-not-just-vmx-when-stopping-cpus.patch
+x86-kprobes-fix-__recover_optprobed_insn-check-optimizing-logic.patch
+x86-kprobes-fix-arch_check_optimized_kprobe-check-within-optimized_kprobe-range.patch
+x86-microcode-amd-remove-load_microcode_amd-s-bsp-parameter.patch
+x86-microcode-amd-add-a-cpu-parameter-to-the-reloading-functions.patch
+x86-microcode-amd-fix-mixed-steppings-support.patch
+x86-speculation-allow-enabling-stibp-with-legacy-ibrs.patch
+documentation-hw-vuln-document-the-interaction-between-ibrs-and-stibp.patch
+virt-sev-guest-return-eio-if-certificate-buffer-is-not-large-enough.patch
diff --git a/queue-6.2/virt-sev-guest-return-eio-if-certificate-buffer-is-not-large-enough.patch b/queue-6.2/virt-sev-guest-return-eio-if-certificate-buffer-is-not-large-enough.patch
new file mode 100644 (file)
index 0000000..fd3f385
--- /dev/null
@@ -0,0 +1,71 @@
+From dd093fb08e8f8a958fec4eef36f9f09eac047f60 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 22 Feb 2023 10:39:39 -0600
+Subject: virt/sev-guest: Return -EIO if certificate buffer is not large enough
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit dd093fb08e8f8a958fec4eef36f9f09eac047f60 upstream.
+
+Commit
+
+  47894e0fa6a5 ("virt/sev-guest: Prevent IV reuse in the SNP guest driver")
+
+changed the behavior associated with the return value when the caller
+does not supply a large enough certificate buffer. Prior to the commit a
+value of -EIO was returned. Now, 0 is returned.  This breaks the
+established ABI with the user.
+
+Change the code to detect the buffer size error and return -EIO.
+
+Fixes: 47894e0fa6a5 ("virt/sev-guest: Prevent IV reuse in the SNP guest driver")
+Reported-by: Larry Dewey <larry.dewey@amd.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Larry Dewey <larry.dewey@amd.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/2afbcae6daf13f7ad5a4296692e0a0fe1bc1e4ee.1677083979.git.thomas.lendacky@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/virt/coco/sev-guest/sev-guest.c |   20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+--- a/drivers/virt/coco/sev-guest/sev-guest.c
++++ b/drivers/virt/coco/sev-guest/sev-guest.c
+@@ -377,9 +377,26 @@ static int handle_guest_request(struct s
+               snp_dev->input.data_npages = certs_npages;
+       }
++      /*
++       * Increment the message sequence number. There is no harm in doing
++       * this now because decryption uses the value stored in the response
++       * structure and any failure will wipe the VMPCK, preventing further
++       * use anyway.
++       */
++      snp_inc_msg_seqno(snp_dev);
++
+       if (fw_err)
+               *fw_err = err;
++      /*
++       * If an extended guest request was issued and the supplied certificate
++       * buffer was not large enough, a standard guest request was issued to
++       * prevent IV reuse. If the standard request was successful, return -EIO
++       * back to the caller as would have originally been returned.
++       */
++      if (!rc && err == SNP_GUEST_REQ_INVALID_LEN)
++              return -EIO;
++
+       if (rc) {
+               dev_alert(snp_dev->dev,
+                         "Detected error from ASP request. rc: %d, fw_err: %llu\n",
+@@ -395,9 +412,6 @@ static int handle_guest_request(struct s
+               goto disable_vmpck;
+       }
+-      /* Increment to new message sequence after payload decryption was successful. */
+-      snp_inc_msg_seqno(snp_dev);
+-
+       return 0;
+ disable_vmpck:
diff --git a/queue-6.2/x86-crash-disable-virt-in-core-nmi-crash-handler-to-avoid-double-shootdown.patch b/queue-6.2/x86-crash-disable-virt-in-core-nmi-crash-handler-to-avoid-double-shootdown.patch
new file mode 100644 (file)
index 0000000..120c3b2
--- /dev/null
@@ -0,0 +1,243 @@
+From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 30 Nov 2022 23:36:47 +0000
+Subject: x86/crash: Disable virt in core NMI crash handler to avoid double shootdown
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 26044aff37a5455b19a91785086914fd33053ef4 upstream.
+
+Disable virtualization in crash_nmi_callback() and rework the
+emergency_vmx_disable_all() path to do an NMI shootdown if and only if a
+shootdown has not already occurred.   NMI crash shootdown fundamentally
+can't support multiple invocations as responding CPUs are deliberately
+put into halt state without unblocking NMIs.  But, the emergency reboot
+path doesn't have any work of its own, it simply cares about disabling
+virtualization, i.e. so long as a shootdown occurred, emergency reboot
+doesn't care who initiated the shootdown, or when.
+
+If "crash_kexec_post_notifiers" is specified on the kernel command line,
+panic() will invoke crash_smp_send_stop() and result in a second call to
+nmi_shootdown_cpus() during native_machine_emergency_restart().
+
+Invoke the callback _before_ disabling virtualization, as the current
+VMCS needs to be cleared before doing VMXOFF.  Note, this results in a
+subtle change in ordering between disabling virtualization and stopping
+Intel PT on the responding CPUs.  While VMX and Intel PT do interact,
+VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one
+another, which is all that matters when panicking.
+
+Harden nmi_shootdown_cpus() against multiple invocations to try and
+capture any such kernel bugs via a WARN instead of hanging the system
+during a crash/dump, e.g. prior to the recent hardening of
+register_nmi_handler(), re-registering the NMI handler would trigger a
+double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y.
+
+ list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800.
+ WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70
+ Call Trace:
+  __register_nmi_handler+0xcf/0x130
+  nmi_shootdown_cpus+0x39/0x90
+  native_machine_emergency_restart+0x1c9/0x1d0
+  panic+0x237/0x29b
+
+Extract the disabling logic to a common helper to deduplicate code, and
+to prepare for doing the shootdown in the emergency reboot path if SVM
+is supported.
+
+Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit
+VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected
+against a second invocation by a cpu_vmx_enabled() check as the kdump
+handler would disable VMX if it ran first.
+
+Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
+Cc: stable@vger.kernel.org
+Reported-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com
+Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/reboot.h |    2 +
+ arch/x86/kernel/crash.c       |   17 ----------
+ arch/x86/kernel/reboot.c      |   65 ++++++++++++++++++++++++++++++++++--------
+ 3 files changed, 56 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/asm/reboot.h
++++ b/arch/x86/include/asm/reboot.h
+@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(uns
+ #define MRR_BIOS      0
+ #define MRR_APM               1
++void cpu_emergency_disable_virtualization(void);
++
+ typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
+ void nmi_panic_self_stop(struct pt_regs *regs);
+ void nmi_shootdown_cpus(nmi_shootdown_cb callback);
+--- a/arch/x86/kernel/crash.c
++++ b/arch/x86/kernel/crash.c
+@@ -37,7 +37,6 @@
+ #include <linux/kdebug.h>
+ #include <asm/cpu.h>
+ #include <asm/reboot.h>
+-#include <asm/virtext.h>
+ #include <asm/intel_pt.h>
+ #include <asm/crash.h>
+ #include <asm/cmdline.h>
+@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu,
+        */
+       cpu_crash_vmclear_loaded_vmcss();
+-      /* Disable VMX or SVM if needed.
+-       *
+-       * We need to disable virtualization on all CPUs.
+-       * Having VMX or SVM enabled on any CPU may break rebooting
+-       * after the kdump kernel has finished its task.
+-       */
+-      cpu_emergency_vmxoff();
+-      cpu_emergency_svm_disable();
+-
+       /*
+        * Disable Intel PT to stop its logging
+        */
+@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struc
+        */
+       cpu_crash_vmclear_loaded_vmcss();
+-      /* Booting kdump kernel with VMX or SVM enabled won't work,
+-       * because (among other limitations) we can't disable paging
+-       * with the virt flags.
+-       */
+-      cpu_emergency_vmxoff();
+-      cpu_emergency_svm_disable();
++      cpu_emergency_disable_virtualization();
+       /*
+        * Disable Intel PT to stop its logging
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -528,10 +528,7 @@ static inline void kb_wait(void)
+       }
+ }
+-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
+-{
+-      cpu_emergency_vmxoff();
+-}
++static inline void nmi_shootdown_cpus_on_restart(void);
+ /* Use NMIs as IPIs to tell all CPUs to disable virtualization */
+ static void emergency_vmx_disable_all(void)
+@@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(vo
+               __cpu_emergency_vmxoff();
+               /* Halt and exit VMX root operation on the other CPUs. */
+-              nmi_shootdown_cpus(vmxoff_nmi);
++              nmi_shootdown_cpus_on_restart();
+       }
+ }
+@@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_re
+ /* This is the CPU performing the emergency shutdown work. */
+ int crashing_cpu = -1;
++/*
++ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
++ * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
++ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
++ */
++void cpu_emergency_disable_virtualization(void)
++{
++      cpu_emergency_vmxoff();
++      cpu_emergency_svm_disable();
++}
++
+ #if defined(CONFIG_SMP)
+ static nmi_shootdown_cb shootdown_callback;
+@@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned i
+               return NMI_HANDLED;
+       local_irq_disable();
+-      shootdown_callback(cpu, regs);
++      if (shootdown_callback)
++              shootdown_callback(cpu, regs);
++
++      /*
++       * Prepare the CPU for reboot _after_ invoking the callback so that the
++       * callback can safely use virtualization instructions, e.g. VMCLEAR.
++       */
++      cpu_emergency_disable_virtualization();
+       atomic_dec(&waiting_for_crash_ipi);
+       /* Assume hlt works */
+@@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned i
+       return NMI_HANDLED;
+ }
+-/*
+- * Halt all other CPUs, calling the specified function on each of them
++/**
++ * nmi_shootdown_cpus - Stop other CPUs via NMI
++ * @callback: Optional callback to be invoked from the NMI handler
++ *
++ * The NMI handler on the remote CPUs invokes @callback, if not
++ * NULL, first and then disables virtualization to ensure that
++ * INIT is recognized during reboot.
+  *
+- * This function can be used to halt all other CPUs on crash
+- * or emergency reboot time. The function passed as parameter
+- * will be called inside a NMI handler on all CPUs.
++ * nmi_shootdown_cpus() can only be invoked once. After the first
++ * invocation all other CPUs are stuck in crash_nmi_callback() and
++ * cannot respond to a second NMI.
+  */
+ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+ {
+       unsigned long msecs;
++
+       local_irq_disable();
++      /*
++       * Avoid certain doom if a shootdown already occurred; re-registering
++       * the NMI handler will cause list corruption, modifying the callback
++       * will do who knows what, etc...
++       */
++      if (WARN_ON_ONCE(crash_ipi_issued))
++              return;
++
+       /* Make a note of crashing cpu. Will be used in NMI callback. */
+       crashing_cpu = safe_smp_processor_id();
+@@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb
+               msecs--;
+       }
+-      /* Leave the nmi callback set */
++      /*
++       * Leave the nmi callback set, shootdown is a one-time thing.  Clearing
++       * the callback could result in a NULL pointer dereference if a CPU
++       * (finally) responds after the timeout expires.
++       */
++}
++
++static inline void nmi_shootdown_cpus_on_restart(void)
++{
++      if (!crash_ipi_issued)
++              nmi_shootdown_cpus(NULL);
+ }
+ /*
+@@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb
+       /* No other CPUs to shoot down */
+ }
++static inline void nmi_shootdown_cpus_on_restart(void) { }
++
+ void run_crash_ipi_callback(struct pt_regs *regs)
+ {
+ }
diff --git a/queue-6.2/x86-kprobes-fix-__recover_optprobed_insn-check-optimizing-logic.patch b/queue-6.2/x86-kprobes-fix-__recover_optprobed_insn-check-optimizing-logic.patch
new file mode 100644 (file)
index 0000000..4d69db9
--- /dev/null
@@ -0,0 +1,72 @@
+From 868a6fc0ca2407622d2833adefe1c4d284766c4c Mon Sep 17 00:00:00 2001
+From: Yang Jihong <yangjihong1@huawei.com>
+Date: Tue, 21 Feb 2023 08:49:16 +0900
+Subject: x86/kprobes: Fix __recover_optprobed_insn check optimizing logic
+
+From: Yang Jihong <yangjihong1@huawei.com>
+
+commit 868a6fc0ca2407622d2833adefe1c4d284766c4c upstream.
+
+Since the following commit:
+
+  commit f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code")
+
+modified the update timing of the KPROBE_FLAG_OPTIMIZED, an optimized_kprobe
+may be in the optimizing or unoptimizing state when op.kp->flags
+has KPROBE_FLAG_OPTIMIZED and op->list is not empty.
+
+The __recover_optprobed_insn() check logic is incorrect: a kprobe in the
+unoptimizing state may be incorrectly treated as being under optimization.
+As a result, incorrect instructions are copied.
+
+The optprobe_queued_unopt() function needs to be exported so that it can
+be invoked from the arch directory.
+
+Link: https://lore.kernel.org/all/20230216034247.32348-2-yangjihong1@huawei.com/
+
+Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/opt.c |    4 ++--
+ include/linux/kprobes.h       |    1 +
+ kernel/kprobes.c              |    2 +-
+ 3 files changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -46,8 +46,8 @@ unsigned long __recover_optprobed_insn(k
+               /* This function only handles jump-optimized kprobe */
+               if (kp && kprobe_optimized(kp)) {
+                       op = container_of(kp, struct optimized_kprobe, kp);
+-                      /* If op->list is not empty, op is under optimizing */
+-                      if (list_empty(&op->list))
++                      /* If op is optimized or under unoptimizing */
++                      if (list_empty(&op->list) || optprobe_queued_unopt(op))
+                               goto found;
+               }
+       }
+--- a/include/linux/kprobes.h
++++ b/include/linux/kprobes.h
+@@ -378,6 +378,7 @@ extern void opt_pre_handler(struct kprob
+ DEFINE_INSN_CACHE_OPS(optinsn);
+ extern void wait_for_kprobe_optimizer(void);
++bool optprobe_queued_unopt(struct optimized_kprobe *op);
+ #else /* !CONFIG_OPTPROBES */
+ static inline void wait_for_kprobe_optimizer(void) { }
+ #endif /* CONFIG_OPTPROBES */
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -662,7 +662,7 @@ void wait_for_kprobe_optimizer(void)
+       mutex_unlock(&kprobe_mutex);
+ }
+-static bool optprobe_queued_unopt(struct optimized_kprobe *op)
++bool optprobe_queued_unopt(struct optimized_kprobe *op)
+ {
+       struct optimized_kprobe *_op;
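
To make the corrected check above concrete, here is a small userspace model, assuming the semantics described in the changelog: the saved original bytes are usable while the detour jump is actually installed, i.e. when the probe is fully optimized (op->list empty) or only queued for unoptimization. The struct and function names below are hypothetical stand-ins, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical model of the relevant optimized_kprobe state bits. */
struct opt_probe_state {
	bool on_list;		/* op->list is non-empty                    */
	bool queued_unopt;	/* the probe sits on the unoptimizing list  */
};

/*
 * Model of the fixed check in __recover_optprobed_insn(): the saved
 * original bytes may be used only while the detour jump is installed,
 * i.e. the probe is fully optimized or merely queued for unoptimization.
 */
static bool can_recover_from_copied_insn(const struct opt_probe_state *op)
{
	return !op->on_list || op->queued_unopt;
}

int main(void)
{
	struct opt_probe_state optimizing   = { true,  false };
	struct opt_probe_state unoptimizing = { true,  true  };
	struct opt_probe_state optimized    = { false, false };

	printf("optimizing:   %d\n", can_recover_from_copied_insn(&optimizing));	/* 0 */
	printf("unoptimizing: %d\n", can_recover_from_copied_insn(&unoptimizing));	/* 1 */
	printf("optimized:    %d\n", can_recover_from_copied_insn(&optimized));	/* 1 */
	return 0;
}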
diff --git a/queue-6.2/x86-kprobes-fix-arch_check_optimized_kprobe-check-within-optimized_kprobe-range.patch b/queue-6.2/x86-kprobes-fix-arch_check_optimized_kprobe-check-within-optimized_kprobe-range.patch
new file mode 100644 (file)
index 0000000..e3df2e8
--- /dev/null
@@ -0,0 +1,144 @@
+From f1c97a1b4ef709e3f066f82e3ba3108c3b133ae6 Mon Sep 17 00:00:00 2001
+From: Yang Jihong <yangjihong1@huawei.com>
+Date: Tue, 21 Feb 2023 08:49:16 +0900
+Subject: x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range
+
+From: Yang Jihong <yangjihong1@huawei.com>
+
+commit f1c97a1b4ef709e3f066f82e3ba3108c3b133ae6 upstream.
+
+When arch_prepare_optimized_kprobe() calculates the jump destination
+address, it copies the original instructions from the jmp-optimized kprobe
+(see __recover_optprobed_insn()), and the calculation is based on the
+length of the original instruction.
+
+arch_check_optimized_kprobe() does not check KPROBE_FLAG_OPTIMIZED when
+checking whether a jmp-optimized kprobe exists.
+As a result, setup_detour_execution() may jump into a range that has been
+overwritten by the jump destination address, resulting in an invalid
+opcode error.
+
+For example, assume that two kprobes are registered at the addresses
+<func+9> and <func+11> in the "func" function.
+The original code of "func" function is as follows:
+
+   0xffffffff816cb5e9 <+9>:     push   %r12
+   0xffffffff816cb5eb <+11>:    xor    %r12d,%r12d
+   0xffffffff816cb5ee <+14>:    test   %rdi,%rdi
+   0xffffffff816cb5f1 <+17>:    setne  %r12b
+   0xffffffff816cb5f5 <+21>:    push   %rbp
+
+1. Register the kprobe for <func+11>; assume it is kp1 and the corresponding optimized_kprobe is op1.
+   After the optimization, the "func" code changes to:
+
+   0xffffffff816cc079 <+9>:     push   %r12
+   0xffffffff816cc07b <+11>:    jmp    0xffffffffa0210000
+   0xffffffff816cc080 <+16>:    incl   0xf(%rcx)
+   0xffffffff816cc083 <+19>:    xchg   %eax,%ebp
+   0xffffffff816cc084 <+20>:    (bad)
+   0xffffffff816cc085 <+21>:    push   %rbp
+
+Now op1->flags == KPROBE_FLAG_OPTIMIZED;
+
+2. Register the kprobe for <func+9>; assume it is kp2 and the corresponding optimized_kprobe is op2.
+
+register_kprobe(kp2)
+  register_aggr_kprobe
+    alloc_aggr_kprobe
+      __prepare_optimized_kprobe
+        arch_prepare_optimized_kprobe
+          __recover_optprobed_insn    // copy original bytes from kp1->optinsn.copied_insn,
+                                      // jump address = <func+14>
+
+3. Disable kp1:
+
+disable_kprobe(kp1)
+  __disable_kprobe
+    ...
+    if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+      ret = disarm_kprobe(orig_p, true)       // adds op1 to unoptimizing_list, not yet unoptimized
+      orig_p->flags |= KPROBE_FLAG_DISABLED;  // op1->flags == KPROBE_FLAG_OPTIMIZED | KPROBE_FLAG_DISABLED
+    ...
+
+4. Unregister kp2:
+__unregister_kprobe_top
+  ...
+  if (!kprobe_disabled(ap) && !kprobes_all_disarmed) {
+    optimize_kprobe(op)
+      ...
+      if (arch_check_optimized_kprobe(op) < 0) // because op1 has KPROBE_FLAG_DISABLED, this does not return
+        return;
+      p->kp.flags |= KPROBE_FLAG_OPTIMIZED;   //  now op2 has KPROBE_FLAG_OPTIMIZED
+  }
+
+"func" code now is:
+
+   0xffffffff816cc079 <+9>:     int3
+   0xffffffff816cc07a <+10>:    push   %rsp
+   0xffffffff816cc07b <+11>:    jmp    0xffffffffa0210000
+   0xffffffff816cc080 <+16>:    incl   0xf(%rcx)
+   0xffffffff816cc083 <+19>:    xchg   %eax,%ebp
+   0xffffffff816cc084 <+20>:    (bad)
+   0xffffffff816cc085 <+21>:    push   %rbp
+
+5. If "func" is called, the int3 handler calls setup_detour_execution():
+
+  if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+    ...
+    regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+    ...
+  }
+
+The code at the destination address is:
+
+   0xffffffffa021072c:  push   %r12
+   0xffffffffa021072e:  xor    %r12d,%r12d
+   0xffffffffa0210731:  jmp    0xffffffff816cb5ee <func+14>
+
+However, <func+14> is not a valid instruction start address. As a result, an error occurs.
+
+Link: https://lore.kernel.org/all/20230216034247.32348-3-yangjihong1@huawei.com/
+
+Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code")
+Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
+Cc: stable@vger.kernel.org
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/opt.c |    2 +-
+ include/linux/kprobes.h       |    1 +
+ kernel/kprobes.c              |    2 +-
+ 3 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -353,7 +353,7 @@ int arch_check_optimized_kprobe(struct o
+       for (i = 1; i < op->optinsn.size; i++) {
+               p = get_kprobe(op->kp.addr + i);
+-              if (p && !kprobe_disabled(p))
++              if (p && !kprobe_disarmed(p))
+                       return -EEXIST;
+       }
+--- a/include/linux/kprobes.h
++++ b/include/linux/kprobes.h
+@@ -379,6 +379,7 @@ DEFINE_INSN_CACHE_OPS(optinsn);
+ extern void wait_for_kprobe_optimizer(void);
+ bool optprobe_queued_unopt(struct optimized_kprobe *op);
++bool kprobe_disarmed(struct kprobe *p);
+ #else /* !CONFIG_OPTPROBES */
+ static inline void wait_for_kprobe_optimizer(void) { }
+ #endif /* CONFIG_OPTPROBES */
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -458,7 +458,7 @@ static inline int kprobe_optready(struct
+ }
+ /* Return true if the kprobe is disarmed. Note: p must be on hash list */
+-static inline bool kprobe_disarmed(struct kprobe *p)
++bool kprobe_disarmed(struct kprobe *p)
+ {
+       struct optimized_kprobe *op;
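
A brief userspace sketch of the corrected range check, under the assumption spelled out in the changelog that only a *disarmed* probe can be ignored (a merely disabled probe may still have its detour jump in the text). The types and names here are illustrative, not the kernel's.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical model of another probe within the same function. */
struct probe {
	size_t offset;		/* offset of the probe within the function     */
	bool   disarmed;	/* breakpoint/jump fully removed from the text  */
};

/*
 * Model of the fixed arch_check_optimized_kprobe() logic: optimizing a
 * probe covering offsets (start, start + size) is refused while any
 * *armed* probe still lives in that range; a disabled-but-armed probe
 * still counts.
 */
static bool range_safe_to_optimize(size_t start, size_t size,
				   const struct probe *others, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		size_t off = others[i].offset;

		if (off > start && off < start + size && !others[i].disarmed)
			return false;
	}
	return true;
}

int main(void)
{
	/* A disabled-but-not-yet-unoptimized probe at <func+11>. */
	struct probe others[] = { { 11, false } };

	/* Optimizing <func+9> would cover offsets 10..13, so it is refused. */
	printf("%s\n", range_safe_to_optimize(9, 5, others, 1) ? "optimize" : "refuse");
	return 0;
}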
diff --git a/queue-6.2/x86-microcode-amd-add-a-cpu-parameter-to-the-reloading-functions.patch b/queue-6.2/x86-microcode-amd-add-a-cpu-parameter-to-the-reloading-functions.patch
new file mode 100644 (file)
index 0000000..b326720
--- /dev/null
@@ -0,0 +1,96 @@
+From a5ad92134bd153a9ccdcddf09a95b088f36c3cce Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Thu, 26 Jan 2023 00:08:03 +0100
+Subject: x86/microcode/AMD: Add a @cpu parameter to the reloading functions
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit a5ad92134bd153a9ccdcddf09a95b088f36c3cce upstream.
+
+Will be used in a subsequent change.
+
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230130161709.11615-3-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/microcode.h     |    4 ++--
+ arch/x86/include/asm/microcode_amd.h |    4 ++--
+ arch/x86/kernel/cpu/microcode/amd.c  |    2 +-
+ arch/x86/kernel/cpu/microcode/core.c |    6 +++---
+ 4 files changed, 8 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -125,13 +125,13 @@ static inline unsigned int x86_cpuid_fam
+ #ifdef CONFIG_MICROCODE
+ extern void __init load_ucode_bsp(void);
+ extern void load_ucode_ap(void);
+-void reload_early_microcode(void);
++void reload_early_microcode(unsigned int cpu);
+ extern bool initrd_gone;
+ void microcode_bsp_resume(void);
+ #else
+ static inline void __init load_ucode_bsp(void)                        { }
+ static inline void load_ucode_ap(void)                                { }
+-static inline void reload_early_microcode(void)                       { }
++static inline void reload_early_microcode(unsigned int cpu)   { }
+ static inline void microcode_bsp_resume(void)                 { }
+ #endif
+--- a/arch/x86/include/asm/microcode_amd.h
++++ b/arch/x86/include/asm/microcode_amd.h
+@@ -47,12 +47,12 @@ struct microcode_amd {
+ extern void __init load_ucode_amd_bsp(unsigned int family);
+ extern void load_ucode_amd_ap(unsigned int family);
+ extern int __init save_microcode_in_initrd_amd(unsigned int family);
+-void reload_ucode_amd(void);
++void reload_ucode_amd(unsigned int cpu);
+ #else
+ static inline void __init load_ucode_amd_bsp(unsigned int family) {}
+ static inline void load_ucode_amd_ap(unsigned int family) {}
+ static inline int __init
+ save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
+-static inline void reload_ucode_amd(void) {}
++static inline void reload_ucode_amd(unsigned int cpu) {}
+ #endif
+ #endif /* _ASM_X86_MICROCODE_AMD_H */
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -578,7 +578,7 @@ int __init save_microcode_in_initrd_amd(
+       return 0;
+ }
+-void reload_ucode_amd(void)
++void reload_ucode_amd(unsigned int cpu)
+ {
+       struct microcode_amd *mc;
+       u32 rev, dummy __always_unused;
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -298,7 +298,7 @@ struct cpio_data find_microcode_in_initr
+ #endif
+ }
+-void reload_early_microcode(void)
++void reload_early_microcode(unsigned int cpu)
+ {
+       int vendor, family;
+@@ -312,7 +312,7 @@ void reload_early_microcode(void)
+               break;
+       case X86_VENDOR_AMD:
+               if (family >= 0x10)
+-                      reload_ucode_amd();
++                      reload_ucode_amd(cpu);
+               break;
+       default:
+               break;
+@@ -567,7 +567,7 @@ void microcode_bsp_resume(void)
+       if (uci->mc)
+               microcode_ops->apply_microcode(cpu);
+       else
+-              reload_early_microcode();
++              reload_early_microcode(cpu);
+ }
+ static struct syscore_ops mc_syscore_ops = {
diff --git a/queue-6.2/x86-microcode-amd-fix-mixed-steppings-support.patch b/queue-6.2/x86-microcode-amd-fix-mixed-steppings-support.patch
new file mode 100644 (file)
index 0000000..3910de3
--- /dev/null
@@ -0,0 +1,106 @@
+From 7ff6edf4fef38ab404ee7861f257e28eaaeed35f Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Thu, 26 Jan 2023 16:26:17 +0100
+Subject: x86/microcode/AMD: Fix mixed steppings support
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 7ff6edf4fef38ab404ee7861f257e28eaaeed35f upstream.
+
+The AMD side of the loader has always claimed to support mixed
+steppings. But somewhere along the way, it broke that by assuming that
+the cached patch blob is a single one instead of it being one per
+*node*.
+
+So turn it into a per-node one so that each node can stash the blob
+relevant for it.
+
+  [ NB: The Fixes tag is not exactly the correct one but it is good
+    enough. ]
+
+Fixes: fe055896c040 ("x86/microcode: Merge the early microcode loader")
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: <stable@kernel.org> # 2355370cd941 ("x86/microcode/amd: Remove load_microcode_amd()'s bsp parameter")
+Cc: <stable@kernel.org> # a5ad92134bd1 ("x86/microcode/AMD: Add a @cpu parameter to the reloading functions")
+Link: https://lore.kernel.org/r/20230130161709.11615-4-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/microcode/amd.c |   34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -55,7 +55,9 @@ struct cont_desc {
+ };
+ static u32 ucode_new_rev;
+-static u8 amd_ucode_patch[PATCH_MAX_SIZE];
++
++/* One blob per node. */
++static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE];
+ /*
+  * Microcode patch container file is prepended to the initrd in cpio
+@@ -428,7 +430,7 @@ apply_microcode_early_amd(u32 cpuid_1_ea
+       patch   = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
+ #else
+       new_rev = &ucode_new_rev;
+-      patch   = &amd_ucode_patch;
++      patch   = &amd_ucode_patch[0];
+ #endif
+       desc.cpuid_1_eax = cpuid_1_eax;
+@@ -580,10 +582,10 @@ int __init save_microcode_in_initrd_amd(
+ void reload_ucode_amd(unsigned int cpu)
+ {
+-      struct microcode_amd *mc;
+       u32 rev, dummy __always_unused;
++      struct microcode_amd *mc;
+-      mc = (struct microcode_amd *)amd_ucode_patch;
++      mc = (struct microcode_amd *)amd_ucode_patch[cpu_to_node(cpu)];
+       rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+@@ -851,6 +853,8 @@ static enum ucode_state __load_microcode
+ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+ {
++      struct cpuinfo_x86 *c;
++      unsigned int nid, cpu;
+       struct ucode_patch *p;
+       enum ucode_state ret;
+@@ -863,18 +867,22 @@ static enum ucode_state load_microcode_a
+               return ret;
+       }
+-      p = find_patch(0);
+-      if (!p) {
+-              return ret;
+-      } else {
+-              if (boot_cpu_data.microcode >= p->patch_id)
+-                      return ret;
++      for_each_node(nid) {
++              cpu = cpumask_first(cpumask_of_node(nid));
++              c = &cpu_data(cpu);
++
++              p = find_patch(cpu);
++              if (!p)
++                      continue;
++
++              if (c->microcode >= p->patch_id)
++                      continue;
+               ret = UCODE_NEW;
+-      }
+-      memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+-      memcpy(amd_ucode_patch, p->data, min_t(u32, p->size, PATCH_MAX_SIZE));
++              memset(&amd_ucode_patch[nid], 0, PATCH_MAX_SIZE);
++              memcpy(&amd_ucode_patch[nid], p->data, min_t(u32, p->size, PATCH_MAX_SIZE));
++      }
+       return ret;
+ }
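
The per-node cache introduced above can be illustrated with a small userspace model: every node stashes its own blob, and a CPU reloading microcode picks the blob of the node it belongs to. MAX_NODES, the node_of_cpu() mapping (a stand-in for the kernel's cpu_to_node()) and the blob contents below are made up for the sketch.

#include <stdio.h>
#include <string.h>

#define MAX_NODES	8
#define PATCH_MAX_SIZE	4096

/* Hypothetical model of the per-node microcode patch cache. */
static unsigned char ucode_patch[MAX_NODES][PATCH_MAX_SIZE];

/* Stand-in for cpu_to_node(); here simply two CPUs per node. */
static int node_of_cpu(int cpu)
{
	return cpu / 2;
}

/* Each node stashes the blob that matches its own stepping. */
static void cache_patch_for_node(int nid, const unsigned char *blob, size_t size)
{
	if (size > PATCH_MAX_SIZE)
		size = PATCH_MAX_SIZE;
	memset(ucode_patch[nid], 0, PATCH_MAX_SIZE);
	memcpy(ucode_patch[nid], blob, size);
}

/* On reload, a CPU picks the blob cached for the node it belongs to. */
static const unsigned char *patch_for_cpu(int cpu)
{
	return ucode_patch[node_of_cpu(cpu)];
}

int main(void)
{
	const unsigned char blob0[] = "blob matching node 0's stepping";
	const unsigned char blob1[] = "blob matching node 1's stepping";

	cache_patch_for_node(0, blob0, sizeof(blob0));
	cache_patch_for_node(1, blob1, sizeof(blob1));

	printf("cpu 1 -> %s\n", patch_for_cpu(1));	/* node 0's blob */
	printf("cpu 3 -> %s\n", patch_for_cpu(3));	/* node 1's blob */
	return 0;
}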
diff --git a/queue-6.2/x86-microcode-amd-remove-load_microcode_amd-s-bsp-parameter.patch b/queue-6.2/x86-microcode-amd-remove-load_microcode_amd-s-bsp-parameter.patch
new file mode 100644 (file)
index 0000000..9b1439a
--- /dev/null
@@ -0,0 +1,86 @@
+From 2355370cd941cbb20882cc3f34460f9f2b8f9a18 Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Tue, 17 Jan 2023 23:59:24 +0100
+Subject: x86/microcode/amd: Remove load_microcode_amd()'s bsp parameter
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 2355370cd941cbb20882cc3f34460f9f2b8f9a18 upstream.
+
+It is always the BSP.
+
+No functional changes.
+
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20230130161709.11615-2-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/microcode/amd.c |   19 ++++---------------
+ 1 file changed, 4 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -553,8 +553,7 @@ void load_ucode_amd_ap(unsigned int cpui
+       apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
+ }
+-static enum ucode_state
+-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
++static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
+ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
+ {
+@@ -572,7 +571,7 @@ int __init save_microcode_in_initrd_amd(
+       if (!desc.mc)
+               return -EINVAL;
+-      ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
++      ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
+       if (ret > UCODE_UPDATED)
+               return -EINVAL;
+@@ -850,8 +849,7 @@ static enum ucode_state __load_microcode
+       return UCODE_OK;
+ }
+-static enum ucode_state
+-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
++static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+ {
+       struct ucode_patch *p;
+       enum ucode_state ret;
+@@ -875,10 +873,6 @@ load_microcode_amd(bool save, u8 family,
+               ret = UCODE_NEW;
+       }
+-      /* save BSP's matching patch for early load */
+-      if (!save)
+-              return ret;
+-
+       memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+       memcpy(amd_ucode_patch, p->data, min_t(u32, p->size, PATCH_MAX_SIZE));
+@@ -905,14 +899,9 @@ static enum ucode_state request_microcod
+ {
+       char fw_name[36] = "amd-ucode/microcode_amd.bin";
+       struct cpuinfo_x86 *c = &cpu_data(cpu);
+-      bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
+       enum ucode_state ret = UCODE_NFOUND;
+       const struct firmware *fw;
+-      /* reload ucode container only on the boot cpu */
+-      if (!bsp)
+-              return UCODE_OK;
+-
+       if (c->x86 >= 0x15)
+               snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
+@@ -925,7 +914,7 @@ static enum ucode_state request_microcod
+       if (!verify_container(fw->data, fw->size, false))
+               goto fw_release;
+-      ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
++      ret = load_microcode_amd(c->x86, fw->data, fw->size);
+  fw_release:
+       release_firmware(fw);
diff --git a/queue-6.2/x86-reboot-disable-svm-not-just-vmx-when-stopping-cpus.patch b/queue-6.2/x86-reboot-disable-svm-not-just-vmx-when-stopping-cpus.patch
new file mode 100644 (file)
index 0000000..f99fe03
--- /dev/null
@@ -0,0 +1,54 @@
+From a2b07fa7b93321c059af0c6d492cc9a4f1e390aa Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 30 Nov 2022 23:36:50 +0000
+Subject: x86/reboot: Disable SVM, not just VMX, when stopping CPUs
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit a2b07fa7b93321c059af0c6d492cc9a4f1e390aa upstream.
+
+Disable SVM and more importantly force GIF=1 when halting a CPU or
+rebooting the machine.  Similar to VMX, SVM allows software to block
+INITs via CLGI, and thus can be problematic for a crash/reboot.  The
+window for failure is smaller with SVM as INIT is only blocked while
+GIF=0, i.e. between CLGI and STGI, but the window does exist.
+
+Fixes: fba4f472b33a ("x86/reboot: Turn off KVM when halting a CPU")
+Cc: stable@vger.kernel.org
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20221130233650.1404148-5-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/smp.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -32,7 +32,7 @@
+ #include <asm/mce.h>
+ #include <asm/trace/irq_vectors.h>
+ #include <asm/kexec.h>
+-#include <asm/virtext.h>
++#include <asm/reboot.h>
+ /*
+  *    Some notes on x86 processor bugs affecting SMP operation:
+@@ -122,7 +122,7 @@ static int smp_stop_nmi_callback(unsigne
+       if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
+               return NMI_HANDLED;
+-      cpu_emergency_vmxoff();
++      cpu_emergency_disable_virtualization();
+       stop_this_cpu(NULL);
+       return NMI_HANDLED;
+@@ -134,7 +134,7 @@ static int smp_stop_nmi_callback(unsigne
+ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
+ {
+       ack_APIC_irq();
+-      cpu_emergency_vmxoff();
++      cpu_emergency_disable_virtualization();
+       stop_this_cpu(NULL);
+ }
diff --git a/queue-6.2/x86-reboot-disable-virtualization-in-an-emergency-if-svm-is-supported.patch b/queue-6.2/x86-reboot-disable-virtualization-in-an-emergency-if-svm-is-supported.patch
new file mode 100644 (file)
index 0000000..e634627
--- /dev/null
@@ -0,0 +1,72 @@
+From d81f952aa657b76cea381384bef1fea35c5fd266 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 30 Nov 2022 23:36:49 +0000
+Subject: x86/reboot: Disable virtualization in an emergency if SVM is supported
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d81f952aa657b76cea381384bef1fea35c5fd266 upstream.
+
+Disable SVM on all CPUs via NMI shootdown during an emergency reboot.
+Like VMX, SVM can block INIT, e.g. if the emergency reboot is triggered
+between CLGI and STGI, and thus can prevent bringing up other CPUs via
+INIT-SIPI-SIPI.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20221130233650.1404148-4-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/reboot.c |   23 +++++++++++------------
+ 1 file changed, 11 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -530,27 +530,26 @@ static inline void kb_wait(void)
+ static inline void nmi_shootdown_cpus_on_restart(void);
+-/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
+-static void emergency_vmx_disable_all(void)
++static void emergency_reboot_disable_virtualization(void)
+ {
+       /* Just make sure we won't change CPUs while doing this */
+       local_irq_disable();
+       /*
+-       * Disable VMX on all CPUs before rebooting, otherwise we risk hanging
+-       * the machine, because the CPU blocks INIT when it's in VMX root.
++       * Disable virtualization on all CPUs before rebooting to avoid hanging
++       * the system, as VMX and SVM block INIT when running in the host.
+        *
+        * We can't take any locks and we may be on an inconsistent state, so
+-       * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
++       * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
+        *
+-       * Do the NMI shootdown even if VMX if off on _this_ CPU, as that
+-       * doesn't prevent a different CPU from being in VMX root operation.
++       * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
++       * other CPUs may have virtualization enabled.
+        */
+-      if (cpu_has_vmx()) {
+-              /* Safely force _this_ CPU out of VMX root operation. */
+-              __cpu_emergency_vmxoff();
++      if (cpu_has_vmx() || cpu_has_svm(NULL)) {
++              /* Safely force _this_ CPU out of VMX/SVM operation. */
++              cpu_emergency_disable_virtualization();
+-              /* Halt and exit VMX root operation on the other CPUs. */
++              /* Disable VMX/SVM and halt on other CPUs. */
+               nmi_shootdown_cpus_on_restart();
+       }
+ }
+@@ -587,7 +586,7 @@ static void native_machine_emergency_res
+       unsigned short mode;
+       if (reboot_emergency)
+-              emergency_vmx_disable_all();
++              emergency_reboot_disable_virtualization();
+       tboot_shutdown(TB_SHUTDOWN_REBOOT);
diff --git a/queue-6.2/x86-speculation-allow-enabling-stibp-with-legacy-ibrs.patch b/queue-6.2/x86-speculation-allow-enabling-stibp-with-legacy-ibrs.patch
new file mode 100644 (file)
index 0000000..ab49c21
--- /dev/null
@@ -0,0 +1,96 @@
+From 6921ed9049bc7457f66c1596c5b78aec0dae4a9d Mon Sep 17 00:00:00 2001
+From: KP Singh <kpsingh@kernel.org>
+Date: Mon, 27 Feb 2023 07:05:40 +0100
+Subject: x86/speculation: Allow enabling STIBP with legacy IBRS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: KP Singh <kpsingh@kernel.org>
+
+commit 6921ed9049bc7457f66c1596c5b78aec0dae4a9d upstream.
+
+When plain IBRS is enabled (not enhanced IBRS), the logic in
+spectre_v2_user_select_mitigation() determines that STIBP is not needed.
+
+The IBRS bit implicitly protects against cross-thread branch target
+injection. However, with legacy IBRS, the IBRS bit is cleared on
+returning to userspace for performance reasons which leaves userspace
+threads vulnerable to cross-thread branch target injection against which
+STIBP protects.
+
+Exclude IBRS from the spectre_v2_in_ibrs_mode() check to allow for
+enabling STIBP (through seccomp/prctl() by default or always-on, if
+selected by spectre_v2_user kernel cmdline parameter).
+
+  [ bp: Massage. ]
+
+Fixes: 7c693f54c873 ("x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS")
+Reported-by: José Oliveira <joseloliveira11@gmail.com>
+Reported-by: Rodrigo Branco <rodrigo@kernelhacking.com>
+Signed-off-by: KP Singh <kpsingh@kernel.org>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230220120127.1975241-1-kpsingh@kernel.org
+Link: https://lore.kernel.org/r/20230221184908.2349578-1-kpsingh@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   25 ++++++++++++++++++-------
+ 1 file changed, 18 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1132,14 +1132,18 @@ spectre_v2_parse_user_cmdline(void)
+       return SPECTRE_V2_USER_CMD_AUTO;
+ }
+-static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
++static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+ {
+-      return mode == SPECTRE_V2_IBRS ||
+-             mode == SPECTRE_V2_EIBRS ||
++      return mode == SPECTRE_V2_EIBRS ||
+              mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+              mode == SPECTRE_V2_EIBRS_LFENCE;
+ }
++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
++{
++      return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS;
++}
++
+ static void __init
+ spectre_v2_user_select_mitigation(void)
+ {
+@@ -1202,12 +1206,19 @@ spectre_v2_user_select_mitigation(void)
+       }
+       /*
+-       * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
+-       * STIBP is not required.
++       * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
++       * is not required.
++       *
++       * Enhanced IBRS also protects against cross-thread branch target
++       * injection in user-mode as the IBRS bit remains always set which
++       * implicitly enables cross-thread protections.  However, in legacy IBRS
++       * mode, the IBRS bit is set only on kernel entry and cleared on return
++       * to userspace. This disables the implicit cross-thread protection,
++       * so allow for STIBP to be selected in that case.
+        */
+       if (!boot_cpu_has(X86_FEATURE_STIBP) ||
+           !smt_possible ||
+-          spectre_v2_in_ibrs_mode(spectre_v2_enabled))
++          spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+               return;
+       /*
+@@ -2335,7 +2346,7 @@ static ssize_t mmio_stale_data_show_stat
+ static char *stibp_state(void)
+ {
+-      if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
++      if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+               return "";
+       switch (spectre_v2_user_stibp) {
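
For clarity, a plain-C model of the decision change: only the enhanced IBRS modes make STIBP redundant, because legacy IBRS loses its cross-thread protection on return to userspace. The enum values and helper names are illustrative stand-ins for the ones in bugs.c, not the kernel's identifiers.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical subset of the spectre_v2 mitigation modes. */
enum spectre_v2_mode {
	MODE_NONE,
	MODE_RETPOLINE,
	MODE_IBRS,		/* legacy IBRS: bit cleared on return to user */
	MODE_EIBRS,		/* enhanced IBRS: bit stays set in user mode  */
	MODE_EIBRS_RETPOLINE,
	MODE_EIBRS_LFENCE,
};

/* Model of spectre_v2_in_eibrs_mode() as introduced by the patch. */
static bool in_eibrs_mode(enum spectre_v2_mode mode)
{
	return mode == MODE_EIBRS || mode == MODE_EIBRS_RETPOLINE ||
	       mode == MODE_EIBRS_LFENCE;
}

/*
 * Model of the fixed STIBP decision: only enhanced IBRS makes STIBP
 * redundant, because legacy IBRS drops its cross-thread protection on
 * return to userspace.
 */
static bool stibp_not_required(enum spectre_v2_mode mode, bool have_stibp, bool smt)
{
	return !have_stibp || !smt || in_eibrs_mode(mode);
}

int main(void)
{
	printf("legacy IBRS, SMT on:   STIBP %s\n",
	       stibp_not_required(MODE_IBRS, true, true) ? "skipped" : "considered");
	printf("enhanced IBRS, SMT on: STIBP %s\n",
	       stibp_not_required(MODE_EIBRS, true, true) ? "skipped" : "considered");
	return 0;
}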
diff --git a/queue-6.2/x86-virt-force-gif-1-prior-to-disabling-svm-for-reboot-flows.patch b/queue-6.2/x86-virt-force-gif-1-prior-to-disabling-svm-for-reboot-flows.patch
new file mode 100644 (file)
index 0000000..5823e44
--- /dev/null
@@ -0,0 +1,54 @@
+From 6a3236580b0b1accc3976345e723104f74f6f8e6 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 30 Nov 2022 23:36:48 +0000
+Subject: x86/virt: Force GIF=1 prior to disabling SVM (for reboot flows)
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 6a3236580b0b1accc3976345e723104f74f6f8e6 upstream.
+
+Set GIF=1 prior to disabling SVM to ensure that INIT is recognized if the
+kernel is disabling SVM in an emergency, e.g. if the kernel is about to
+jump into a crash kernel or may reboot without doing a full CPU RESET.
+If GIF is left cleared, the new kernel (or firmware) will be unable to
+awaken APs.  Eat faults on STGI (due to EFER.SVME=0) as it's possible
+that SVM could be disabled via NMI shootdown between reading EFER.SVME
+and executing STGI.
+
+Link: https://lore.kernel.org/all/cbcb6f35-e5d7-c1c9-4db9-fe5cc4de579a@amd.com
+Cc: stable@vger.kernel.org
+Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20221130233650.1404148-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/virtext.h |   16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/virtext.h
++++ b/arch/x86/include/asm/virtext.h
+@@ -126,7 +126,21 @@ static inline void cpu_svm_disable(void)
+       wrmsrl(MSR_VM_HSAVE_PA, 0);
+       rdmsrl(MSR_EFER, efer);
+-      wrmsrl(MSR_EFER, efer & ~EFER_SVME);
++      if (efer & EFER_SVME) {
++              /*
++               * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
++               * aren't blocked, e.g. if a fatal error occurred between CLGI
++               * and STGI.  Note, STGI may #UD if SVM is disabled from NMI
++               * context between reading EFER and executing STGI.  In that
++               * case, GIF must already be set, otherwise the NMI would have
++               * been blocked, so just eat the fault.
++               */
++              asm_volatile_goto("1: stgi\n\t"
++                                _ASM_EXTABLE(1b, %l[fault])
++                                ::: "memory" : fault);
++fault:
++              wrmsrl(MSR_EFER, efer & ~EFER_SVME);
++      }
+ }
+ /** Makes sure SVM is disabled, if it is supported on the CPU