not needed anymore.
+++ /dev/null
-From 6df6ee6aa80c0ffb1f45001da2e5e20f45440c03 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 23 Aug 2022 14:34:14 +0800
-Subject: KVM: fix memoryleak in kvm_init()
-
-From: Miaohe Lin <linmiaohe@huawei.com>
-
-[ Upstream commit 5a2a961be2ad6a16eb388a80442443b353c11d16 ]
-
-When alloc_cpumask_var_node() fails for a certain cpu, there might already
-be cpumasks allocated for the percpu cpu_kick_mask. We should free these
-cpumasks, or a memory leak will occur.
-
-Fixes: baff59ccdc65 ("KVM: Pre-allocate cpumasks for kvm_make_all_cpus_request_except()")
-Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
-Link: https://lore.kernel.org/r/20220823063414.59778-1-linmiaohe@huawei.com
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- virt/kvm/kvm_main.c | 5 ++---
- 1 file changed, 2 insertions(+), 3 deletions(-)
-
-diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 584a5bab3af3..dcf47da44844 100644
---- a/virt/kvm/kvm_main.c
-+++ b/virt/kvm/kvm_main.c
-@@ -5881,7 +5881,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
-
- r = kvm_async_pf_init();
- if (r)
-- goto out_free_5;
-+ goto out_free_4;
-
- kvm_chardev_ops.owner = module;
-
-@@ -5905,10 +5905,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
-
- out_unreg:
- kvm_async_pf_deinit();
--out_free_5:
-+out_free_4:
- for_each_possible_cpu(cpu)
- free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
--out_free_4:
- kmem_cache_destroy(kvm_vcpu_cache);
- out_free_3:
- unregister_reboot_notifier(&kvm_reboot_notifier);
---
-2.35.1
-
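The bug class here is the classic goto-unwind ordering mistake: jumping to a
label that sits past the per-CPU cleanup leaks every cpumask allocated before
the failure. A minimal userspace sketch of the pattern the relabeling
restores (all names are illustrative, not KVM's):

    #include <stdio.h>
    #include <stdlib.h>

    #define NCPUS 4

    /* On a mid-loop allocation failure, every slot allocated so far must
     * be unwound; jumping past this loop (the pre-fix label layout) would
     * leak the earlier allocations. */
    static int init_kick_masks(unsigned long *masks[NCPUS])
    {
        int cpu;

        for (cpu = 0; cpu < NCPUS; cpu++) {
            masks[cpu] = calloc(1, sizeof(unsigned long));
            if (!masks[cpu])
                goto out_free;  /* must free masks[0..cpu-1], not skip them */
        }
        return 0;

    out_free:
        /* free(NULL) is a no-op, so walking all slots is safe. */
        for (cpu = 0; cpu < NCPUS; cpu++) {
            free(masks[cpu]);
            masks[cpu] = NULL;
        }
        return -1;
    }

    int main(void)
    {
        unsigned long *masks[NCPUS] = { 0 };
        int cpu;

        printf("init: %d\n", init_kick_masks(masks));
        for (cpu = 0; cpu < NCPUS; cpu++)
            free(masks[cpu]);
        return 0;
    }

The patch achieves the same effect by renaming the label so that the
kvm_async_pf_init() failure path now falls into the cpumask-freeing loop.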
+++ /dev/null
-From a5026653be6ccf7dff187ec87d628ef4ba5d89c5 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:16:06 +0000
-Subject: KVM: nVMX: Add a helper to identify low-priority #DB traps
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 2b384165f4d15540f94998b751f50058642ad110 ]
-
-Add a helper to identify "low"-priority #DB traps, i.e. trap-like #DBs
-that aren't TSS T flag #DBs, and tweak the related code to operate on any
-queued exception. A future commit will separate exceptions that are
-intercepted by L1, i.e. cause nested VM-Exit, from those that do NOT
-trigger nested VM-Exit; i.e. there will be multiple exception structs
-and multiple invocations of the helpers.
-
-No functional change intended.
-
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-20-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/vmx/nested.c | 23 +++++++++++++++++------
- 1 file changed, 17 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index 7655b5acbbcd..dfd5e13e5202 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -3871,14 +3871,24 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- * from the emulator (because such #DBs are fault-like and thus don't trigger
- * actions that fire on instruction retire).
- */
--static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu)
-+static unsigned long vmx_get_pending_dbg_trap(struct kvm_queued_exception *ex)
- {
-- if (!vcpu->arch.exception.pending ||
-- vcpu->arch.exception.vector != DB_VECTOR)
-+ if (!ex->pending || ex->vector != DB_VECTOR)
- return 0;
-
- /* General Detect #DBs are always fault-like. */
-- return vcpu->arch.exception.payload & ~DR6_BD;
-+ return ex->payload & ~DR6_BD;
-+}
-+
-+/*
-+ * Returns true if there's a pending #DB exception that is lower priority than
-+ * a pending Monitor Trap Flag VM-Exit. TSS T-flag #DBs are not emulated by
-+ * KVM, but could theoretically be injected by userspace. Note, this code is
-+ * imperfect, see above.
-+ */
-+static bool vmx_is_low_priority_db_trap(struct kvm_queued_exception *ex)
-+{
-+ return vmx_get_pending_dbg_trap(ex) & ~DR6_BT;
- }
-
- /*
-@@ -3890,8 +3900,9 @@ static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu)
- */
- static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
- {
-- unsigned long pending_dbg = vmx_get_pending_dbg_trap(vcpu);
-+ unsigned long pending_dbg;
-
-+ pending_dbg = vmx_get_pending_dbg_trap(&vcpu->arch.exception);
- if (pending_dbg)
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg);
- }
-@@ -3961,7 +3972,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- * prioritize SMI over MTF and trap-like #DBs.
- */
- if (vcpu->arch.exception.pending &&
-- !(vmx_get_pending_dbg_trap(vcpu) & ~DR6_BT)) {
-+ !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) {
- if (block_nested_exceptions)
- return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
---
-2.35.1
-
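Together with the two related #DB patches further down (General Detect and
TSS T-flag priority), the resulting classification can be modeled standalone.
A sketch using the DR6 bit values from the Intel SDM; the struct and helper
names are illustrative, not KVM's:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DR6_BD (1ULL << 13)  /* General Detect: fault-like #DB */
    #define DR6_BT (1ULL << 15)  /* TSS T flag: trap-like, outranks MTF */
    #define DB_VECTOR 1

    struct queued_exception {
        bool pending;
        uint8_t vector;
        uint64_t payload;        /* to-be-DR6 value for a #DB */
    };

    /* Non-zero iff a (likely) trap-like #DB is pending; General Detect
     * is masked off because it is always fault-like despite carrying a
     * payload. */
    static uint64_t pending_dbg_trap(const struct queued_exception *ex)
    {
        if (!ex->pending || ex->vector != DB_VECTOR)
            return 0;
        return ex->payload & ~DR6_BD;
    }

    /* True iff the pending #DB is *lower* priority than a pending MTF
     * VM-Exit, i.e. is trap-like but not a TSS T-flag #DB. */
    static bool is_low_priority_db_trap(const struct queued_exception *ex)
    {
        return pending_dbg_trap(ex) & ~DR6_BT;
    }

    int main(void)
    {
        struct queued_exception data_bp = { true, DB_VECTOR, 0x1 };
        struct queued_exception tss_t   = { true, DB_VECTOR, DR6_BT };
        struct queued_exception gen_det = { true, DB_VECTOR, DR6_BD };

        printf("data bp: %d\n", is_low_priority_db_trap(&data_bp)); /* 1 */
        printf("TSS T:   %d\n", is_low_priority_db_trap(&tss_t));   /* 0 */
        printf("GD:      %d\n", is_low_priority_db_trap(&gen_det)); /* 0 */
        return 0;
    }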
+++ /dev/null
-From 48c98fb5bd6fccc2492ce5f3b5e0e24ddd705383 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:15:57 +0000
-Subject: KVM: nVMX: Ignore SIPI that arrives in L2 when vCPU is not in WFS
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit c2086eca86585bfd8132dd91e802497a202185c8 ]
-
-Fall through to handling other pending exceptions/events for L2 if a SIPI
-is pending while the vCPU is not in Wait-for-SIPI. KVM correctly ignores
-the event, but incorrectly returns immediately, e.g. a SIPI coincident
-with another event could lead to KVM incorrectly routing the event to L1
-instead of L2.
-
-Fixes: bf0cd88ce363 ("KVM: x86: emulate wait-for-SIPI and SIPI-VMExit")
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-11-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/vmx/nested.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index 93c34841e51e..c06c25fb9cbe 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -3937,10 +3937,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- return -EBUSY;
-
- clear_bit(KVM_APIC_SIPI, &apic->pending_events);
-- if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
-+ if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
- apic->sipi_vector & 0xFFUL);
-- return 0;
-+ return 0;
-+ }
-+ /* Fallthrough, the SIPI is completely ignored. */
- }
-
- /*
---
-2.35.1
-
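The bug class, an early return after consuming an event that was
deliberately ignored, is easy to model outside KVM. A toy dispatcher (event
names and structure invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    enum { EV_SIPI = 1, EV_TIMER = 2 };

    /* An event that is consumed but *ignored* must fall through to the
     * lower-priority checks instead of returning early. */
    static int check_events(unsigned *pending, bool in_wait_for_sipi)
    {
        if (*pending & EV_SIPI) {
            *pending &= ~EV_SIPI;   /* the SIPI is always consumed */
            if (in_wait_for_sipi)
                return EV_SIPI;     /* deliver it */
            /* Fall through: the SIPI is completely ignored. */
        }
        if (*pending & EV_TIMER) {
            *pending &= ~EV_TIMER;
            return EV_TIMER;        /* lost if the SIPI path returned early */
        }
        return 0;
    }

    int main(void)
    {
        unsigned pending = EV_SIPI | EV_TIMER;

        /* Not in Wait-for-SIPI: the SIPI is dropped, the timer still fires. */
        printf("delivered event %d\n", check_events(&pending, false)); /* 2 */
        return 0;
    }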
+++ /dev/null
-From ce3c7cd86f3efdea63bb63bad636972751fe18fa Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:15:54 +0000
-Subject: KVM: nVMX: Prioritize TSS T-flag #DBs over Monitor Trap Flag
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit b9d44f9091ac6c325fc2f7b7671b462fb36abbed ]
-
-Service TSS T-flag #DBs prior to pending MTFs, as such #DBs are higher
-priority than MTF. KVM itself doesn't emulate TSS #DBs, and any such
-exceptions injected from L1 will be handled by hardware (or morphed to
-a fault-like exception if injection fails), but theoretically userspace
-could pend a TSS T-flag #DB in conjunction with a pending MTF.
-
-Note, there's no known use case this fixes; it's purely to be technically
-correct with respect to Intel's SDM.
-
-Cc: Oliver Upton <oupton@google.com>
-Cc: Peter Shier <pshier@google.com>
-Fixes: 5ef8acbdd687 ("KVM: nVMX: Emulate MTF when performing instruction emulation")
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-8-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/vmx/nested.c | 8 +++++---
- 1 file changed, 5 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index 4b96b5a25ba5..93c34841e51e 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -3944,15 +3944,17 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- }
-
- /*
-- * Process any exceptions that are not debug traps before MTF.
-+ * Process exceptions that are higher priority than Monitor Trap Flag:
-+ * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but
-+ * could theoretically come in from userspace), and ICEBP (INT1).
- *
- * Note that only a pending nested run can block a pending exception.
- * Otherwise an injected NMI/interrupt should either be
- * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
- * while delivering the pending exception.
- */
--
-- if (vcpu->arch.exception.pending && !vmx_get_pending_dbg_trap(vcpu)) {
-+ if (vcpu->arch.exception.pending &&
-+ !(vmx_get_pending_dbg_trap(vcpu) & ~DR6_BT)) {
- if (vmx->nested.nested_run_pending)
- return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
---
-2.35.1
-
+++ /dev/null
-From 74aca2738bdc8deee250f0cdc6dd4959c9dfac3f Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:15:53 +0000
-Subject: KVM: nVMX: Treat General Detect #DB (DR7.GD=1) as fault-like
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 8d178f460772ecdee8e6d72389b43a8d35a14ff5 ]
-
-Exclude General Detect #DBs, which have fault-like behavior but also have
-a non-zero payload (DR6.BD=1), from nVMX's handling of pending debug
-traps. Opportunistically rewrite the comment to better document what is
-being checked, i.e. "has a non-zero payload" vs. "has a payload", and to
-call out the many caveats surrounding #DBs that KVM dodges one way or
-another.
-
-Cc: Oliver Upton <oupton@google.com>
-Cc: Peter Shier <pshier@google.com>
-Fixes: 684c0422da71 ("KVM: nVMX: Handle pending #DB when injecting INIT VM-exit")
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-7-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/vmx/nested.c | 36 +++++++++++++++++++++++++-----------
- 1 file changed, 25 insertions(+), 11 deletions(-)
-
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index 03d348fa6485..4b96b5a25ba5 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -3858,16 +3858,29 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- }
-
- /*
-- * Returns true if a debug trap is pending delivery.
-+ * Returns true if a debug trap is (likely) pending delivery. Infer the class
-+ * of a #DB (trap-like vs. fault-like) from the exception payload (to-be-DR6).
-+ * Using the payload is flawed because code breakpoints (fault-like) and data
-+ * breakpoints (trap-like) set the same bits in DR6 (breakpoint detected), i.e.
-+ * this will return false positives if a to-be-injected code breakpoint #DB is
-+ * pending (from KVM's perspective, but not "pending" across an instruction
-+ * boundary). ICEBP, a.k.a. INT1, is also not reflected here even though it
-+ * too is trap-like.
- *
-- * In KVM, debug traps bear an exception payload. As such, the class of a #DB
-- * exception may be inferred from the presence of an exception payload.
-+ * KVM "works" despite these flaws as ICEBP isn't currently supported by the
-+ * emulator, Monitor Trap Flag is not marked pending on intercepted #DBs (the
-+ * #DB has already happened), and MTF isn't marked pending on code breakpoints
-+ * from the emulator (because such #DBs are fault-like and thus don't trigger
-+ * actions that fire on instruction retire).
- */
--static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
-+static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu)
- {
-- return vcpu->arch.exception.pending &&
-- vcpu->arch.exception.nr == DB_VECTOR &&
-- vcpu->arch.exception.payload;
-+ if (!vcpu->arch.exception.pending ||
-+ vcpu->arch.exception.nr != DB_VECTOR)
-+ return 0;
-+
-+ /* General Detect #DBs are always fault-like. */
-+ return vcpu->arch.exception.payload & ~DR6_BD;
- }
-
- /*
-@@ -3879,9 +3892,10 @@ static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
- */
- static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
- {
-- if (vmx_pending_dbg_trap(vcpu))
-- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
-- vcpu->arch.exception.payload);
-+ unsigned long pending_dbg = vmx_get_pending_dbg_trap(vcpu);
-+
-+ if (pending_dbg)
-+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg);
- }
-
- static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
-@@ -3938,7 +3952,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- * while delivering the pending exception.
- */
-
-- if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
-+ if (vcpu->arch.exception.pending && !vmx_get_pending_dbg_trap(vcpu)) {
- if (vmx->nested.nested_run_pending)
- return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
---
-2.35.1
-
+++ /dev/null
-From 9960eda0640025a4b547fa5ca741bbb2ac8dc0c4 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:15:58 +0000
-Subject: KVM: nVMX: Unconditionally clear mtf_pending on nested VM-Exit
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 593a5c2e3c12a2f65967739267093255c47e9fe0 ]
-
-Clear mtf_pending on nested VM-Exit instead of handling the clear on a
-case-by-case basis in vmx_check_nested_events(). The pending MTF should
-never survive nested VM-Exit, as it is a property of KVM's run of the
-current L2, i.e. should never affect the next L2 run by L1. In practice,
-this is likely a nop as getting to L1 with nested_run_pending is
-impossible, and KVM doesn't correctly handle morphing a pending exception
-that occurs on top of a prior injected exception (the need to re-inject an
-exception being the other case where MTF isn't cleared). However, KVM will
-hopefully soon correctly deal with a pending exception on top of an
-injected exception.
-
-Add a TODO to document that KVM has an inversion priority bug between
-SMIs and MTF (and trap-like #DBs), and that KVM also doesn't properly
-save/restore MTF across SMI/RSM.
-
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-12-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/vmx/nested.c | 21 ++++++++++++---------
- 1 file changed, 12 insertions(+), 9 deletions(-)
-
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index c06c25fb9cbe..0aa40ea496a8 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -3910,16 +3910,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- unsigned long exit_qual;
- bool block_nested_events =
- vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
-- bool mtf_pending = vmx->nested.mtf_pending;
- struct kvm_lapic *apic = vcpu->arch.apic;
-
-- /*
-- * Clear the MTF state. If a higher priority VM-exit is delivered first,
-- * this state is discarded.
-- */
-- if (!block_nested_events)
-- vmx->nested.mtf_pending = false;
--
- if (lapic_in_kernel(vcpu) &&
- test_bit(KVM_APIC_INIT, &apic->pending_events)) {
- if (block_nested_events)
-@@ -3928,6 +3920,9 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- clear_bit(KVM_APIC_INIT, &apic->pending_events);
- if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
- nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
-+
-+ /* MTF is discarded if the vCPU is in WFS. */
-+ vmx->nested.mtf_pending = false;
- return 0;
- }
-
-@@ -3950,6 +3945,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but
- * could theoretically come in from userspace), and ICEBP (INT1).
- *
-+ * TODO: SMIs have higher priority than MTF and trap-like #DBs (except
-+ * for TSS T flag #DBs). KVM also doesn't save/restore pending MTF
-+ * across SMI/RSM as it should; that needs to be addressed in order to
-+ * prioritize SMI over MTF and trap-like #DBs.
-+ *
- * Note that only a pending nested run can block a pending exception.
- * Otherwise an injected NMI/interrupt should either be
- * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
-@@ -3965,7 +3965,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- return 0;
- }
-
-- if (mtf_pending) {
-+ if (vmx->nested.mtf_pending) {
- if (block_nested_events)
- return -EBUSY;
- nested_vmx_update_pending_dbg(vcpu);
-@@ -4562,6 +4562,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
-+ /* Pending MTF traps are discarded on VM-Exit. */
-+ vmx->nested.mtf_pending = false;
-+
- /* trying to cancel vmlaunch/vmresume is a bug */
- WARN_ON_ONCE(vmx->nested.nested_run_pending);
-
---
-2.35.1
-
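The design point, clearing per-run state once at the exit choke point rather
than at each call site, can be sketched in a few lines (names are
illustrative, not KVM's):

    #include <stdbool.h>
    #include <stdio.h>

    struct nested_state {
        bool mtf_pending;   /* MTF VM-Exit queued for the current L2 run */
    };

    /* Pending MTF is a property of the current L2 run, so it is dropped
     * once here, whatever the exit reason, instead of case-by-case in
     * the event-checking loop. */
    static void nested_vmexit(struct nested_state *n)
    {
        n->mtf_pending = false;
        /* ... save L2 state into vmcs12, restore L1 state, etc. ... */
    }

    int main(void)
    {
        struct nested_state n = { .mtf_pending = true };

        nested_vmexit(&n);  /* INIT, SMI, exception, ... any reason */
        printf("mtf_pending after VM-Exit: %d\n", n.mtf_pending); /* 0 */
        return 0;
    }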
+++ /dev/null
-From 890cdfadae6607769ae9504dec90df8e4cb95943 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 16 Aug 2022 19:25:17 -0300
-Subject: KVM: PPC: Book3S HV: Fix decrementer migration
-
-From: Fabiano Rosas <farosas@linux.ibm.com>
-
-[ Upstream commit 0a5bfb824a6ea35e54b7e5ac6f881beea5e309d2 ]
-
-We used to have a workaround[1] for a hang during migration that was
-made ineffective when we converted the decrementer expiry to be
-relative to guest timebase.
-
-The point of the workaround was that in the absence of an explicit
-decrementer expiry value provided by userspace during migration, KVM
-needs to initialize dec_expires to a value that will result in an
-expired decrementer after subtracting the current guest timebase. That
-stops the vcpu from hanging after migration due to a decrementer
-that's too large.
-
-Since dec_expires is now relative to guest timebase, its
-initialization needs to be guest timebase-relative as well, otherwise
-we end up with a decrementer expiry that is still larger than the
-guest timebase.
-
-1- https://git.kernel.org/torvalds/c/5855564c8ab2
-
-Fixes: 3c1a4322bba7 ("KVM: PPC: Book3S HV: Change dec_expires to be relative to guest timebase")
-Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
-Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-Link: https://lore.kernel.org/r/20220816222517.1916391-1-farosas@linux.ibm.com
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/powerpc/kvm/book3s_hv.c | 18 ++++++++++++++++--
- arch/powerpc/kvm/powerpc.c | 1 -
- 2 files changed, 16 insertions(+), 3 deletions(-)
-
-diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
-index 57d0835e56fd..917abda9e5ce 100644
---- a/arch/powerpc/kvm/book3s_hv.c
-+++ b/arch/powerpc/kvm/book3s_hv.c
-@@ -2517,10 +2517,24 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
- r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
- break;
- case KVM_REG_PPC_TB_OFFSET:
-+ {
- /* round up to multiple of 2^24 */
-- vcpu->arch.vcore->tb_offset =
-- ALIGN(set_reg_val(id, *val), 1UL << 24);
-+ u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24);
-+
-+ /*
-+ * Now that we know the timebase offset, update the
-+ * decrementer expiry with a guest timebase value. If
-+ * the userspace does not set DEC_EXPIRY, this ensures
-+ * a migrated vcpu at least starts with an expired
-+ * decrementer, which is better than a large one that
-+ * causes a hang.
-+ */
-+ if (!vcpu->arch.dec_expires && tb_offset)
-+ vcpu->arch.dec_expires = get_tb() + tb_offset;
-+
-+ vcpu->arch.vcore->tb_offset = tb_offset;
- break;
-+ }
- case KVM_REG_PPC_LPCR:
- kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
- break;
-diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
-index fb1490761c87..757491dd6b7b 100644
---- a/arch/powerpc/kvm/powerpc.c
-+++ b/arch/powerpc/kvm/powerpc.c
-@@ -786,7 +786,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
-
- hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
-- vcpu->arch.dec_expires = get_tb();
-
- #ifdef CONFIG_KVM_EXIT_TIMING
- mutex_init(&vcpu->arch.exit_timing_lock);
---
-2.35.1
-
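A toy model of the arithmetic (numbers invented; the real code works in
timebase ticks with the 2^24-aligned tb_offset shown in the hunk above):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* A guest whose timebase runs far behind the host's, i.e.
         * tb_offset is "negative" in 64-bit modular arithmetic (and a
         * multiple of 2^24). */
        uint64_t host_tb   = 1ULL << 50;      /* pretend get_tb() */
        uint64_t tb_offset = -(1ULL << 40);
        uint64_t guest_tb  = host_tb + tb_offset;

        uint64_t bad  = host_tb;              /* pre-fix: host-relative */
        uint64_t good = host_tb + tb_offset;  /* post-fix: guest-relative */

        /* Remaining decrementer ticks = expiry - current guest timebase. */
        printf("host-relative init:  %lld ticks left (vcpu hangs)\n",
               (long long)(bad - guest_tb));   /* 2^40, far in the future */
        printf("guest-relative init: %lld ticks left (fires at once)\n",
               (long long)(good - guest_tb));  /* 0, already expired */
        return 0;
    }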
+++ /dev/null
-From 3f7b9afa2b8e5b655f45b72f75eadd94a2ece613 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 8 Sep 2022 23:25:41 +1000
-Subject: KVM: PPC: Book3S HV P9: Clear vcpu cpu fields before enabling host
- irqs
-
-From: Nicholas Piggin <npiggin@gmail.com>
-
-[ Upstream commit bc91c04bfff7cdf676011b97bb21b2861d7b21c9 ]
-
-On guest entry, vcpu->cpu and vcpu->arch.thread_cpu are set after
-disabling host irqs. On guest exit there is a window where tick time
-accounting briefly enables irqs before these fields are cleared.
-
-Move them up to ensure they are cleared before host irqs are run.
-This is possibly not a problem, but is more symmetric and makes the
-fields less surprising.
-
-Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
-Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-Link: https://lore.kernel.org/r/20220908132545.4085849-1-npiggin@gmail.com
-Stable-dep-of: 1a5486b3c351 ("KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/powerpc/kvm/book3s_hv.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
-index d72df696837d..0f8dee657336 100644
---- a/arch/powerpc/kvm/book3s_hv.c
-+++ b/arch/powerpc/kvm/book3s_hv.c
-@@ -4629,6 +4629,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
-
- set_irq_happened(trap);
-
-+ vcpu->cpu = -1;
-+ vcpu->arch.thread_cpu = -1;
-+
- context_tracking_guest_exit();
- if (!vtime_accounting_enabled_this_cpu()) {
- powerpc_local_irq_pmu_restore(flags);
-@@ -4644,9 +4647,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
- }
- vtime_account_guest_exit();
-
-- vcpu->cpu = -1;
-- vcpu->arch.thread_cpu = -1;
--
- powerpc_local_irq_pmu_restore(flags);
-
- preempt_enable();
---
-2.35.1
-
+++ /dev/null
-From 5d52e92c4206869cc55d9336d147dc0ed9e6b857 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 8 Sep 2022 23:25:42 +1000
-Subject: KVM: PPC: Book3S HV P9: Fix irq disabling in tick accounting
-
-From: Nicholas Piggin <npiggin@gmail.com>
-
-[ Upstream commit c953f7500b65f2b157d1eb468ca8b86328834cce ]
-
-kvmhv_run_single_vcpu() disables PMIs as well as Linux irqs; however,
-the tick time accounting code enables and disables irqs, but not PMIs,
-within this region. By chance this might not actually cause
-a bug, but it is clearly an incorrect use of the APIs.
-
-Fixes: 2251fbe76395e ("KVM: PPC: Book3S HV P9: Improve mtmsrd scheduling by delaying MSR[EE] disable")
-Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
-Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-Link: https://lore.kernel.org/r/20220908132545.4085849-2-npiggin@gmail.com
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/powerpc/kvm/book3s_hv.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
-index 917abda9e5ce..d72df696837d 100644
---- a/arch/powerpc/kvm/book3s_hv.c
-+++ b/arch/powerpc/kvm/book3s_hv.c
-@@ -4631,7 +4631,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
-
- context_tracking_guest_exit();
- if (!vtime_accounting_enabled_this_cpu()) {
-- local_irq_enable();
-+ powerpc_local_irq_pmu_restore(flags);
- /*
- * Service IRQs here before vtime_account_guest_exit() so any
- * ticks that occurred while running the guest are accounted to
-@@ -4640,7 +4640,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
- * interrupts here, which has the problem that it accounts
- * interrupt processing overhead to the host.
- */
-- local_irq_disable();
-+ powerpc_local_irq_pmu_save(flags);
- }
- vtime_account_guest_exit();
-
---
-2.35.1
-
+++ /dev/null
-From afe3395a6c00b7cb77f86640479cda6046f95a6c Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 8 Sep 2022 23:25:44 +1000
-Subject: KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl
-
-From: Nicholas Piggin <npiggin@gmail.com>
-
-[ Upstream commit 1a5486b3c3517aa1f608a10003ade4da122cb175 ]
-
-Stolen time logging in dtl was removed from the P9 path, so guests had
-no stolen time accounting. Add it back in a simpler way that still
-avoids locks and per-core accounting code.
-
-Fixes: ecb6a7207f92 ("KVM: PPC: Book3S HV P9: Remove most of the vcore logic")
-Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
-Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-Link: https://lore.kernel.org/r/20220908132545.4085849-4-npiggin@gmail.com
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/powerpc/kvm/book3s_hv.c | 49 +++++++++++++++++++++++++++++++++---
- 1 file changed, 45 insertions(+), 4 deletions(-)
-
-diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
-index 0f8dee657336..2feacb1ee9d9 100644
---- a/arch/powerpc/kvm/book3s_hv.c
-+++ b/arch/powerpc/kvm/book3s_hv.c
-@@ -249,6 +249,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
-
- /*
- * We use the vcpu_load/put functions to measure stolen time.
-+ *
- * Stolen time is counted as time when either the vcpu is able to
- * run as part of a virtual core, but the task running the vcore
- * is preempted or sleeping, or when the vcpu needs something done
-@@ -278,6 +279,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
- * lock. The stolen times are measured in units of timebase ticks.
- * (Note that the != TB_NIL checks below are purely defensive;
- * they should never fail.)
-+ *
-+ * The POWER9 path is simpler, one vcpu per virtual core so the
-+ * former case does not exist. If a vcpu is preempted when it is
-+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
-+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible
-+ * state in the P9 path.
- */
-
- static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
-@@ -311,8 +318,14 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
- unsigned long flags;
- u64 now;
-
-- if (cpu_has_feature(CPU_FTR_ARCH_300))
-+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-+ if (vcpu->arch.busy_preempt != TB_NIL) {
-+ WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST);
-+ vc->stolen_tb += mftb() - vcpu->arch.busy_preempt;
-+ vcpu->arch.busy_preempt = TB_NIL;
-+ }
- return;
-+ }
-
- now = mftb();
-
-@@ -340,8 +353,21 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
- unsigned long flags;
- u64 now;
-
-- if (cpu_has_feature(CPU_FTR_ARCH_300))
-+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-+ /*
-+ * In the P9 path, RUNNABLE is not preemptible
-+ * (nor takes host interrupts)
-+ */
-+ WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE);
-+ /*
-+ * Account stolen time when preempted while the vcpu task is
-+ * running in the kernel (but not in qemu, which is INACTIVE).
-+ */
-+ if (task_is_running(current) &&
-+ vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
-+ vcpu->arch.busy_preempt = mftb();
- return;
-+ }
-
- now = mftb();
-
-@@ -740,6 +766,18 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
- vcpu->arch.dtl.dirty = true;
- }
-
-+static void kvmppc_create_dtl_entry_p9(struct kvm_vcpu *vcpu,
-+ struct kvmppc_vcore *vc,
-+ u64 now)
-+{
-+ unsigned long stolen;
-+
-+ stolen = vc->stolen_tb - vcpu->arch.stolen_logged;
-+ vcpu->arch.stolen_logged = vc->stolen_tb;
-+
-+ __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now, stolen);
-+}
-+
- static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
- struct kvmppc_vcore *vc)
- {
-@@ -4534,7 +4572,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
- vc = vcpu->arch.vcore;
- vcpu->arch.ceded = 0;
- vcpu->arch.run_task = current;
-- vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
- vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
-
- /* See if the MMU is ready to go */
-@@ -4561,6 +4598,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
- /* flags save not required, but irq_pmu has no disable/enable API */
- powerpc_local_irq_pmu_save(flags);
-
-+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
-+
- if (signal_pending(current))
- goto sigpend;
- if (need_resched() || !kvm->arch.mmu_ready)
-@@ -4605,7 +4644,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
-
- tb = mftb();
-
-- __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
-+ kvmppc_create_dtl_entry_p9(vcpu, vc, tb + vc->tb_offset);
-
- trace_kvm_guest_enter(vcpu);
-
-@@ -4631,6 +4670,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
-
- vcpu->cpu = -1;
- vcpu->arch.thread_cpu = -1;
-+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
-
- context_tracking_guest_exit();
- if (!vtime_accounting_enabled_this_cpu()) {
-@@ -4708,6 +4748,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
- out:
- vcpu->cpu = -1;
- vcpu->arch.thread_cpu = -1;
-+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
- powerpc_local_irq_pmu_restore(flags);
- preempt_enable();
- goto done;
---
-2.35.1
-
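A minimal model of the accounting scheme the patch restores, using the same
three per-vcpu fields the diff touches (all names illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define TB_NIL (~0ULL)

    struct vcpu_model {
        uint64_t stolen_tb;      /* total stolen timebase ticks */
        uint64_t stolen_logged;  /* amount already written to the dtl */
        uint64_t busy_preempt;   /* timestamp when preempted, or TB_NIL */
    };

    /* Preemption while "busy in host" starts a stolen-time interval... */
    static void sched_out(struct vcpu_model *v, uint64_t now)
    {
        v->busy_preempt = now;
    }

    /* ...and being scheduled back in accumulates it, lock-free because
     * only this vcpu's task touches the fields. */
    static void sched_in(struct vcpu_model *v, uint64_t now)
    {
        if (v->busy_preempt != TB_NIL) {
            v->stolen_tb += now - v->busy_preempt;
            v->busy_preempt = TB_NIL;
        }
    }

    /* Each guest entry logs only the delta since the last entry. */
    static uint64_t log_dtl_entry(struct vcpu_model *v)
    {
        uint64_t stolen = v->stolen_tb - v->stolen_logged;

        v->stolen_logged = v->stolen_tb;
        return stolen;
    }

    int main(void)
    {
        struct vcpu_model v = { 0, 0, TB_NIL };

        sched_out(&v, 100);  /* vcpu task preempted at tick 100 */
        sched_in(&v, 160);   /* runs again at tick 160: 60 ticks stolen */
        printf("logged %llu stolen ticks\n",
               (unsigned long long)log_dtl_entry(&v));  /* 60 */
        return 0;
    }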
+++ /dev/null
-From 60973cdaf3fd26815a9f305379f542e6f93b166a Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:15:59 +0000
-Subject: KVM: VMX: Inject #PF on ENCLS as "emulated" #PF
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit bfcb08a0b9e99b959814a329fabace22c3df046d ]
-
-Treat #PFs that occur during emulation of ENCLS as, wait for it, emulated
-page faults. Practically speaking, this is a glorified nop as the
-exception is never of the nested flavor, and it's extremely unlikely the
-guest is relying on the side effect of an implicit INVLPG on the faulting
-address.
-
-Fixes: 70210c044b4e ("KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions")
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-13-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/vmx/sgx.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
-index aba8cebdc587..8f95c7c01433 100644
---- a/arch/x86/kvm/vmx/sgx.c
-+++ b/arch/x86/kvm/vmx/sgx.c
-@@ -129,7 +129,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
- ex.address = gva;
- ex.error_code_valid = true;
- ex.nested_page_fault = false;
-- kvm_inject_page_fault(vcpu, &ex);
-+ kvm_inject_emulated_page_fault(vcpu, &ex);
- } else {
- kvm_inject_gp(vcpu, 0);
- }
---
-2.35.1
-
+++ /dev/null
-From 2af948a8e9becc744b8a2f1d0718d6e255df8b46 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 15:37:08 +0200
-Subject: KVM: x86: Check for existing Hyper-V vCPU in kvm_hv_vcpu_init()
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 1cac8d9f6bd25df3713103e44e2d9ca0c2e03c33 ]
-
-When potentially allocating/initializing the Hyper-V vCPU struct, check
-for an existing instance in kvm_hv_vcpu_init() instead of requiring
-callers to perform the check. Relying on callers to do the check is
-risky as it's all too easy for KVM to overwrite vcpu->arch.hyperv and
-leak memory, and it places an additional burden on callers without much
-benefit.
-
-No functional change intended.
-
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Wei Liu <wei.liu@kernel.org>
-Link: https://lore.kernel.org/r/20220830133737.1539624-5-vkuznets@redhat.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 3be29eb7b525 ("KVM: x86: Report error when setting CPUID if Hyper-V allocation fails")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/hyperv.c | 27 ++++++++++++---------------
- 1 file changed, 12 insertions(+), 15 deletions(-)
-
-diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
-index 611c349a08bf..8aadd31ed058 100644
---- a/arch/x86/kvm/hyperv.c
-+++ b/arch/x86/kvm/hyperv.c
-@@ -936,9 +936,12 @@ static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
-
- static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
- {
-- struct kvm_vcpu_hv *hv_vcpu;
-+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
- int i;
-
-+ if (hv_vcpu)
-+ return 0;
-+
- hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT);
- if (!hv_vcpu)
- return -ENOMEM;
-@@ -962,11 +965,9 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
- struct kvm_vcpu_hv_synic *synic;
- int r;
-
-- if (!to_hv_vcpu(vcpu)) {
-- r = kvm_hv_vcpu_init(vcpu);
-- if (r)
-- return r;
-- }
-+ r = kvm_hv_vcpu_init(vcpu);
-+ if (r)
-+ return r;
-
- synic = to_hv_synic(vcpu);
-
-@@ -1660,10 +1661,8 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
- if (!host && !vcpu->arch.hyperv_enabled)
- return 1;
-
-- if (!to_hv_vcpu(vcpu)) {
-- if (kvm_hv_vcpu_init(vcpu))
-- return 1;
-- }
-+ if (kvm_hv_vcpu_init(vcpu))
-+ return 1;
-
- if (kvm_hv_msr_partition_wide(msr)) {
- int r;
-@@ -1683,10 +1682,8 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
- if (!host && !vcpu->arch.hyperv_enabled)
- return 1;
-
-- if (!to_hv_vcpu(vcpu)) {
-- if (kvm_hv_vcpu_init(vcpu))
-- return 1;
-- }
-+ if (kvm_hv_vcpu_init(vcpu))
-+ return 1;
-
- if (kvm_hv_msr_partition_wide(msr)) {
- int r;
-@@ -2000,7 +1997,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
- return;
- }
-
-- if (!to_hv_vcpu(vcpu) && kvm_hv_vcpu_init(vcpu))
-+ if (kvm_hv_vcpu_init(vcpu))
- return;
-
- hv_vcpu = to_hv_vcpu(vcpu);
---
-2.35.1
-
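The resulting pattern is ordinary idempotent lazy initialization. A
standalone sketch (struct and function names are illustrative):

    #include <assert.h>
    #include <stdlib.h>

    struct hv_state { int synic_active; };
    struct vcpu { struct hv_state *hv; };

    /* The "already initialized?" check lives inside the init function,
     * so no call site can accidentally overwrite an existing instance
     * and leak it. */
    static int hv_vcpu_init(struct vcpu *v)
    {
        if (v->hv)
            return 0;                   /* nothing to do */

        v->hv = calloc(1, sizeof(*v->hv));
        return v->hv ? 0 : -1;          /* -ENOMEM in the kernel */
    }

    int main(void)
    {
        struct vcpu v = { 0 };
        struct hv_state *first;

        assert(hv_vcpu_init(&v) == 0);
        first = v.hv;
        assert(hv_vcpu_init(&v) == 0);  /* safe to call again ... */
        assert(v.hv == first);          /* ... nothing was reallocated */
        free(v.hv);
        return 0;
    }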
+++ /dev/null
-From e1f7f2457b1342553570bfcaeadae1496f75eec1 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 29 Jul 2022 15:43:29 -0700
-Subject: kvm: x86: Do proper cleanup if kvm_x86_ops->vm_init() fails
-
-From: Junaid Shahid <junaids@google.com>
-
-[ Upstream commit b24ede22538b4d984cbe20532bbcb303692e7f52 ]
-
-If vm_init() fails [which can happen, for instance, if a memory
-allocation fails during avic_vm_init()], we need to cleanup some
-state in order to avoid resource leaks.
-
-Signed-off-by: Junaid Shahid <junaids@google.com>
-Link: https://lore.kernel.org/r/20220729224329.323378-1-junaids@google.com
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Stable-dep-of: 5a2a961be2ad ("KVM: fix memoryleak in kvm_init()")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/x86.c | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index e2435090f225..14cb589683a1 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -12103,6 +12103,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
- if (ret)
- goto out_page_track;
-
-+ ret = static_call(kvm_x86_vm_init)(kvm);
-+ if (ret)
-+ goto out_uninit_mmu;
-+
- INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
- INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
- atomic_set(&kvm->arch.noncoherent_dma_count, 0);
-@@ -12138,8 +12142,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
- kvm_hv_init_vm(kvm);
- kvm_xen_init_vm(kvm);
-
-- return static_call(kvm_x86_vm_init)(kvm);
-+ return 0;
-
-+out_uninit_mmu:
-+ kvm_mmu_uninit_vm(kvm);
- out_page_track:
- kvm_page_track_cleanup(kvm);
- out:
---
-2.35.1
-
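The restructuring follows the usual goto-unwind discipline: run the fallible
hook before the infallible bookkeeping, with a new label that unwinds
everything initialized ahead of it. A compilable sketch of the resulting
shape (the helpers are stand-ins for the kvm_* functions in the diff;
vendor_vm_init() is made to fail to exercise the unwind):

    #include <stdio.h>

    struct vm { int page_track, mmu; };

    static int  page_track_init(struct vm *vm)    { vm->page_track = 1; return 0; }
    static void page_track_cleanup(struct vm *vm) { vm->page_track = 0; }
    static int  mmu_init_vm(struct vm *vm)        { vm->mmu = 1; return 0; }
    static void mmu_uninit_vm(struct vm *vm)      { vm->mmu = 0; }
    static int  vendor_vm_init(struct vm *vm)     { (void)vm; return -1; }

    static int arch_init_vm(struct vm *vm)
    {
        int ret;

        ret = page_track_init(vm);
        if (ret)
            goto out;

        ret = mmu_init_vm(vm);
        if (ret)
            goto out_page_track;

        ret = vendor_vm_init(vm);  /* was the *final* step before the fix */
        if (ret)
            goto out_uninit_mmu;   /* new label: unwind the MMU too */

        /* ... only infallible list/counter setup from here on ... */
        return 0;

    out_uninit_mmu:
        mmu_uninit_vm(vm);
    out_page_track:
        page_track_cleanup(vm);
    out:
        return ret;
    }

    int main(void)
    {
        struct vm vm = { 0 };
        int ret = arch_init_vm(&vm);

        printf("init=%d page_track=%d mmu=%d\n",
               ret, vm.page_track, vm.mmu);  /* -1 0 0: fully unwound */
        return 0;
    }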
+++ /dev/null
-From 37892c242b5293bddc508ec7fa3c598104fc29c7 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:16:05 +0000
-Subject: KVM: x86: Evaluate ability to inject SMI/NMI/IRQ after potential
- VM-Exit
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 28360f88706837fc3f1ac8944b45b4a630a71c75 ]
-
-Determine whether or not new events can be injected after checking nested
-events. If a VM-Exit occurred during nested event handling, any previous
-event that needed re-injection is gone from KVM's perspective; the event
-is captured in the vmc*12 VM-Exit information, but doesn't exist in terms
-of what needs to be done for entry to L1.
-
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-19-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/x86.c | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 15229a5ad9ff..01d59f93d93e 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -9683,7 +9683,7 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
-
- static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
- {
-- bool can_inject = !kvm_event_needs_reinjection(vcpu);
-+ bool can_inject;
- int r;
-
- /*
-@@ -9748,7 +9748,13 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
- if (r < 0)
- goto out;
-
-- /* try to inject new event if pending */
-+ /*
-+ * New events, other than exceptions, cannot be injected if KVM needs
-+ * to re-inject a previous event. See above comments on re-injecting
-+ * for why pending exceptions get priority.
-+ */
-+ can_inject = !kvm_event_needs_reinjection(vcpu);
-+
- if (vcpu->arch.exception.pending) {
- /*
- * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS
---
-2.35.1
-
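The ordering subtlety reduces to when the predicate is sampled. A toy
sketch, with nested VM-Exit modeled as simply clearing the re-injection flag
(names invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu_model {
        bool event_needs_reinjection;
        bool in_guest_mode;
    };

    /* A nested VM-Exit captures any to-be-re-injected event in the
     * vmc{b,s}12 exit info, so nothing is left to re-inject for L1. */
    static void check_nested_events(struct vcpu_model *v)
    {
        if (v->in_guest_mode)
            v->event_needs_reinjection = false;
    }

    int main(void)
    {
        struct vcpu_model v = { true, true };

        bool early = !v.event_needs_reinjection;  /* pre-fix sampling */
        check_nested_events(&v);
        bool late  = !v.event_needs_reinjection;  /* post-fix sampling */

        printf("can_inject early=%d late=%d\n", early, late);  /* 0 1 */
        return 0;
    }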
+++ /dev/null
-From 3b49b279b88de56dc9d042feb7b8bf101a21ea30 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:16:02 +0000
-Subject: KVM: x86: Formalize blocking of nested pending exceptions
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 72c14e00bdc445e96045c28d04bba45cbe69cf95 ]
-
-Capture nested_run_pending as block_nested_exceptions so that the logic
-of why exceptions are blocked only needs to be documented once instead of
-at every place that employs the logic.
-
-No functional change intended.
-
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-16-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/svm/nested.c | 26 ++++++++++++++++----------
- arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++----------
- 2 files changed, 33 insertions(+), 20 deletions(-)
-
-diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
-index 8f991592d277..a6111392985c 100644
---- a/arch/x86/kvm/svm/nested.c
-+++ b/arch/x86/kvm/svm/nested.c
-@@ -1356,10 +1356,22 @@ static inline bool nested_exit_on_init(struct vcpu_svm *svm)
-
- static int svm_check_nested_events(struct kvm_vcpu *vcpu)
- {
-- struct vcpu_svm *svm = to_svm(vcpu);
-- bool block_nested_events =
-- kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
- struct kvm_lapic *apic = vcpu->arch.apic;
-+ struct vcpu_svm *svm = to_svm(vcpu);
-+ /*
-+ * Only a pending nested run blocks a pending exception. If there is a
-+ * previously injected event, the pending exception occurred while said
-+ * event was being delivered and thus needs to be handled.
-+ */
-+ bool block_nested_exceptions = svm->nested.nested_run_pending;
-+ /*
-+ * New events (not exceptions) are only recognized at instruction
-+ * boundaries. If an event needs reinjection, then KVM is handling a
-+ * VM-Exit that occurred _during_ instruction execution; new events are
-+ * blocked until the instruction completes.
-+ */
-+ bool block_nested_events = block_nested_exceptions ||
-+ kvm_event_needs_reinjection(vcpu);
-
- if (lapic_in_kernel(vcpu) &&
- test_bit(KVM_APIC_INIT, &apic->pending_events)) {
-@@ -1372,13 +1384,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
- }
-
- if (vcpu->arch.exception.pending) {
-- /*
-- * Only a pending nested run can block a pending exception.
-- * Otherwise an injected NMI/interrupt should either be
-- * lost or delivered to the nested hypervisor in the EXITINTINFO
-- * vmcb field, while delivering the pending exception.
-- */
-- if (svm->nested.nested_run_pending)
-+ if (block_nested_exceptions)
- return -EBUSY;
- if (!nested_exit_on_exception(svm))
- return 0;
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index 83239d47fc0f..7655b5acbbcd 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -3904,11 +3904,23 @@ static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
-
- static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- {
-+ struct kvm_lapic *apic = vcpu->arch.apic;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long exit_qual;
-- bool block_nested_events =
-- vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
-- struct kvm_lapic *apic = vcpu->arch.apic;
-+ /*
-+ * Only a pending nested run blocks a pending exception. If there is a
-+ * previously injected event, the pending exception occurred while said
-+ * event was being delivered and thus needs to be handled.
-+ */
-+ bool block_nested_exceptions = vmx->nested.nested_run_pending;
-+ /*
-+ * New events (not exceptions) are only recognized at instruction
-+ * boundaries. If an event needs reinjection, then KVM is handling a
-+ * VM-Exit that occurred _during_ instruction execution; new events are
-+ * blocked until the instruction completes.
-+ */
-+ bool block_nested_events = block_nested_exceptions ||
-+ kvm_event_needs_reinjection(vcpu);
-
- if (lapic_in_kernel(vcpu) &&
- test_bit(KVM_APIC_INIT, &apic->pending_events)) {
-@@ -3947,15 +3959,10 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- * for TSS T flag #DBs). KVM also doesn't save/restore pending MTF
- * across SMI/RSM as it should; that needs to be addressed in order to
- * prioritize SMI over MTF and trap-like #DBs.
-- *
-- * Note that only a pending nested run can block a pending exception.
-- * Otherwise an injected NMI/interrupt should either be
-- * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
-- * while delivering the pending exception.
- */
- if (vcpu->arch.exception.pending &&
- !(vmx_get_pending_dbg_trap(vcpu) & ~DR6_BT)) {
-- if (vmx->nested.nested_run_pending)
-+ if (block_nested_exceptions)
- return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
- goto no_vmexit;
-@@ -3972,7 +3979,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- }
-
- if (vcpu->arch.exception.pending) {
-- if (vmx->nested.nested_run_pending)
-+ if (block_nested_exceptions)
- return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
- goto no_vmexit;
---
-2.35.1
-
+++ /dev/null
-From 7a5eb73b397d1336923a66280c1a818b1479792e Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:16:04 +0000
-Subject: KVM: x86: Hoist nested event checks above event injection logic
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 6c593b5276e6ce411dcdf03e2f7d4b93c2e7138e ]
-
-Perform nested event checks before re-injecting exceptions/events into
-L2. If a pending exception causes VM-Exit to L1, re-injecting events
-into vmcs02 is premature and wasted effort. Take care to ensure events
-that need to be re-injected are still re-injected if checking for nested
-events "fails", i.e. if KVM needs to force an immediate entry+exit to
-complete the to-be-re-injected event.
-
-Keep the "can_inject" logic the same for now; it too can be pushed below
-the nested checks, but is a slightly riskier change (see past bugs about
-events not being properly purged on nested VM-Exit).
-
-Add and/or modify comments to better document the various interactions.
-Of note is the comment regarding "blocking" previously injected NMIs and
-IRQs if an exception is pending. The old comment isn't wrong strictly
-speaking, but it failed to capture the reason why the logic even exists.
-
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-18-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/x86.c | 89 +++++++++++++++++++++++++++-------------------
- 1 file changed, 53 insertions(+), 36 deletions(-)
-
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 14182b5b2c93..15229a5ad9ff 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -9683,53 +9683,70 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
-
- static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
- {
-+ bool can_inject = !kvm_event_needs_reinjection(vcpu);
- int r;
-- bool can_inject = true;
-
-- /* try to reinject previous events if any */
-+ /*
-+ * Process nested events first, as nested VM-Exit supercedes event
-+ * re-injection. If there's an event queued for re-injection, it will
-+ * be saved into the appropriate vmc{b,s}12 fields on nested VM-Exit.
-+ */
-+ if (is_guest_mode(vcpu))
-+ r = kvm_check_nested_events(vcpu);
-+ else
-+ r = 0;
-
-- if (vcpu->arch.exception.injected) {
-- kvm_inject_exception(vcpu);
-- can_inject = false;
-- }
- /*
-- * Do not inject an NMI or interrupt if there is a pending
-- * exception. Exceptions and interrupts are recognized at
-- * instruction boundaries, i.e. the start of an instruction.
-- * Trap-like exceptions, e.g. #DB, have higher priority than
-- * NMIs and interrupts, i.e. traps are recognized before an
-- * NMI/interrupt that's pending on the same instruction.
-- * Fault-like exceptions, e.g. #GP and #PF, are the lowest
-- * priority, but are only generated (pended) during instruction
-- * execution, i.e. a pending fault-like exception means the
-- * fault occurred on the *previous* instruction and must be
-- * serviced prior to recognizing any new events in order to
-- * fully complete the previous instruction.
-+ * Re-inject exceptions and events *especially* if immediate entry+exit
-+ * to/from L2 is needed, as any event that has already been injected
-+ * into L2 needs to complete its lifecycle before injecting a new event.
-+ *
-+ * Don't re-inject an NMI or interrupt if there is a pending exception.
-+ * This collision arises if an exception occurred while vectoring the
-+ * injected event, KVM intercepted said exception, and KVM ultimately
-+ * determined the fault belongs to the guest and queues the exception
-+ * for injection back into the guest.
-+ *
-+ * "Injected" interrupts can also collide with pending exceptions if
-+ * userspace ignores the "ready for injection" flag and blindly queues
-+ * an interrupt. In that case, prioritizing the exception is correct,
-+ * as the exception "occurred" before the exit to userspace. Trap-like
-+ * exceptions, e.g. most #DBs, have higher priority than interrupts.
-+ * And while fault-like exceptions, e.g. #GP and #PF, are the lowest
-+ * priority, they're only generated (pended) during instruction
-+ * execution, and interrupts are recognized at instruction boundaries.
-+ * Thus a pending fault-like exception means the fault occurred on the
-+ * *previous* instruction and must be serviced prior to recognizing any
-+ * new events in order to fully complete the previous instruction.
- */
-- else if (!vcpu->arch.exception.pending) {
-- if (vcpu->arch.nmi_injected) {
-- static_call(kvm_x86_inject_nmi)(vcpu);
-- can_inject = false;
-- } else if (vcpu->arch.interrupt.injected) {
-- static_call(kvm_x86_inject_irq)(vcpu, true);
-- can_inject = false;
-- }
-- }
-+ if (vcpu->arch.exception.injected)
-+ kvm_inject_exception(vcpu);
-+ else if (vcpu->arch.exception.pending)
-+ ; /* see above */
-+ else if (vcpu->arch.nmi_injected)
-+ static_call(kvm_x86_inject_nmi)(vcpu);
-+ else if (vcpu->arch.interrupt.injected)
-+ static_call(kvm_x86_inject_irq)(vcpu, true);
-
-+ /*
-+ * Exceptions that morph to VM-Exits are handled above, and pending
-+ * exceptions on top of injected exceptions that do not VM-Exit should
-+ * either morph to #DF or, sadly, override the injected exception.
-+ */
- WARN_ON_ONCE(vcpu->arch.exception.injected &&
- vcpu->arch.exception.pending);
-
- /*
-- * Call check_nested_events() even if we reinjected a previous event
-- * in order for caller to determine if it should require immediate-exit
-- * from L2 to L1 due to pending L1 events which require exit
-- * from L2 to L1.
-+ * Bail if immediate entry+exit to/from the guest is needed to complete
-+ * nested VM-Enter or event re-injection so that a different pending
-+ * event can be serviced (or if KVM needs to exit to userspace).
-+ *
-+ * Otherwise, continue processing events even if VM-Exit occurred. The
-+ * VM-Exit will have cleared exceptions that were meant for L2, but
-+ * there may now be events that can be injected into L1.
- */
-- if (is_guest_mode(vcpu)) {
-- r = kvm_check_nested_events(vcpu);
-- if (r < 0)
-- goto out;
-- }
-+ if (r < 0)
-+ goto out;
-
- /* try to inject new event if pending */
- if (vcpu->arch.exception.pending) {
---
-2.35.1
-
+++ /dev/null
-From 35646ab067697782bc4fe48ae07c7b0515e6446d Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:16:01 +0000
-Subject: KVM: x86: Make kvm_queued_exception a properly named, visible struct
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit d4963e319f1f7851a098df6610a27f9f4cf6d42a ]
-
-Move the definition of "struct kvm_queued_exception" out of kvm_vcpu_arch
-in anticipation of adding a second instance in kvm_vcpu_arch to handle
-exceptions that occur when vectoring an injected exception and are
-morphed to VM-Exit instead of leading to #DF.
-
-Opportunistically take advantage of the churn to rename "nr" to "vector".
-
-No functional change intended.
-
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-15-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/include/asm/kvm_host.h | 23 +++++-----
- arch/x86/kvm/svm/nested.c | 47 ++++++++++---------
- arch/x86/kvm/svm/svm.c | 14 +++---
- arch/x86/kvm/vmx/nested.c | 42 +++++++++--------
- arch/x86/kvm/vmx/vmx.c | 20 ++++-----
- arch/x86/kvm/x86.c | 80 ++++++++++++++++-----------------
- arch/x86/kvm/x86.h | 3 +-
- 7 files changed, 113 insertions(+), 116 deletions(-)
-
-diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index aa381ab69a19..36e4fde359a7 100644
---- a/arch/x86/include/asm/kvm_host.h
-+++ b/arch/x86/include/asm/kvm_host.h
-@@ -639,6 +639,17 @@ struct kvm_vcpu_xen {
- struct timer_list poll_timer;
- };
-
-+struct kvm_queued_exception {
-+ bool pending;
-+ bool injected;
-+ bool has_error_code;
-+ u8 vector;
-+ u32 error_code;
-+ unsigned long payload;
-+ bool has_payload;
-+ u8 nested_apf;
-+};
-+
- struct kvm_vcpu_arch {
- /*
- * rip and regs accesses must go through
-@@ -738,16 +749,8 @@ struct kvm_vcpu_arch {
-
- u8 event_exit_inst_len;
-
-- struct kvm_queued_exception {
-- bool pending;
-- bool injected;
-- bool has_error_code;
-- u8 nr;
-- u32 error_code;
-- unsigned long payload;
-- bool has_payload;
-- u8 nested_apf;
-- } exception;
-+ /* Exceptions to be injected to the guest. */
-+ struct kvm_queued_exception exception;
-
- struct kvm_queued_interrupt {
- bool injected;
-diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
-index 76dcc8a3e849..8f991592d277 100644
---- a/arch/x86/kvm/svm/nested.c
-+++ b/arch/x86/kvm/svm/nested.c
-@@ -468,7 +468,7 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
- unsigned int nr;
-
- if (vcpu->arch.exception.injected) {
-- nr = vcpu->arch.exception.nr;
-+ nr = vcpu->arch.exception.vector;
- exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
-
- if (vcpu->arch.exception.has_error_code) {
-@@ -1306,42 +1306,45 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
-
- static bool nested_exit_on_exception(struct vcpu_svm *svm)
- {
-- unsigned int nr = svm->vcpu.arch.exception.nr;
-+ unsigned int vector = svm->vcpu.arch.exception.vector;
-
-- return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr));
-+ return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
- }
-
--static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
-+static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
- {
-- unsigned int nr = svm->vcpu.arch.exception.nr;
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
-+ struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
-
-- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
-+ vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
- vmcb->control.exit_code_hi = 0;
-
-- if (svm->vcpu.arch.exception.has_error_code)
-- vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
-+ if (ex->has_error_code)
-+ vmcb->control.exit_info_1 = ex->error_code;
-
- /*
- * EXITINFO2 is undefined for all exception intercepts other
- * than #PF.
- */
-- if (nr == PF_VECTOR) {
-- if (svm->vcpu.arch.exception.nested_apf)
-- vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
-- else if (svm->vcpu.arch.exception.has_payload)
-- vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
-+ if (ex->vector == PF_VECTOR) {
-+ if (ex->nested_apf)
-+ vmcb->control.exit_info_2 = vcpu->arch.apf.nested_apf_token;
-+ else if (ex->has_payload)
-+ vmcb->control.exit_info_2 = ex->payload;
- else
-- vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-- } else if (nr == DB_VECTOR) {
-+ vmcb->control.exit_info_2 = vcpu->arch.cr2;
-+ } else if (ex->vector == DB_VECTOR) {
- /* See inject_pending_event. */
-- kvm_deliver_exception_payload(&svm->vcpu);
-- if (svm->vcpu.arch.dr7 & DR7_GD) {
-- svm->vcpu.arch.dr7 &= ~DR7_GD;
-- kvm_update_dr7(&svm->vcpu);
-+ kvm_deliver_exception_payload(vcpu, ex);
-+
-+ if (vcpu->arch.dr7 & DR7_GD) {
-+ vcpu->arch.dr7 &= ~DR7_GD;
-+ kvm_update_dr7(vcpu);
- }
-- } else
-- WARN_ON(svm->vcpu.arch.exception.has_payload);
-+ } else {
-+ WARN_ON(ex->has_payload);
-+ }
-
- nested_svm_vmexit(svm);
- }
-@@ -1379,7 +1382,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
- return -EBUSY;
- if (!nested_exit_on_exception(svm))
- return 0;
-- nested_svm_inject_exception_vmexit(svm);
-+ nested_svm_inject_exception_vmexit(vcpu);
- return 0;
- }
-
-diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index f3813dbacb9f..b96c091f6c3d 100644
---- a/arch/x86/kvm/svm/svm.c
-+++ b/arch/x86/kvm/svm/svm.c
-@@ -463,22 +463,20 @@ static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
-
- static void svm_queue_exception(struct kvm_vcpu *vcpu)
- {
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
- struct vcpu_svm *svm = to_svm(vcpu);
-- unsigned nr = vcpu->arch.exception.nr;
-- bool has_error_code = vcpu->arch.exception.has_error_code;
-- u32 error_code = vcpu->arch.exception.error_code;
-
-- kvm_deliver_exception_payload(vcpu);
-+ kvm_deliver_exception_payload(vcpu, ex);
-
-- if (kvm_exception_is_soft(nr) &&
-+ if (kvm_exception_is_soft(ex->vector) &&
- svm_update_soft_interrupt_rip(vcpu))
- return;
-
-- svm->vmcb->control.event_inj = nr
-+ svm->vmcb->control.event_inj = ex->vector
- | SVM_EVTINJ_VALID
-- | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
-+ | (ex->has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
- | SVM_EVTINJ_TYPE_EXEPT;
-- svm->vmcb->control.event_inj_err = error_code;
-+ svm->vmcb->control.event_inj_err = ex->error_code;
- }
-
- static void svm_init_erratum_383(void)
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index 0aa40ea496a8..83239d47fc0f 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -446,29 +446,27 @@ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
- */
- static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
- {
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-- unsigned int nr = vcpu->arch.exception.nr;
-- bool has_payload = vcpu->arch.exception.has_payload;
-- unsigned long payload = vcpu->arch.exception.payload;
-
-- if (nr == PF_VECTOR) {
-- if (vcpu->arch.exception.nested_apf) {
-+ if (ex->vector == PF_VECTOR) {
-+ if (ex->nested_apf) {
- *exit_qual = vcpu->arch.apf.nested_apf_token;
- return 1;
- }
-- if (nested_vmx_is_page_fault_vmexit(vmcs12,
-- vcpu->arch.exception.error_code)) {
-- *exit_qual = has_payload ? payload : vcpu->arch.cr2;
-+ if (nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code)) {
-+ *exit_qual = ex->has_payload ? ex->payload : vcpu->arch.cr2;
- return 1;
- }
-- } else if (vmcs12->exception_bitmap & (1u << nr)) {
-- if (nr == DB_VECTOR) {
-- if (!has_payload) {
-- payload = vcpu->arch.dr6;
-- payload &= ~DR6_BT;
-- payload ^= DR6_ACTIVE_LOW;
-+ } else if (vmcs12->exception_bitmap & (1u << ex->vector)) {
-+ if (ex->vector == DB_VECTOR) {
-+ if (ex->has_payload) {
-+ *exit_qual = ex->payload;
-+ } else {
-+ *exit_qual = vcpu->arch.dr6;
-+ *exit_qual &= ~DR6_BT;
-+ *exit_qual ^= DR6_ACTIVE_LOW;
- }
-- *exit_qual = payload;
- } else
- *exit_qual = 0;
- return 1;
-@@ -3723,7 +3721,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
- is_double_fault(exit_intr_info))) {
- vmcs12->idt_vectoring_info_field = 0;
- } else if (vcpu->arch.exception.injected) {
-- nr = vcpu->arch.exception.nr;
-+ nr = vcpu->arch.exception.vector;
- idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
-
- if (kvm_exception_is_soft(nr)) {
-@@ -3827,11 +3825,11 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
- static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- unsigned long exit_qual)
- {
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
-+ u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-- unsigned int nr = vcpu->arch.exception.nr;
-- u32 intr_info = nr | INTR_INFO_VALID_MASK;
-
-- if (vcpu->arch.exception.has_error_code) {
-+ if (ex->has_error_code) {
- /*
- * Intel CPUs do not generate error codes with bits 31:16 set,
- * and more importantly VMX disallows setting bits 31:16 in the
-@@ -3841,11 +3839,11 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- * generate "full" 32-bit error codes, so KVM allows userspace
- * to inject exception error codes with bits 31:16 set.
- */
-- vmcs12->vm_exit_intr_error_code = (u16)vcpu->arch.exception.error_code;
-+ vmcs12->vm_exit_intr_error_code = (u16)ex->error_code;
- intr_info |= INTR_INFO_DELIVER_CODE_MASK;
- }
-
-- if (kvm_exception_is_soft(nr))
-+ if (kvm_exception_is_soft(ex->vector))
- intr_info |= INTR_TYPE_SOFT_EXCEPTION;
- else
- intr_info |= INTR_TYPE_HARD_EXCEPTION;
-@@ -3876,7 +3874,7 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu)
- {
- if (!vcpu->arch.exception.pending ||
-- vcpu->arch.exception.nr != DB_VECTOR)
-+ vcpu->arch.exception.vector != DB_VECTOR)
- return 0;
-
- /* General Detect #DBs are always fault-like. */
-diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
-index 7f3581960eb5..0f68ed966944 100644
---- a/arch/x86/kvm/vmx/vmx.c
-+++ b/arch/x86/kvm/vmx/vmx.c
-@@ -1659,7 +1659,7 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
- */
- if (nested_cpu_has_mtf(vmcs12) &&
- (!vcpu->arch.exception.pending ||
-- vcpu->arch.exception.nr == DB_VECTOR))
-+ vcpu->arch.exception.vector == DB_VECTOR))
- vmx->nested.mtf_pending = true;
- else
- vmx->nested.mtf_pending = false;
-@@ -1686,15 +1686,13 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
-
- static void vmx_queue_exception(struct kvm_vcpu *vcpu)
- {
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
-+ u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-- unsigned nr = vcpu->arch.exception.nr;
-- bool has_error_code = vcpu->arch.exception.has_error_code;
-- u32 error_code = vcpu->arch.exception.error_code;
-- u32 intr_info = nr | INTR_INFO_VALID_MASK;
-
-- kvm_deliver_exception_payload(vcpu);
-+ kvm_deliver_exception_payload(vcpu, ex);
-
-- if (has_error_code) {
-+ if (ex->has_error_code) {
- /*
- * Despite the error code being architecturally defined as 32
- * bits, and the VMCS field being 32 bits, Intel CPUs and thus
-@@ -1705,21 +1703,21 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
- * the upper bits to avoid VM-Fail, losing information that
- * doesn't really exist is preferable to killing the VM.
- */
-- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)error_code);
-+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code);
- intr_info |= INTR_INFO_DELIVER_CODE_MASK;
- }
-
- if (vmx->rmode.vm86_active) {
- int inc_eip = 0;
-- if (kvm_exception_is_soft(nr))
-+ if (kvm_exception_is_soft(ex->vector))
- inc_eip = vcpu->arch.event_exit_inst_len;
-- kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
-+ kvm_inject_realmode_interrupt(vcpu, ex->vector, inc_eip);
- return;
- }
-
- WARN_ON_ONCE(vmx->emulation_required);
-
-- if (kvm_exception_is_soft(nr)) {
-+ if (kvm_exception_is_soft(ex->vector)) {
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
- vmx->vcpu.arch.event_exit_inst_len);
- intr_info |= INTR_TYPE_SOFT_EXCEPTION;
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 14cb589683a1..14182b5b2c93 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -556,16 +556,13 @@ static int exception_type(int vector)
- return EXCPT_FAULT;
- }
-
--void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
-+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
-+ struct kvm_queued_exception *ex)
- {
-- unsigned nr = vcpu->arch.exception.nr;
-- bool has_payload = vcpu->arch.exception.has_payload;
-- unsigned long payload = vcpu->arch.exception.payload;
--
-- if (!has_payload)
-+ if (!ex->has_payload)
- return;
-
-- switch (nr) {
-+ switch (ex->vector) {
- case DB_VECTOR:
- /*
- * "Certain debug exceptions may clear bit 0-3. The
-@@ -590,8 +587,8 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
- * So they need to be flipped for DR6.
- */
- vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
-- vcpu->arch.dr6 |= payload;
-- vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;
-+ vcpu->arch.dr6 |= ex->payload;
-+ vcpu->arch.dr6 ^= ex->payload & DR6_ACTIVE_LOW;
-
- /*
- * The #DB payload is defined as compatible with the 'pending
-@@ -602,12 +599,12 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
- vcpu->arch.dr6 &= ~BIT(12);
- break;
- case PF_VECTOR:
-- vcpu->arch.cr2 = payload;
-+ vcpu->arch.cr2 = ex->payload;
- break;
- }
-
-- vcpu->arch.exception.has_payload = false;
-- vcpu->arch.exception.payload = 0;
-+ ex->has_payload = false;
-+ ex->payload = 0;
- }
- EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
-
-@@ -646,17 +643,18 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
- vcpu->arch.exception.injected = false;
- }
- vcpu->arch.exception.has_error_code = has_error;
-- vcpu->arch.exception.nr = nr;
-+ vcpu->arch.exception.vector = nr;
- vcpu->arch.exception.error_code = error_code;
- vcpu->arch.exception.has_payload = has_payload;
- vcpu->arch.exception.payload = payload;
- if (!is_guest_mode(vcpu))
-- kvm_deliver_exception_payload(vcpu);
-+ kvm_deliver_exception_payload(vcpu,
-+ &vcpu->arch.exception);
- return;
- }
-
- /* to check exception */
-- prev_nr = vcpu->arch.exception.nr;
-+ prev_nr = vcpu->arch.exception.vector;
- if (prev_nr == DF_VECTOR) {
- /* triple fault -> shutdown */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-@@ -674,7 +672,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.injected = false;
- vcpu->arch.exception.has_error_code = true;
-- vcpu->arch.exception.nr = DF_VECTOR;
-+ vcpu->arch.exception.vector = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
- vcpu->arch.exception.has_payload = false;
- vcpu->arch.exception.payload = 0;
-@@ -5023,25 +5021,24 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
- static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
- {
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
-+
- process_nmi(vcpu);
-
- if (kvm_check_request(KVM_REQ_SMI, vcpu))
- process_smi(vcpu);
-
- /*
-- * In guest mode, payload delivery should be deferred,
-- * so that the L1 hypervisor can intercept #PF before
-- * CR2 is modified (or intercept #DB before DR6 is
-- * modified under nVMX). Unless the per-VM capability,
-- * KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of
-- * an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we
-- * opportunistically defer the exception payload, deliver it if the
-- * capability hasn't been requested before processing a
-- * KVM_GET_VCPU_EVENTS.
-+ * In guest mode, payload delivery should be deferred if the exception
-+ * will be intercepted by L1, e.g. KVM should not modify CR2 if L1
-+ * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability,
-+ * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not
-+ * propagate the payload and so it cannot be safely deferred. Deliver
-+ * the payload if the capability hasn't been requested.
- */
- if (!vcpu->kvm->arch.exception_payload_enabled &&
-- vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
-- kvm_deliver_exception_payload(vcpu);
-+ ex->pending && ex->has_payload)
-+ kvm_deliver_exception_payload(vcpu, ex);
-
- /*
- * The API doesn't provide the instruction length for software
-@@ -5049,26 +5046,25 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
- * isn't advanced, we should expect to encounter the exception
- * again.
- */
-- if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
-+ if (kvm_exception_is_soft(ex->vector)) {
- events->exception.injected = 0;
- events->exception.pending = 0;
- } else {
-- events->exception.injected = vcpu->arch.exception.injected;
-- events->exception.pending = vcpu->arch.exception.pending;
-+ events->exception.injected = ex->injected;
-+ events->exception.pending = ex->pending;
- /*
- * For ABI compatibility, deliberately conflate
- * pending and injected exceptions when
- * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
- */
- if (!vcpu->kvm->arch.exception_payload_enabled)
-- events->exception.injected |=
-- vcpu->arch.exception.pending;
-+ events->exception.injected |= ex->pending;
- }
-- events->exception.nr = vcpu->arch.exception.nr;
-- events->exception.has_error_code = vcpu->arch.exception.has_error_code;
-- events->exception.error_code = vcpu->arch.exception.error_code;
-- events->exception_has_payload = vcpu->arch.exception.has_payload;
-- events->exception_payload = vcpu->arch.exception.payload;
-+ events->exception.nr = ex->vector;
-+ events->exception.has_error_code = ex->has_error_code;
-+ events->exception.error_code = ex->error_code;
-+ events->exception_has_payload = ex->has_payload;
-+ events->exception_payload = ex->payload;
-
- events->interrupt.injected =
- vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
-@@ -5140,7 +5136,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
- process_nmi(vcpu);
- vcpu->arch.exception.injected = events->exception.injected;
- vcpu->arch.exception.pending = events->exception.pending;
-- vcpu->arch.exception.nr = events->exception.nr;
-+ vcpu->arch.exception.vector = events->exception.nr;
- vcpu->arch.exception.has_error_code = events->exception.has_error_code;
- vcpu->arch.exception.error_code = events->exception.error_code;
- vcpu->arch.exception.has_payload = events->exception_has_payload;
-@@ -9675,7 +9671,7 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
-
- static void kvm_inject_exception(struct kvm_vcpu *vcpu)
- {
-- trace_kvm_inj_exception(vcpu->arch.exception.nr,
-+ trace_kvm_inj_exception(vcpu->arch.exception.vector,
- vcpu->arch.exception.has_error_code,
- vcpu->arch.exception.error_code,
- vcpu->arch.exception.injected);
-@@ -9747,12 +9743,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
- * describe the behavior of General Detect #DBs, which are
- * fault-like. They do _not_ set RF, a la code breakpoints.
- */
-- if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
-+ if (exception_type(vcpu->arch.exception.vector) == EXCPT_FAULT)
- __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
- X86_EFLAGS_RF);
-
-- if (vcpu->arch.exception.nr == DB_VECTOR) {
-- kvm_deliver_exception_payload(vcpu);
-+ if (vcpu->arch.exception.vector == DB_VECTOR) {
-+ kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception);
- if (vcpu->arch.dr7 & DR7_GD) {
- vcpu->arch.dr7 &= ~DR7_GD;
- kvm_update_dr7(vcpu);
-diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
-index 1926d2cb8e79..4147d27f9fbc 100644
---- a/arch/x86/kvm/x86.h
-+++ b/arch/x86/kvm/x86.h
-@@ -286,7 +286,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
-
- int handle_ud(struct kvm_vcpu *vcpu);
-
--void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu);
-+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
-+ struct kvm_queued_exception *ex);
-
- void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
- u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
---
-2.35.1
-
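The hunks above all make one mechanical change: rather than copying vcpu->arch.exception into individual locals (nr, has_error_code, error_code, ...) at every call site, each function takes a pointer to the queued-exception struct once and reads the fields through it. A minimal, self-contained sketch of that refactoring pattern, using hypothetical types rather than KVM's, might look like:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the kernel's struct kvm_queued_exception. */
    struct queued_exception {
            unsigned int vector;
            bool has_error_code;
            unsigned int error_code;
    };

    struct vcpu {
            struct queued_exception exception;
    };

    /* After the rename: one pointer replaces several copied locals. */
    static void deliver(struct vcpu *vcpu, struct queued_exception *ex)
    {
            (void)vcpu; /* unused in this sketch */
            printf("inject vector %u err=%u\n", ex->vector,
                   ex->has_error_code ? ex->error_code : 0);
    }

    int main(void)
    {
            struct vcpu v = { .exception = { 14, true, 2 } };

            deliver(&v, &v.exception); /* caller passes the struct once */
            return 0;
    }
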
+++ /dev/null
-From 80c076cb1e7ff649cd729910c9f9058780e124cc Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 23 Aug 2022 14:32:37 +0800
-Subject: KVM: x86/mmu: fix memoryleak in kvm_mmu_vendor_module_init()
-
-From: Miaohe Lin <linmiaohe@huawei.com>
-
-[ Upstream commit d7c9bfb9caaffd496ae44b258ec7c793677d3eeb ]
-
-When register_shrinker() fails, KVM doesn't release the percpu counter
-kvm_total_used_mmu_pages, leading to a memory leak. Fix this by calling
-percpu_counter_destroy() when register_shrinker() fails.
-
-Fixes: ab271bd4dfd5 ("x86: kvm: propagate register_shrinker return code")
-Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
-Link: https://lore.kernel.org/r/20220823063237.47299-1-linmiaohe@huawei.com
-[sean: tweak shortlog and changelog]
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/mmu/mmu.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
-index 3552e6af3684..858bc53cfab4 100644
---- a/arch/x86/kvm/mmu/mmu.c
-+++ b/arch/x86/kvm/mmu/mmu.c
-@@ -6704,10 +6704,12 @@ int kvm_mmu_vendor_module_init(void)
-
- ret = register_shrinker(&mmu_shrinker, "x86-mmu");
- if (ret)
-- goto out;
-+ goto out_shrinker;
-
- return 0;
-
-+out_shrinker:
-+ percpu_counter_destroy(&kvm_total_used_mmu_pages);
- out:
- mmu_destroy_caches();
- return ret;
---
-2.35.1
-
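The fix above follows the kernel's standard init/unwind idiom: each successfully acquired resource gets its own cleanup label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A self-contained sketch of the idiom with hypothetical resource names (not KVM's actual MMU state):

    #include <errno.h>

    /* Hypothetical resources standing in for caches, counters, shrinkers. */
    static int acquire_a(void) { return 0; }
    static int acquire_b(void) { return 0; }
    static int acquire_c(void) { return -ENOMEM; }
    static void release_a(void) { }
    static void release_b(void) { }

    int module_init_example(void)
    {
            int ret;

            ret = acquire_a();
            if (ret)
                    goto out;

            ret = acquire_b();
            if (ret)
                    goto out_a;

            ret = acquire_c();
            if (ret)
                    goto out_b; /* the bug fixed above: jumping to "out" here would leak b */

            return 0;

    out_b:
            release_b();
    out_a:
            release_a();
    out:
            return ret;
    }
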
+++ /dev/null
-From 2c2075dbd009341c0223762348ffd9d61e289200 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 23:16:08 +0000
-Subject: KVM: x86: Morph pending exceptions to pending VM-Exits at queue time
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 7709aba8f71613ae5d18d8c00adb54948e6bedb3 ]
-
-Morph pending exceptions to pending VM-Exits (due to interception) when
-the exception is queued instead of waiting until nested events are
-checked at VM-Entry. This fixes a longstanding bug where KVM fails to
-handle an exception that occurs during delivery of a previous exception,
-KVM (L0) and L1 both want to intercept the exception (e.g. #PF for shadow
-paging), and KVM determines that the exception is in the guest's domain,
-i.e. queues the new exception for L2. Deferring the interception check
-causes KVM to escalate various combinations of injected+pending exceptions
-to double fault (#DF) without consulting L1's interception desires, and
-ends up injecting a spurious #DF into L2.
-
-KVM has fudged around the issue for #PF by special casing emulated #PF
-injection for shadow paging, but the underlying issue is not unique to
-shadow paging in L0, e.g. if KVM is intercepting #PF because the guest
-has a smaller maxphyaddr and L1 (but not L0) is using shadow paging.
-Other exceptions are affected as well, e.g. if KVM is intercepting #GP
-for one of SVM's workarounds or for the VMware backdoor emulation stuff.
-The other cases have gone unnoticed because the #DF is spurious if and
-only if L1 resolves the exception, e.g. KVM's goofs go unnoticed if L1
-would have injected #DF anyways.
-
-The hack-a-fix has also led to ugly code, e.g. bailing from the emulator
-if #PF injection forced a nested VM-Exit and the emulator finds itself
-back in L1. Allowing for direct-to-VM-Exit queueing also neatly solves
-the async #PF in L2 mess; no need to set a magic flag and token, simply
-queue a #PF nested VM-Exit.
-
-Deal with event migration by flagging that a pending exception was queued
-by userspace and check for interception at the next KVM_RUN, e.g. so that
-KVM does the right thing regardless of the order in which userspace
-restores nested state vs. event state.
-
-When "getting" events from userspace, simply drop any pending exception
-that is destined to be intercepted if there is also an injected exception
-to be migrated. Ideally, KVM would migrate both events, but that would
-require new ABI, and practically speaking losing the event is unlikely to
-be noticed, let alone fatal. The injected exception is captured, RIP
-still points at the original faulting instruction, etc... So either the
-injection on the target will trigger the same intercepted exception, or
-the source of the intercepted exception was transient and/or
-non-deterministic, thus dropping it is ok-ish.
-
-Fixes: a04aead144fd ("KVM: nSVM: fix running nested guests when npt=0")
-Fixes: feaf0c7dc473 ("KVM: nVMX: Do not generate #DF if #PF happens during exception delivery into L2")
-Cc: Jim Mattson <jmattson@google.com>
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
-Link: https://lore.kernel.org/r/20220830231614.3580124-22-seanjc@google.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/include/asm/kvm_host.h | 12 ++-
- arch/x86/kvm/svm/nested.c | 45 +++------
- arch/x86/kvm/vmx/nested.c | 109 ++++++++++------------
- arch/x86/kvm/vmx/vmx.c | 6 +-
- arch/x86/kvm/x86.c | 159 ++++++++++++++++++++++----------
- arch/x86/kvm/x86.h | 7 ++
- 6 files changed, 188 insertions(+), 150 deletions(-)
-
-diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index 36e4fde359a7..bad74c8fbc65 100644
---- a/arch/x86/include/asm/kvm_host.h
-+++ b/arch/x86/include/asm/kvm_host.h
-@@ -647,7 +647,6 @@ struct kvm_queued_exception {
- u32 error_code;
- unsigned long payload;
- bool has_payload;
-- u8 nested_apf;
- };
-
- struct kvm_vcpu_arch {
-@@ -749,8 +748,12 @@ struct kvm_vcpu_arch {
-
- u8 event_exit_inst_len;
-
-+ bool exception_from_userspace;
-+
- /* Exceptions to be injected to the guest. */
- struct kvm_queued_exception exception;
-+ /* Exception VM-Exits to be synthesized to L1. */
-+ struct kvm_queued_exception exception_vmexit;
-
- struct kvm_queued_interrupt {
- bool injected;
-@@ -861,7 +864,6 @@ struct kvm_vcpu_arch {
- u32 id;
- bool send_user_only;
- u32 host_apf_flags;
-- unsigned long nested_apf_token;
- bool delivery_as_pf_vmexit;
- bool pageready_pending;
- } apf;
-@@ -1637,9 +1639,9 @@ struct kvm_x86_ops {
-
- struct kvm_x86_nested_ops {
- void (*leave_nested)(struct kvm_vcpu *vcpu);
-+ bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
-+ u32 error_code);
- int (*check_events)(struct kvm_vcpu *vcpu);
-- bool (*handle_page_fault_workaround)(struct kvm_vcpu *vcpu,
-- struct x86_exception *fault);
- bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
- void (*triple_fault)(struct kvm_vcpu *vcpu);
- int (*get_state)(struct kvm_vcpu *vcpu,
-@@ -1866,7 +1868,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay
- void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
- void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
- void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
--bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
-+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
- struct x86_exception *fault);
- bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
- bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
-diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
-index a6111392985c..405075286965 100644
---- a/arch/x86/kvm/svm/nested.c
-+++ b/arch/x86/kvm/svm/nested.c
-@@ -55,28 +55,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
- nested_svm_vmexit(svm);
- }
-
--static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
-- struct x86_exception *fault)
--{
-- struct vcpu_svm *svm = to_svm(vcpu);
-- struct vmcb *vmcb = svm->vmcb;
--
-- WARN_ON(!is_guest_mode(vcpu));
--
-- if (vmcb12_is_intercept(&svm->nested.ctl,
-- INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
-- !WARN_ON_ONCE(svm->nested.nested_run_pending)) {
-- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
-- vmcb->control.exit_code_hi = 0;
-- vmcb->control.exit_info_1 = fault->error_code;
-- vmcb->control.exit_info_2 = fault->address;
-- nested_svm_vmexit(svm);
-- return true;
-- }
--
-- return false;
--}
--
- static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
- {
- struct vcpu_svm *svm = to_svm(vcpu);
-@@ -1304,16 +1282,17 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
- return 0;
- }
-
--static bool nested_exit_on_exception(struct vcpu_svm *svm)
-+static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
-+ u32 error_code)
- {
-- unsigned int vector = svm->vcpu.arch.exception.vector;
-+ struct vcpu_svm *svm = to_svm(vcpu);
-
- return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
- }
-
- static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
- {
-- struct kvm_queued_exception *ex = &vcpu->arch.exception;
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
- struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
-
-@@ -1328,9 +1307,7 @@ static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
- * than #PF.
- */
- if (ex->vector == PF_VECTOR) {
-- if (ex->nested_apf)
-- vmcb->control.exit_info_2 = vcpu->arch.apf.nested_apf_token;
-- else if (ex->has_payload)
-+ if (ex->has_payload)
- vmcb->control.exit_info_2 = ex->payload;
- else
- vmcb->control.exit_info_2 = vcpu->arch.cr2;
-@@ -1383,15 +1360,19 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
- return 0;
- }
-
-- if (vcpu->arch.exception.pending) {
-+ if (vcpu->arch.exception_vmexit.pending) {
- if (block_nested_exceptions)
- return -EBUSY;
-- if (!nested_exit_on_exception(svm))
-- return 0;
- nested_svm_inject_exception_vmexit(vcpu);
- return 0;
- }
-
-+ if (vcpu->arch.exception.pending) {
-+ if (block_nested_exceptions)
-+ return -EBUSY;
-+ return 0;
-+ }
-+
- if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
- if (block_nested_events)
- return -EBUSY;
-@@ -1729,8 +1710,8 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
-
- struct kvm_x86_nested_ops svm_nested_ops = {
- .leave_nested = svm_leave_nested,
-+ .is_exception_vmexit = nested_svm_is_exception_vmexit,
- .check_events = svm_check_nested_events,
-- .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround,
- .triple_fault = nested_svm_triple_fault,
- .get_nested_state_pages = svm_get_nested_state_pages,
- .get_state = svm_get_nested_state,
-diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
-index dfd5e13e5202..4bb3ccf82d63 100644
---- a/arch/x86/kvm/vmx/nested.c
-+++ b/arch/x86/kvm/vmx/nested.c
-@@ -439,59 +439,22 @@ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
- return inequality ^ bit;
- }
-
--
--/*
-- * KVM wants to inject page-faults which it got to the guest. This function
-- * checks whether in a nested guest, we need to inject them to L1 or L2.
-- */
--static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
--{
-- struct kvm_queued_exception *ex = &vcpu->arch.exception;
-- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
--
-- if (ex->vector == PF_VECTOR) {
-- if (ex->nested_apf) {
-- *exit_qual = vcpu->arch.apf.nested_apf_token;
-- return 1;
-- }
-- if (nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code)) {
-- *exit_qual = ex->has_payload ? ex->payload : vcpu->arch.cr2;
-- return 1;
-- }
-- } else if (vmcs12->exception_bitmap & (1u << ex->vector)) {
-- if (ex->vector == DB_VECTOR) {
-- if (ex->has_payload) {
-- *exit_qual = ex->payload;
-- } else {
-- *exit_qual = vcpu->arch.dr6;
-- *exit_qual &= ~DR6_BT;
-- *exit_qual ^= DR6_ACTIVE_LOW;
-- }
-- } else
-- *exit_qual = 0;
-- return 1;
-- }
--
-- return 0;
--}
--
--static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
-- struct x86_exception *fault)
-+static bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
-+ u32 error_code)
- {
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
-- WARN_ON(!is_guest_mode(vcpu));
-+ /*
-+ * Drop bits 31:16 of the error code when performing the #PF mask+match
-+ * check. All VMCS fields involved are 32 bits, but Intel CPUs never
-+ * set bits 31:16 and VMX disallows setting bits 31:16 in the injected
-+ * error code. Including the to-be-dropped bits in the check might
-+ * result in an "impossible" or missed exit from L1's perspective.
-+ */
-+ if (vector == PF_VECTOR)
-+ return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code);
-
-- if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
-- !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) {
-- vmcs12->vm_exit_intr_error_code = fault->error_code;
-- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-- PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
-- INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
-- fault->address);
-- return true;
-- }
-- return false;
-+ return (vmcs12->exception_bitmap & (1u << vector));
- }
-
- static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
-@@ -3822,12 +3785,24 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
- return -ENXIO;
- }
-
--static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
-- unsigned long exit_qual)
-+static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
- {
-- struct kvm_queued_exception *ex = &vcpu->arch.exception;
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
- u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-+ unsigned long exit_qual;
-+
-+ if (ex->has_payload) {
-+ exit_qual = ex->payload;
-+ } else if (ex->vector == PF_VECTOR) {
-+ exit_qual = vcpu->arch.cr2;
-+ } else if (ex->vector == DB_VECTOR) {
-+ exit_qual = vcpu->arch.dr6;
-+ exit_qual &= ~DR6_BT;
-+ exit_qual ^= DR6_ACTIVE_LOW;
-+ } else {
-+ exit_qual = 0;
-+ }
-
- if (ex->has_error_code) {
- /*
-@@ -3917,7 +3892,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- {
- struct kvm_lapic *apic = vcpu->arch.apic;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-- unsigned long exit_qual;
- /*
- * Only a pending nested run blocks a pending exception. If there is a
- * previously injected event, the pending exception occurred while said
-@@ -3971,14 +3945,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- * across SMI/RSM as it should; that needs to be addressed in order to
- * prioritize SMI over MTF and trap-like #DBs.
- */
-+ if (vcpu->arch.exception_vmexit.pending &&
-+ !vmx_is_low_priority_db_trap(&vcpu->arch.exception_vmexit)) {
-+ if (block_nested_exceptions)
-+ return -EBUSY;
-+
-+ nested_vmx_inject_exception_vmexit(vcpu);
-+ return 0;
-+ }
-+
- if (vcpu->arch.exception.pending &&
- !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) {
- if (block_nested_exceptions)
- return -EBUSY;
-- if (!nested_vmx_check_exception(vcpu, &exit_qual))
-- goto no_vmexit;
-- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-- return 0;
-+ goto no_vmexit;
- }
-
- if (vmx->nested.mtf_pending) {
-@@ -3989,15 +3969,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
- return 0;
- }
-
-- if (vcpu->arch.exception.pending) {
-+ if (vcpu->arch.exception_vmexit.pending) {
- if (block_nested_exceptions)
- return -EBUSY;
-- if (!nested_vmx_check_exception(vcpu, &exit_qual))
-- goto no_vmexit;
-- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-+
-+ nested_vmx_inject_exception_vmexit(vcpu);
- return 0;
- }
-
-+ if (vcpu->arch.exception.pending) {
-+ if (block_nested_exceptions)
-+ return -EBUSY;
-+ goto no_vmexit;
-+ }
-+
- if (nested_vmx_preemption_timer_pending(vcpu)) {
- if (block_nested_events)
- return -EBUSY;
-@@ -6868,8 +6853,8 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
-
- struct kvm_x86_nested_ops vmx_nested_ops = {
- .leave_nested = vmx_leave_nested,
-+ .is_exception_vmexit = nested_vmx_is_exception_vmexit,
- .check_events = vmx_check_nested_events,
-- .handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround,
- .hv_timer_pending = nested_vmx_preemption_timer_pending,
- .triple_fault = nested_vmx_triple_fault,
- .get_state = vmx_get_nested_state,
-diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
-index 0f68ed966944..9c2b8e2b2a28 100644
---- a/arch/x86/kvm/vmx/vmx.c
-+++ b/arch/x86/kvm/vmx/vmx.c
-@@ -1659,7 +1659,9 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
- */
- if (nested_cpu_has_mtf(vmcs12) &&
- (!vcpu->arch.exception.pending ||
-- vcpu->arch.exception.vector == DB_VECTOR))
-+ vcpu->arch.exception.vector == DB_VECTOR) &&
-+ (!vcpu->arch.exception_vmexit.pending ||
-+ vcpu->arch.exception_vmexit.vector == DB_VECTOR))
- vmx->nested.mtf_pending = true;
- else
- vmx->nested.mtf_pending = false;
-@@ -5718,7 +5720,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- return vmx->emulation_required && !vmx->rmode.vm86_active &&
-- (vcpu->arch.exception.pending || vcpu->arch.exception.injected);
-+ (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
- }
-
- static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 01d59f93d93e..8264e41b4fea 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -608,6 +608,21 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
- }
- EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
-
-+static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector,
-+ bool has_error_code, u32 error_code,
-+ bool has_payload, unsigned long payload)
-+{
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
-+
-+ ex->vector = vector;
-+ ex->injected = false;
-+ ex->pending = true;
-+ ex->has_error_code = has_error_code;
-+ ex->error_code = error_code;
-+ ex->has_payload = has_payload;
-+ ex->payload = payload;
-+}
-+
- static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
- unsigned nr, bool has_error, u32 error_code,
- bool has_payload, unsigned long payload, bool reinject)
-@@ -617,18 +632,31 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
-
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
-+ /*
-+ * If the exception is destined for L2 and isn't being reinjected,
-+ * morph it to a VM-Exit if L1 wants to intercept the exception. A
-+ * previously injected exception is not checked because it was checked
-+ * when it was originally queued, and re-checking is incorrect if _L1_
-+ * injected the exception, in which case it's exempt from interception.
-+ */
-+ if (!reinject && is_guest_mode(vcpu) &&
-+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
-+ kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
-+ has_payload, payload);
-+ return;
-+ }
-+
- if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
- queue:
- if (reinject) {
- /*
-- * On vmentry, vcpu->arch.exception.pending is only
-- * true if an event injection was blocked by
-- * nested_run_pending. In that case, however,
-- * vcpu_enter_guest requests an immediate exit,
-- * and the guest shouldn't proceed far enough to
-- * need reinjection.
-+ * On VM-Entry, an exception can be pending if and only
-+ * if event injection was blocked by nested_run_pending.
-+ * In that case, however, vcpu_enter_guest() requests an
-+ * immediate exit, and the guest shouldn't proceed far
-+ * enough to need reinjection.
- */
-- WARN_ON_ONCE(vcpu->arch.exception.pending);
-+ WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
- vcpu->arch.exception.injected = true;
- if (WARN_ON_ONCE(has_payload)) {
- /*
-@@ -734,20 +762,22 @@ static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
- void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
- {
- ++vcpu->stat.pf_guest;
-- vcpu->arch.exception.nested_apf =
-- is_guest_mode(vcpu) && fault->async_page_fault;
-- if (vcpu->arch.exception.nested_apf) {
-- vcpu->arch.apf.nested_apf_token = fault->address;
-- kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
-- } else {
-+
-+ /*
-+ * Async #PF in L2 is always forwarded to L1 as a VM-Exit regardless of
-+ * whether or not L1 wants to intercept "regular" #PF.
-+ */
-+ if (is_guest_mode(vcpu) && fault->async_page_fault)
-+ kvm_queue_exception_vmexit(vcpu, PF_VECTOR,
-+ true, fault->error_code,
-+ true, fault->address);
-+ else
- kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
- fault->address);
-- }
- }
- EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
-
--/* Returns true if the page fault was immediately morphed into a VM-Exit. */
--bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
-+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
- {
- struct kvm_mmu *fault_mmu;
-@@ -765,26 +795,7 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
- kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
- fault_mmu->root.hpa);
-
-- /*
-- * A workaround for KVM's bad exception handling. If KVM injected an
-- * exception into L2, and L2 encountered a #PF while vectoring the
-- * injected exception, manually check to see if L1 wants to intercept
-- * #PF, otherwise queuing the #PF will lead to #DF or a lost exception.
-- * In all other cases, defer the check to nested_ops->check_events(),
-- * which will correctly handle priority (this does not). Note, other
-- * exceptions, e.g. #GP, are theoretically affected, #PF is simply the
-- * most problematic, e.g. when L0 and L1 are both intercepting #PF for
-- * shadow paging.
-- *
-- * TODO: Rewrite exception handling to track injected and pending
-- * (VM-Exit) exceptions separately.
-- */
-- if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) &&
-- kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault))
-- return true;
--
- fault_mmu->inject_page_fault(vcpu, fault);
-- return false;
- }
- EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
-
-@@ -4846,7 +4857,7 @@ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
- return (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_accept_dm_intr(vcpu) &&
- !kvm_event_needs_reinjection(vcpu) &&
-- !vcpu->arch.exception.pending);
-+ !kvm_is_exception_pending(vcpu));
- }
-
- static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
-@@ -5021,13 +5032,27 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
- static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
- {
-- struct kvm_queued_exception *ex = &vcpu->arch.exception;
-+ struct kvm_queued_exception *ex;
-
- process_nmi(vcpu);
-
- if (kvm_check_request(KVM_REQ_SMI, vcpu))
- process_smi(vcpu);
-
-+ /*
-+ * KVM's ABI only allows for one exception to be migrated. Luckily,
-+ * the only time there can be two queued exceptions is if there's a
-+ * non-exiting _injected_ exception, and a pending exiting exception.
-+ * In that case, ignore the VM-Exiting exception as it's an extension
-+ * of the injected exception.
-+ */
-+ if (vcpu->arch.exception_vmexit.pending &&
-+ !vcpu->arch.exception.pending &&
-+ !vcpu->arch.exception.injected)
-+ ex = &vcpu->arch.exception_vmexit;
-+ else
-+ ex = &vcpu->arch.exception;
-+
- /*
- * In guest mode, payload delivery should be deferred if the exception
-+ * will be intercepted by L1, e.g. KVM should not modify CR2 if L1
-@@ -5134,6 +5159,19 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
- return -EINVAL;
-
- process_nmi(vcpu);
-+
-+ /*
-+ * Flag that userspace is stuffing an exception; the next KVM_RUN will
-+ * morph the exception to a VM-Exit if appropriate. Do this only for
-+ * pending exceptions; already-injected exceptions are not subject to
-+ * interception. Note, userspace that conflates pending and injected
-+ * is hosed, and will incorrectly convert an injected exception into a
-+ * pending exception, which in turn may cause a spurious VM-Exit.
-+ */
-+ vcpu->arch.exception_from_userspace = events->exception.pending;
-+
-+ vcpu->arch.exception_vmexit.pending = false;
-+
- vcpu->arch.exception.injected = events->exception.injected;
- vcpu->arch.exception.pending = events->exception.pending;
- vcpu->arch.exception.vector = events->exception.nr;
-@@ -8164,18 +8202,17 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
- }
- }
-
--static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
-+static void inject_emulated_exception(struct kvm_vcpu *vcpu)
- {
- struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
-- if (ctxt->exception.vector == PF_VECTOR)
-- return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
-
-- if (ctxt->exception.error_code_valid)
-+ if (ctxt->exception.vector == PF_VECTOR)
-+ kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
-+ else if (ctxt->exception.error_code_valid)
- kvm_queue_exception_e(vcpu, ctxt->exception.vector,
- ctxt->exception.error_code);
- else
- kvm_queue_exception(vcpu, ctxt->exception.vector);
-- return false;
- }
-
- static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
-@@ -8773,8 +8810,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-
- if (ctxt->have_exception) {
- r = 1;
-- if (inject_emulated_exception(vcpu))
-- return r;
-+ inject_emulated_exception(vcpu);
- } else if (vcpu->arch.pio.count) {
- if (!vcpu->arch.pio.in) {
- /* FIXME: return into emulator if single-stepping. */
-@@ -9721,7 +9757,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
- */
- if (vcpu->arch.exception.injected)
- kvm_inject_exception(vcpu);
-- else if (vcpu->arch.exception.pending)
-+ else if (kvm_is_exception_pending(vcpu))
- ; /* see above */
- else if (vcpu->arch.nmi_injected)
- static_call(kvm_x86_inject_nmi)(vcpu);
-@@ -9748,6 +9784,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
- if (r < 0)
- goto out;
-
-+ /*
-+ * A pending exception VM-Exit should either result in nested VM-Exit
-+ * or force an immediate re-entry and exit to/from L2, and exception
-+ * VM-Exits cannot be injected (flag should _never_ be set).
-+ */
-+ WARN_ON_ONCE(vcpu->arch.exception_vmexit.injected ||
-+ vcpu->arch.exception_vmexit.pending);
-+
- /*
- * New events, other than exceptions, cannot be injected if KVM needs
- * to re-inject a previous event. See above comments on re-injecting
-@@ -9847,7 +9891,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
- kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
- *req_immediate_exit = true;
-
-- WARN_ON(vcpu->arch.exception.pending);
-+ WARN_ON(kvm_is_exception_pending(vcpu));
- return 0;
-
- out:
-@@ -10866,6 +10910,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
-
- int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
- {
-+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
- struct kvm_run *kvm_run = vcpu->run;
- int r;
-
-@@ -10924,6 +10969,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
- }
- }
-
-+ /*
-+ * If userspace set a pending exception and L2 is active, convert it to
-+ * a pending VM-Exit if L1 wants to intercept the exception.
-+ */
-+ if (vcpu->arch.exception_from_userspace && is_guest_mode(vcpu) &&
-+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, ex->vector,
-+ ex->error_code)) {
-+ kvm_queue_exception_vmexit(vcpu, ex->vector,
-+ ex->has_error_code, ex->error_code,
-+ ex->has_payload, ex->payload);
-+ ex->injected = false;
-+ ex->pending = false;
-+ }
-+ vcpu->arch.exception_from_userspace = false;
-+
- if (unlikely(vcpu->arch.complete_userspace_io)) {
- int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
- vcpu->arch.complete_userspace_io = NULL;
-@@ -11030,6 +11090,7 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
- kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
-
- vcpu->arch.exception.pending = false;
-+ vcpu->arch.exception_vmexit.pending = false;
-
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- }
-@@ -11410,7 +11471,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-
- if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
- r = -EBUSY;
-- if (vcpu->arch.exception.pending)
-+ if (kvm_is_exception_pending(vcpu))
- goto out;
- if (dbg->control & KVM_GUESTDBG_INJECT_DB)
- kvm_queue_exception(vcpu, DB_VECTOR);
-@@ -12643,7 +12704,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
- if (vcpu->arch.pv.pv_unhalted)
- return true;
-
-- if (vcpu->arch.exception.pending)
-+ if (kvm_is_exception_pending(vcpu))
- return true;
-
- if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
-@@ -12898,7 +12959,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
- {
- if (unlikely(!lapic_in_kernel(vcpu) ||
- kvm_event_needs_reinjection(vcpu) ||
-- vcpu->arch.exception.pending))
-+ kvm_is_exception_pending(vcpu)))
- return false;
-
- if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
-diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
-index 4147d27f9fbc..256745d1a2c3 100644
---- a/arch/x86/kvm/x86.h
-+++ b/arch/x86/kvm/x86.h
-@@ -82,10 +82,17 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
- void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
- int kvm_check_nested_events(struct kvm_vcpu *vcpu);
-
-+static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
-+{
-+ return vcpu->arch.exception.pending ||
-+ vcpu->arch.exception_vmexit.pending;
-+}
-+
- static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
- {
- vcpu->arch.exception.pending = false;
- vcpu->arch.exception.injected = false;
-+ vcpu->arch.exception_vmexit.pending = false;
- }
-
- static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
---
-2.35.1
-
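For VMMs, the practical consequence of the event-migration handling above is that a pending (not yet injected) exception restored with KVM_SET_VCPU_EVENTS is re-checked for L1 interception on the next KVM_RUN, regardless of whether nested state or event state was restored first. A rough userspace sketch, assuming vcpu_fd is an already-created vCPU file descriptor and that KVM_CAP_EXCEPTION_PAYLOAD has been enabled on the VM (required for KVM to accept a pending exception):

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Restore a pending #GP(0) on a vCPU. */
    static int restore_pending_gp(int vcpu_fd)
    {
            struct kvm_vcpu_events events;

            memset(&events, 0, sizeof(events));
            events.exception.pending = 1;       /* pending, not injected */
            events.exception.nr = 13;           /* #GP */
            events.exception.has_error_code = 1;
            events.exception.error_code = 0;
            events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;

            /*
             * With this patch, the next KVM_RUN morphs the pending exception
             * into a nested VM-Exit if L2 is active and L1 intercepts #GP.
             */
            return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
    }
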
+++ /dev/null
-From 1abbad519136449cb6a4dd537e30dbf56cb3ff9a Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 15:37:09 +0200
-Subject: KVM: x86: Report error when setting CPUID if Hyper-V allocation fails
-
-From: Sean Christopherson <seanjc@google.com>
-
-[ Upstream commit 3be29eb7b5251a772e2033761a9b67981fdfb0f7 ]
-
-Return -ENOMEM back to userspace if allocating the Hyper-V vCPU struct
-fails when enabling Hyper-V in guest CPUID. Silently ignoring failure
-means that KVM will not have an up-to-date CPUID cache if allocating the
-struct succeeds later on, e.g. when activating SynIC.
-
-Rejecting the CPUID operation also guarantees that vcpu->arch.hyperv is
-non-NULL if hyperv_enabled is true, which will allow for additional
-cleanup, e.g. in the eVMCS code.
-
-Note, the initialization needs to be done before CPUID is set, and more
-subtly before kvm_check_cpuid(), which potentially enables dynamic
-XFEATURES. Sadly, there's no easy way to avoid exposing Hyper-V details
-to CPUID or vice versa. Expose kvm_hv_vcpu_init() and the Hyper-V CPUID
-signature to CPUID instead of exposing cpuid_entry2_find() outside of
-CPUID code. It's hard to envision kvm_hv_vcpu_init() being misused,
-whereas cpuid_entry2_find() absolutely shouldn't be used outside of core
-CPUID code.
-
-Fixes: 10d7bf1e46dc ("KVM: x86: hyper-v: Cache guest CPUID leaves determining features availability")
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Link: https://lore.kernel.org/r/20220830133737.1539624-6-vkuznets@redhat.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/cpuid.c | 18 +++++++++++++++++-
- arch/x86/kvm/hyperv.c | 30 ++++++++++++++----------------
- arch/x86/kvm/hyperv.h | 6 +++++-
- 3 files changed, 36 insertions(+), 18 deletions(-)
-
-diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
-index 2796dde06302..7065462378e2 100644
---- a/arch/x86/kvm/cpuid.c
-+++ b/arch/x86/kvm/cpuid.c
-@@ -311,6 +311,15 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
- }
- EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
-
-+static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
-+{
-+ struct kvm_cpuid_entry2 *entry;
-+
-+ entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE,
-+ KVM_CPUID_INDEX_NOT_SIGNIFICANT);
-+ return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX;
-+}
-+
- static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
- {
- struct kvm_lapic *apic = vcpu->arch.apic;
-@@ -346,7 +355,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
- vcpu->arch.cr4_guest_rsvd_bits =
- __cr4_reserved_bits(guest_cpuid_has, vcpu);
-
-- kvm_hv_set_cpuid(vcpu);
-+ kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries,
-+ vcpu->arch.cpuid_nent));
-
- /* Invoke the vendor callback only after the above state is updated. */
- static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
-@@ -409,6 +419,12 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
- return 0;
- }
-
-+ if (kvm_cpuid_has_hyperv(e2, nent)) {
-+ r = kvm_hv_vcpu_init(vcpu);
-+ if (r)
-+ return r;
-+ }
-+
- r = kvm_check_cpuid(vcpu, e2, nent);
- if (r)
- return r;
-diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
-index 8aadd31ed058..bf4729e8cc80 100644
---- a/arch/x86/kvm/hyperv.c
-+++ b/arch/x86/kvm/hyperv.c
-@@ -38,9 +38,6 @@
- #include "irq.h"
- #include "fpu.h"
-
--/* "Hv#1" signature */
--#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
--
- #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
-
- static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
-@@ -934,7 +931,7 @@ static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
- stimer_prepare_msg(stimer);
- }
-
--static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
-+int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
- {
- struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
- int i;
-@@ -1984,26 +1981,27 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
- return HV_STATUS_SUCCESS;
- }
-
--void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
-+void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled)
- {
-+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
- struct kvm_cpuid_entry2 *entry;
-- struct kvm_vcpu_hv *hv_vcpu;
-
-- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE);
-- if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
-- vcpu->arch.hyperv_enabled = true;
-- } else {
-- vcpu->arch.hyperv_enabled = false;
-- return;
-- }
-+ vcpu->arch.hyperv_enabled = hyperv_enabled;
-
-- if (kvm_hv_vcpu_init(vcpu))
-+ if (!hv_vcpu) {
-+ /*
-+ * KVM should have already allocated kvm_vcpu_hv if Hyper-V is
-+ * enabled in CPUID.
-+ */
-+ WARN_ON_ONCE(vcpu->arch.hyperv_enabled);
- return;
--
-- hv_vcpu = to_hv_vcpu(vcpu);
-+ }
-
- memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache));
-
-+ if (!vcpu->arch.hyperv_enabled)
-+ return;
-+
- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
- if (entry) {
- hv_vcpu->cpuid_cache.features_eax = entry->eax;
-diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
-index da2737f2a956..1030b1b50552 100644
---- a/arch/x86/kvm/hyperv.h
-+++ b/arch/x86/kvm/hyperv.h
-@@ -23,6 +23,9 @@
-
- #include <linux/kvm_host.h>
-
-+/* "Hv#1" signature */
-+#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
-+
- /*
- * The #defines related to the synthetic debugger are required by KDNet, but
- * they are not documented in the Hyper-V TLFS because the synthetic debugger
-@@ -141,7 +144,8 @@ void kvm_hv_request_tsc_page_update(struct kvm *kvm);
-
- void kvm_hv_init_vm(struct kvm *kvm);
- void kvm_hv_destroy_vm(struct kvm *kvm);
--void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu);
-+int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
-+void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled);
- int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce);
- int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
- int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
---
-2.35.1
-
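The ordering constraint called out in the changelog (allocate vcpu->arch.hyperv before CPUID is validated and committed) is a common shape: acquire whatever state later steps depend on first, and fail the whole operation up front instead of continuing half-initialized. A minimal sketch with hypothetical names, loosely modeled on kvm_set_cpuid():

    #include <errno.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct vcpu_example {
            void *hv_state;     /* stand-in for vcpu->arch.hyperv */
            bool hv_enabled;
    };

    static bool cpuid_enables_hv(const unsigned int *entries, int nent)
    {
            /* "Hv#1" signature check, as in HYPERV_CPUID_SIGNATURE_EAX. */
            return nent > 0 && entries[0] == 0x31237648;
    }

    static int set_cpuid_example(struct vcpu_example *vcpu,
                                 const unsigned int *entries, int nent)
    {
            /* Allocate dependent state first so later steps can rely on it... */
            if (cpuid_enables_hv(entries, nent) && !vcpu->hv_state) {
                    vcpu->hv_state = calloc(1, 64); /* arbitrary sketch size */
                    if (!vcpu->hv_state)
                            return -ENOMEM; /* ...and report failure up front. */
            }

            /* Validation and commit of the new CPUID would follow here. */
            vcpu->hv_enabled = cpuid_enables_hv(entries, nent);
            return 0;
    }
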
+++ /dev/null
-From 062c933fa4de64d23cc794af74f175ad605bc167 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 30 Aug 2022 15:37:07 +0200
-Subject: KVM: x86: Zero out entire Hyper-V CPUID cache before processing
- entries
-
-From: Vitaly Kuznetsov <vkuznets@redhat.com>
-
-[ Upstream commit ce2196b831b1e9f8982b2904fc3e8658cc0e6573 ]
-
-Wipe the whole 'hv_vcpu->cpuid_cache' with memset() instead of having to
-zero each particular member when the corresponding CPUID entry was not
-found.
-
-No functional change intended.
-
-Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
-[sean: split to separate patch]
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Reviewed-by: Wei Liu <wei.liu@kernel.org>
-Link: https://lore.kernel.org/r/20220830133737.1539624-4-vkuznets@redhat.com
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Stable-dep-of: 3be29eb7b525 ("KVM: x86: Report error when setting CPUID if Hyper-V allocation fails")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/kvm/hyperv.c | 11 ++---------
- 1 file changed, 2 insertions(+), 9 deletions(-)
-
-diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
-index ed804447589c..611c349a08bf 100644
---- a/arch/x86/kvm/hyperv.c
-+++ b/arch/x86/kvm/hyperv.c
-@@ -2005,31 +2005,24 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
-
- hv_vcpu = to_hv_vcpu(vcpu);
-
-+ memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache));
-+
- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
- if (entry) {
- hv_vcpu->cpuid_cache.features_eax = entry->eax;
- hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
- hv_vcpu->cpuid_cache.features_edx = entry->edx;
-- } else {
-- hv_vcpu->cpuid_cache.features_eax = 0;
-- hv_vcpu->cpuid_cache.features_ebx = 0;
-- hv_vcpu->cpuid_cache.features_edx = 0;
- }
-
- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
- if (entry) {
- hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
- hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
-- } else {
-- hv_vcpu->cpuid_cache.enlightenments_eax = 0;
-- hv_vcpu->cpuid_cache.enlightenments_ebx = 0;
- }
-
- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
- if (entry)
- hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;
-- else
-- hv_vcpu->cpuid_cache.syndbg_cap_eax = 0;
- }
-
- int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce)
---
-2.35.1
-
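The memset() change above trades per-field zeroing in every else-branch for a single wipe of the whole cache before the conditional fills, which also keeps any field added later zeroed by default. In miniature:

    #include <string.h>

    struct cpuid_cache_example {
            unsigned int features_eax;
            unsigned int enlightenments_eax;
            unsigned int syndbg_cap_eax;
    };

    /* A zero argument stands in for "CPUID entry not found". */
    static void refresh_cache(struct cpuid_cache_example *c,
                              unsigned int features, unsigned int enl,
                              unsigned int syndbg)
    {
            /* One memset replaces per-field zeroing in every else-branch. */
            memset(c, 0, sizeof(*c));

            if (features)
                    c->features_eax = features;
            if (enl)
                    c->enlightenments_eax = enl;
            if (syndbg)
                    c->syndbg_cap_eax = syndbg;
    }
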
+++ /dev/null
-kvm-x86-mmu-fix-memoryleak-in-kvm_mmu_vendor_module_.patch
-kvm-x86-do-proper-cleanup-if-kvm_x86_ops-vm_init-fai.patch
-kvm-fix-memoryleak-in-kvm_init.patch
-kvm-x86-zero-out-entire-hyper-v-cpuid-cache-before-p.patch
-kvm-x86-check-for-existing-hyper-v-vcpu-in-kvm_hv_vc.patch
-kvm-x86-report-error-when-setting-cpuid-if-hyper-v-a.patch
-kvm-nvmx-treat-general-detect-db-dr7.gd-1-as-fault-l.patch
-kvm-nvmx-prioritize-tss-t-flag-dbs-over-monitor-trap.patch
-kvm-nvmx-ignore-sipi-that-arrives-in-l2-when-vcpu-is.patch
-kvm-vmx-inject-pf-on-encls-as-emulated-pf.patch
-kvm-nvmx-unconditionally-clear-mtf_pending-on-nested.patch
-kvm-x86-make-kvm_queued_exception-a-properly-named-v.patch
-kvm-x86-formalize-blocking-of-nested-pending-excepti.patch
-kvm-x86-hoist-nested-event-checks-above-event-inject.patch
-kvm-x86-evaluate-ability-to-inject-smi-nmi-irq-after.patch
-kvm-nvmx-add-a-helper-to-identify-low-priority-db-tr.patch
-kvm-x86-morph-pending-exceptions-to-pending-vm-exits.patch
-kvm-ppc-book3s-hv-fix-decrementer-migration.patch
-kvm-ppc-book3s-hv-p9-fix-irq-disabling-in-tick-accou.patch
-kvm-ppc-book3s-hv-p9-clear-vcpu-cpu-fields-before-en.patch
-kvm-ppc-book3s-hv-p9-restore-stolen-time-logging-in-.patch