From: Greg Kroah-Hartman Date: Thu, 27 Oct 2022 14:54:41 +0000 (+0200) Subject: drop kvm-6.0 directory. X-Git-Tag: v5.10.151~12 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=2c4031df59239848032d95433cb9f8dd1eb50144;p=thirdparty%2Fkernel%2Fstable-queue.git drop kvm-6.0 directory. not needed anymore. --- diff --git a/kvm-6.0/kvm-fix-memoryleak-in-kvm_init.patch b/kvm-6.0/kvm-fix-memoryleak-in-kvm_init.patch deleted file mode 100644 index 875f7c3c184..00000000000 --- a/kvm-6.0/kvm-fix-memoryleak-in-kvm_init.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 6df6ee6aa80c0ffb1f45001da2e5e20f45440c03 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 23 Aug 2022 14:34:14 +0800 -Subject: KVM: fix memoryleak in kvm_init() - -From: Miaohe Lin - -[ Upstream commit 5a2a961be2ad6a16eb388a80442443b353c11d16 ] - -When alloc_cpumask_var_node() fails for a certain cpu, there might be some -allocated cpumasks for percpu cpu_kick_mask. We should free these cpumasks -or memoryleak will occur. - -Fixes: baff59ccdc65 ("KVM: Pre-allocate cpumasks for kvm_make_all_cpus_request_except()") -Signed-off-by: Miaohe Lin -Link: https://lore.kernel.org/r/20220823063414.59778-1-linmiaohe@huawei.com -Signed-off-by: Sean Christopherson -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - virt/kvm/kvm_main.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 584a5bab3af3..dcf47da44844 100644 ---- a/virt/kvm/kvm_main.c -+++ b/virt/kvm/kvm_main.c -@@ -5881,7 +5881,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, - - r = kvm_async_pf_init(); - if (r) -- goto out_free_5; -+ goto out_free_4; - - kvm_chardev_ops.owner = module; - -@@ -5905,10 +5905,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, - - out_unreg: - kvm_async_pf_deinit(); --out_free_5: -+out_free_4: - for_each_possible_cpu(cpu) - free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); --out_free_4: - kmem_cache_destroy(kvm_vcpu_cache); - out_free_3: - unregister_reboot_notifier(&kvm_reboot_notifier); --- -2.35.1 - diff --git a/kvm-6.0/kvm-nvmx-add-a-helper-to-identify-low-priority-db-tr.patch b/kvm-6.0/kvm-nvmx-add-a-helper-to-identify-low-priority-db-tr.patch deleted file mode 100644 index 635d46ee951..00000000000 --- a/kvm-6.0/kvm-nvmx-add-a-helper-to-identify-low-priority-db-tr.patch +++ /dev/null @@ -1,84 +0,0 @@ -From a5026653be6ccf7dff187ec87d628ef4ba5d89c5 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:16:06 +0000 -Subject: KVM: nVMX: Add a helper to identify low-priority #DB traps - -From: Sean Christopherson - -[ Upstream commit 2b384165f4d15540f94998b751f50058642ad110 ] - -Add a helper to identify "low"-priority #DB traps, i.e. trap-like #DBs -that aren't TSS T flag #DBs, and tweak the related code to operate on any -queued exception. A future commit will separate exceptions that are -intercepted by L1, i.e. cause nested VM-Exit, from those that do NOT -trigger nested VM-Exit. I.e. there will be multiple exception structs -and multiple invocations of the helpers. - -No functional change intended. 
- -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-20-seanjc@google.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/vmx/nested.c | 23 +++++++++++++++++------ - 1 file changed, 17 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index 7655b5acbbcd..dfd5e13e5202 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -3871,14 +3871,24 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, - * from the emulator (because such #DBs are fault-like and thus don't trigger - * actions that fire on instruction retire). - */ --static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu) -+static unsigned long vmx_get_pending_dbg_trap(struct kvm_queued_exception *ex) - { -- if (!vcpu->arch.exception.pending || -- vcpu->arch.exception.vector != DB_VECTOR) -+ if (!ex->pending || ex->vector != DB_VECTOR) - return 0; - - /* General Detect #DBs are always fault-like. */ -- return vcpu->arch.exception.payload & ~DR6_BD; -+ return ex->payload & ~DR6_BD; -+} -+ -+/* -+ * Returns true if there's a pending #DB exception that is lower priority than -+ * a pending Monitor Trap Flag VM-Exit. TSS T-flag #DBs are not emulated by -+ * KVM, but could theoretically be injected by userspace. Note, this code is -+ * imperfect, see above. -+ */ -+static bool vmx_is_low_priority_db_trap(struct kvm_queued_exception *ex) -+{ -+ return vmx_get_pending_dbg_trap(ex) & ~DR6_BT; - } - - /* -@@ -3890,8 +3900,9 @@ static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu) - */ - static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) - { -- unsigned long pending_dbg = vmx_get_pending_dbg_trap(vcpu); -+ unsigned long pending_dbg; - -+ pending_dbg = vmx_get_pending_dbg_trap(&vcpu->arch.exception); - if (pending_dbg) - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg); - } -@@ -3961,7 +3972,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - * prioritize SMI over MTF and trap-like #DBs. - */ - if (vcpu->arch.exception.pending && -- !(vmx_get_pending_dbg_trap(vcpu) & ~DR6_BT)) { -+ !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) { - if (block_nested_exceptions) - return -EBUSY; - if (!nested_vmx_check_exception(vcpu, &exit_qual)) --- -2.35.1 - diff --git a/kvm-6.0/kvm-nvmx-ignore-sipi-that-arrives-in-l2-when-vcpu-is.patch b/kvm-6.0/kvm-nvmx-ignore-sipi-that-arrives-in-l2-when-vcpu-is.patch deleted file mode 100644 index a85c31b74ac..00000000000 --- a/kvm-6.0/kvm-nvmx-ignore-sipi-that-arrives-in-l2-when-vcpu-is.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 48c98fb5bd6fccc2492ce5f3b5e0e24ddd705383 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:15:57 +0000 -Subject: KVM: nVMX: Ignore SIPI that arrives in L2 when vCPU is not in WFS - -From: Sean Christopherson - -[ Upstream commit c2086eca86585bfd8132dd91e802497a202185c8 ] - -Fall through to handling other pending exception/events for L2 if SIPI -is pending while the CPU is not in Wait-for-SIPI. KVM correctly ignores -the event, but incorrectly returns immediately, e.g. a SIPI coincident -with another event could lead to KVM incorrectly routing the event to L1 -instead of L2. 
- -Fixes: bf0cd88ce363 ("KVM: x86: emulate wait-for-SIPI and SIPI-VMExit") -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-11-seanjc@google.com -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/vmx/nested.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index 93c34841e51e..c06c25fb9cbe 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -3937,10 +3937,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - return -EBUSY; - - clear_bit(KVM_APIC_SIPI, &apic->pending_events); -- if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) -+ if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { - nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0, - apic->sipi_vector & 0xFFUL); -- return 0; -+ return 0; -+ } -+ /* Fallthrough, the SIPI is completely ignored. */ - } - - /* --- -2.35.1 - diff --git a/kvm-6.0/kvm-nvmx-prioritize-tss-t-flag-dbs-over-monitor-trap.patch b/kvm-6.0/kvm-nvmx-prioritize-tss-t-flag-dbs-over-monitor-trap.patch deleted file mode 100644 index fa223ff6304..00000000000 --- a/kvm-6.0/kvm-nvmx-prioritize-tss-t-flag-dbs-over-monitor-trap.patch +++ /dev/null @@ -1,58 +0,0 @@ -From ce3c7cd86f3efdea63bb63bad636972751fe18fa Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:15:54 +0000 -Subject: KVM: nVMX: Prioritize TSS T-flag #DBs over Monitor Trap Flag - -From: Sean Christopherson - -[ Upstream commit b9d44f9091ac6c325fc2f7b7671b462fb36abbed ] - -Service TSS T-flag #DBs prior to pending MTFs, as such #DBs are higher -priority than MTF. KVM itself doesn't emulate TSS #DBs, and any such -exceptions injected from L1 will be handled by hardware (or morphed to -a fault-like exception if injection fails), but theoretically userspace -could pend a TSS T-flag #DB in conjunction with a pending MTF. - -Note, there's no known use case this fixes, it's purely to be technically -correct with respect to Intel's SDM. - -Cc: Oliver Upton -Cc: Peter Shier -Fixes: 5ef8acbdd687 ("KVM: nVMX: Emulate MTF when performing instruction emulation") -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-8-seanjc@google.com -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/vmx/nested.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index 4b96b5a25ba5..93c34841e51e 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -3944,15 +3944,17 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - } - - /* -- * Process any exceptions that are not debug traps before MTF. -+ * Process exceptions that are higher priority than Monitor Trap Flag: -+ * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but -+ * could theoretically come in from userspace), and ICEBP (INT1). - * - * Note that only a pending nested run can block a pending exception. - * Otherwise an injected NMI/interrupt should either be - * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO, - * while delivering the pending exception. 
- */ -- -- if (vcpu->arch.exception.pending && !vmx_get_pending_dbg_trap(vcpu)) { -+ if (vcpu->arch.exception.pending && -+ !(vmx_get_pending_dbg_trap(vcpu) & ~DR6_BT)) { - if (vmx->nested.nested_run_pending) - return -EBUSY; - if (!nested_vmx_check_exception(vcpu, &exit_qual)) --- -2.35.1 - diff --git a/kvm-6.0/kvm-nvmx-treat-general-detect-db-dr7.gd-1-as-fault-l.patch b/kvm-6.0/kvm-nvmx-treat-general-detect-db-dr7.gd-1-as-fault-l.patch deleted file mode 100644 index cbe40b353af..00000000000 --- a/kvm-6.0/kvm-nvmx-treat-general-detect-db-dr7.gd-1-as-fault-l.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 74aca2738bdc8deee250f0cdc6dd4959c9dfac3f Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:15:53 +0000 -Subject: KVM: nVMX: Treat General Detect #DB (DR7.GD=1) as fault-like - -From: Sean Christopherson - -[ Upstream commit 8d178f460772ecdee8e6d72389b43a8d35a14ff5 ] - -Exclude General Detect #DBs, which have fault-like behavior but also have -a non-zero payload (DR6.BD=1), from nVMX's handling of pending debug -traps. Opportunistically rewrite the comment to better document what is -being checked, i.e. "has a non-zero payload" vs. "has a payload", and to -call out the many caveats surrounding #DBs that KVM dodges one way or -another. - -Cc: Oliver Upton -Cc: Peter Shier -Fixes: 684c0422da71 ("KVM: nVMX: Handle pending #DB when injecting INIT VM-exit") -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-7-seanjc@google.com -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/vmx/nested.c | 36 +++++++++++++++++++++++++----------- - 1 file changed, 25 insertions(+), 11 deletions(-) - -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index 03d348fa6485..4b96b5a25ba5 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -3858,16 +3858,29 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, - } - - /* -- * Returns true if a debug trap is pending delivery. -+ * Returns true if a debug trap is (likely) pending delivery. Infer the class -+ * of a #DB (trap-like vs. fault-like) from the exception payload (to-be-DR6). -+ * Using the payload is flawed because code breakpoints (fault-like) and data -+ * breakpoints (trap-like) set the same bits in DR6 (breakpoint detected), i.e. -+ * this will return false positives if a to-be-injected code breakpoint #DB is -+ * pending (from KVM's perspective, but not "pending" across an instruction -+ * boundary). ICEBP, a.k.a. INT1, is also not reflected here even though it -+ * too is trap-like. - * -- * In KVM, debug traps bear an exception payload. As such, the class of a #DB -- * exception may be inferred from the presence of an exception payload. -+ * KVM "works" despite these flaws as ICEBP isn't currently supported by the -+ * emulator, Monitor Trap Flag is not marked pending on intercepted #DBs (the -+ * #DB has already happened), and MTF isn't marked pending on code breakpoints -+ * from the emulator (because such #DBs are fault-like and thus don't trigger -+ * actions that fire on instruction retire). 
- */ --static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu) -+static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu) - { -- return vcpu->arch.exception.pending && -- vcpu->arch.exception.nr == DB_VECTOR && -- vcpu->arch.exception.payload; -+ if (!vcpu->arch.exception.pending || -+ vcpu->arch.exception.nr != DB_VECTOR) -+ return 0; -+ -+ /* General Detect #DBs are always fault-like. */ -+ return vcpu->arch.exception.payload & ~DR6_BD; - } - - /* -@@ -3879,9 +3892,10 @@ static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu) - */ - static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) - { -- if (vmx_pending_dbg_trap(vcpu)) -- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, -- vcpu->arch.exception.payload); -+ unsigned long pending_dbg = vmx_get_pending_dbg_trap(vcpu); -+ -+ if (pending_dbg) -+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg); - } - - static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) -@@ -3938,7 +3952,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - * while delivering the pending exception. - */ - -- if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) { -+ if (vcpu->arch.exception.pending && !vmx_get_pending_dbg_trap(vcpu)) { - if (vmx->nested.nested_run_pending) - return -EBUSY; - if (!nested_vmx_check_exception(vcpu, &exit_qual)) --- -2.35.1 - diff --git a/kvm-6.0/kvm-nvmx-unconditionally-clear-mtf_pending-on-nested.patch b/kvm-6.0/kvm-nvmx-unconditionally-clear-mtf_pending-on-nested.patch deleted file mode 100644 index 891d74e5b46..00000000000 --- a/kvm-6.0/kvm-nvmx-unconditionally-clear-mtf_pending-on-nested.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 9960eda0640025a4b547fa5ca741bbb2ac8dc0c4 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:15:58 +0000 -Subject: KVM: nVMX: Unconditionally clear mtf_pending on nested VM-Exit - -From: Sean Christopherson - -[ Upstream commit 593a5c2e3c12a2f65967739267093255c47e9fe0 ] - -Clear mtf_pending on nested VM-Exit instead of handling the clear on a -case-by-case basis in vmx_check_nested_events(). The pending MTF should -never survive nested VM-Exit, as it is a property of KVM's run of the -current L2, i.e. should never affect the next L2 run by L1. In practice, -this is likely a nop as getting to L1 with nested_run_pending is -impossible, and KVM doesn't correctly handle morphing a pending exception -that occurs on a prior injected exception (need for re-injected exception -being the other case where MTF isn't cleared). However, KVM will -hopefully soon correctly deal with a pending exception on top of an -injected exception. - -Add a TODO to document that KVM has an inversion priority bug between -SMIs and MTF (and trap-like #DBS), and that KVM also doesn't properly -save/restore MTF across SMI/RSM. 
- -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-12-seanjc@google.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/vmx/nested.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index c06c25fb9cbe..0aa40ea496a8 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -3910,16 +3910,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - unsigned long exit_qual; - bool block_nested_events = - vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); -- bool mtf_pending = vmx->nested.mtf_pending; - struct kvm_lapic *apic = vcpu->arch.apic; - -- /* -- * Clear the MTF state. If a higher priority VM-exit is delivered first, -- * this state is discarded. -- */ -- if (!block_nested_events) -- vmx->nested.mtf_pending = false; -- - if (lapic_in_kernel(vcpu) && - test_bit(KVM_APIC_INIT, &apic->pending_events)) { - if (block_nested_events) -@@ -3928,6 +3920,9 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - clear_bit(KVM_APIC_INIT, &apic->pending_events); - if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) - nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); -+ -+ /* MTF is discarded if the vCPU is in WFS. */ -+ vmx->nested.mtf_pending = false; - return 0; - } - -@@ -3950,6 +3945,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but - * could theoretically come in from userspace), and ICEBP (INT1). - * -+ * TODO: SMIs have higher priority than MTF and trap-like #DBs (except -+ * for TSS T flag #DBs). KVM also doesn't save/restore pending MTF -+ * across SMI/RSM as it should; that needs to be addressed in order to -+ * prioritize SMI over MTF and trap-like #DBs. -+ * - * Note that only a pending nested run can block a pending exception. - * Otherwise an injected NMI/interrupt should either be - * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO, -@@ -3965,7 +3965,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - return 0; - } - -- if (mtf_pending) { -+ if (vmx->nested.mtf_pending) { - if (block_nested_events) - return -EBUSY; - nested_vmx_update_pending_dbg(vcpu); -@@ -4562,6 +4562,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - -+ /* Pending MTF traps are discarded on VM-Exit. 
*/ -+ vmx->nested.mtf_pending = false; -+ - /* trying to cancel vmlaunch/vmresume is a bug */ - WARN_ON_ONCE(vmx->nested.nested_run_pending); - --- -2.35.1 - diff --git a/kvm-6.0/kvm-ppc-book3s-hv-fix-decrementer-migration.patch b/kvm-6.0/kvm-ppc-book3s-hv-fix-decrementer-migration.patch deleted file mode 100644 index 758501be885..00000000000 --- a/kvm-6.0/kvm-ppc-book3s-hv-fix-decrementer-migration.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 890cdfadae6607769ae9504dec90df8e4cb95943 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 16 Aug 2022 19:25:17 -0300 -Subject: KVM: PPC: Book3S HV: Fix decrementer migration - -From: Fabiano Rosas - -[ Upstream commit 0a5bfb824a6ea35e54b7e5ac6f881beea5e309d2 ] - -We used to have a workaround[1] for a hang during migration that was -made ineffective when we converted the decrementer expiry to be -relative to guest timebase. - -The point of the workaround was that in the absence of an explicit -decrementer expiry value provided by userspace during migration, KVM -needs to initialize dec_expires to a value that will result in an -expired decrementer after subtracting the current guest timebase. That -stops the vcpu from hanging after migration due to a decrementer -that's too large. - -If the dec_expires is now relative to guest timebase, its -initialization needs to be guest timebase-relative as well, otherwise -we end up with a decrementer expiry that is still larger than the -guest timebase. - -1- https://git.kernel.org/torvalds/c/5855564c8ab2 - -Fixes: 3c1a4322bba7 ("KVM: PPC: Book3S HV: Change dec_expires to be relative to guest timebase") -Signed-off-by: Fabiano Rosas -Signed-off-by: Michael Ellerman -Link: https://lore.kernel.org/r/20220816222517.1916391-1-farosas@linux.ibm.com -Signed-off-by: Sasha Levin ---- - arch/powerpc/kvm/book3s_hv.c | 18 ++++++++++++++++-- - arch/powerpc/kvm/powerpc.c | 1 - - 2 files changed, 16 insertions(+), 3 deletions(-) - -diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c -index 57d0835e56fd..917abda9e5ce 100644 ---- a/arch/powerpc/kvm/book3s_hv.c -+++ b/arch/powerpc/kvm/book3s_hv.c -@@ -2517,10 +2517,24 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, - r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); - break; - case KVM_REG_PPC_TB_OFFSET: -+ { - /* round up to multiple of 2^24 */ -- vcpu->arch.vcore->tb_offset = -- ALIGN(set_reg_val(id, *val), 1UL << 24); -+ u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24); -+ -+ /* -+ * Now that we know the timebase offset, update the -+ * decrementer expiry with a guest timebase value. If -+ * the userspace does not set DEC_EXPIRY, this ensures -+ * a migrated vcpu at least starts with an expired -+ * decrementer, which is better than a large one that -+ * causes a hang. 
-+ */ -+ if (!vcpu->arch.dec_expires && tb_offset) -+ vcpu->arch.dec_expires = get_tb() + tb_offset; -+ -+ vcpu->arch.vcore->tb_offset = tb_offset; - break; -+ } - case KVM_REG_PPC_LPCR: - kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); - break; -diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c -index fb1490761c87..757491dd6b7b 100644 ---- a/arch/powerpc/kvm/powerpc.c -+++ b/arch/powerpc/kvm/powerpc.c -@@ -786,7 +786,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) - - hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; -- vcpu->arch.dec_expires = get_tb(); - - #ifdef CONFIG_KVM_EXIT_TIMING - mutex_init(&vcpu->arch.exit_timing_lock); --- -2.35.1 - diff --git a/kvm-6.0/kvm-ppc-book3s-hv-p9-clear-vcpu-cpu-fields-before-en.patch b/kvm-6.0/kvm-ppc-book3s-hv-p9-clear-vcpu-cpu-fields-before-en.patch deleted file mode 100644 index 654025b8bd7..00000000000 --- a/kvm-6.0/kvm-ppc-book3s-hv-p9-clear-vcpu-cpu-fields-before-en.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 3f7b9afa2b8e5b655f45b72f75eadd94a2ece613 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 8 Sep 2022 23:25:41 +1000 -Subject: KVM: PPC: Book3S HV P9: Clear vcpu cpu fields before enabling host - irqs - -From: Nicholas Piggin - -[ Upstream commit bc91c04bfff7cdf676011b97bb21b2861d7b21c9 ] - -On guest entry, vcpu->cpu and vcpu->arch.thread_cpu are set after -disabling host irqs. On guest exit there is a window where tick time -accounting briefly enables irqs before these fields are cleared. - -Move them up to ensure they are cleared before host irqs are run. -This is possibly not a problem, but is more symmetric and makes the -fields less surprising. - -Signed-off-by: Nicholas Piggin -Signed-off-by: Michael Ellerman -Link: https://lore.kernel.org/r/20220908132545.4085849-1-npiggin@gmail.com -Stable-dep-of: 1a5486b3c351 ("KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl") -Signed-off-by: Sasha Levin ---- - arch/powerpc/kvm/book3s_hv.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c -index d72df696837d..0f8dee657336 100644 ---- a/arch/powerpc/kvm/book3s_hv.c -+++ b/arch/powerpc/kvm/book3s_hv.c -@@ -4629,6 +4629,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - - set_irq_happened(trap); - -+ vcpu->cpu = -1; -+ vcpu->arch.thread_cpu = -1; -+ - context_tracking_guest_exit(); - if (!vtime_accounting_enabled_this_cpu()) { - powerpc_local_irq_pmu_restore(flags); -@@ -4644,9 +4647,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - } - vtime_account_guest_exit(); - -- vcpu->cpu = -1; -- vcpu->arch.thread_cpu = -1; -- - powerpc_local_irq_pmu_restore(flags); - - preempt_enable(); --- -2.35.1 - diff --git a/kvm-6.0/kvm-ppc-book3s-hv-p9-fix-irq-disabling-in-tick-accou.patch b/kvm-6.0/kvm-ppc-book3s-hv-p9-fix-irq-disabling-in-tick-accou.patch deleted file mode 100644 index 85f27bcf468..00000000000 --- a/kvm-6.0/kvm-ppc-book3s-hv-p9-fix-irq-disabling-in-tick-accou.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 5d52e92c4206869cc55d9336d147dc0ed9e6b857 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 8 Sep 2022 23:25:42 +1000 -Subject: KVM: PPC: Book3S HV P9: Fix irq disabling in tick accounting - -From: Nicholas Piggin - -[ Upstream commit c953f7500b65f2b157d1eb468ca8b86328834cce ] - -kvmhv_run_single_vcpu() disables PMIs as well as Linux irqs, -however the tick time accounting code enables and
disables irqs and -not PMIs within this region. By chance this might not actually cause -a bug, but it is clearly an incorrect use of the APIs. - -Fixes: 2251fbe76395e ("KVM: PPC: Book3S HV P9: Improve mtmsrd scheduling by delaying MSR[EE] disable") -Signed-off-by: Nicholas Piggin -Signed-off-by: Michael Ellerman -Link: https://lore.kernel.org/r/20220908132545.4085849-2-npiggin@gmail.com -Signed-off-by: Sasha Levin ---- - arch/powerpc/kvm/book3s_hv.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c -index 917abda9e5ce..d72df696837d 100644 ---- a/arch/powerpc/kvm/book3s_hv.c -+++ b/arch/powerpc/kvm/book3s_hv.c -@@ -4631,7 +4631,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - - context_tracking_guest_exit(); - if (!vtime_accounting_enabled_this_cpu()) { -- local_irq_enable(); -+ powerpc_local_irq_pmu_restore(flags); - /* - * Service IRQs here before vtime_account_guest_exit() so any - * ticks that occurred while running the guest are accounted to -@@ -4640,7 +4640,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - * interrupts here, which has the problem that it accounts - * interrupt processing overhead to the host. - */ -- local_irq_disable(); -+ powerpc_local_irq_pmu_save(flags); - } - vtime_account_guest_exit(); - --- -2.35.1 - diff --git a/kvm-6.0/kvm-ppc-book3s-hv-p9-restore-stolen-time-logging-in-.patch b/kvm-6.0/kvm-ppc-book3s-hv-p9-restore-stolen-time-logging-in-.patch deleted file mode 100644 index 767a53576a0..00000000000 --- a/kvm-6.0/kvm-ppc-book3s-hv-p9-restore-stolen-time-logging-in-.patch +++ /dev/null @@ -1,150 +0,0 @@ -From afe3395a6c00b7cb77f86640479cda6046f95a6c Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 8 Sep 2022 23:25:44 +1000 -Subject: KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl - -From: Nicholas Piggin - -[ Upstream commit 1a5486b3c3517aa1f608a10003ade4da122cb175 ] - -Stolen time logging in dtl was removed from the P9 path, so guests had -no stolen time accounting. Add it back in a simpler way that still -avoids locks and per-core accounting code. - -Fixes: ecb6a7207f92 ("KVM: PPC: Book3S HV P9: Remove most of the vcore logic") -Signed-off-by: Nicholas Piggin -Signed-off-by: Michael Ellerman -Link: https://lore.kernel.org/r/20220908132545.4085849-4-npiggin@gmail.com -Signed-off-by: Sasha Levin ---- - arch/powerpc/kvm/book3s_hv.c | 49 +++++++++++++++++++++++++++++++++--- - 1 file changed, 45 insertions(+), 4 deletions(-) - -diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c -index 0f8dee657336..2feacb1ee9d9 100644 ---- a/arch/powerpc/kvm/book3s_hv.c -+++ b/arch/powerpc/kvm/book3s_hv.c -@@ -249,6 +249,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) - - /* - * We use the vcpu_load/put functions to measure stolen time. -+ * - * Stolen time is counted as time when either the vcpu is able to - * run as part of a virtual core, but the task running the vcore - * is preempted or sleeping, or when the vcpu needs something done -@@ -278,6 +279,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) - * lock. The stolen times are measured in units of timebase ticks. - * (Note that the != TB_NIL checks below are purely defensive; - * they should never fail.) -+ * -+ * The POWER9 path is simpler, one vcpu per virtual core so the -+ * former case does not exist. 
If a vcpu is preempted when it is -+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate -+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible -+ * state in the P9 path. - */ - - static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb) -@@ -311,8 +318,14 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) - unsigned long flags; - u64 now; - -- if (cpu_has_feature(CPU_FTR_ARCH_300)) -+ if (cpu_has_feature(CPU_FTR_ARCH_300)) { -+ if (vcpu->arch.busy_preempt != TB_NIL) { -+ WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST); -+ vc->stolen_tb += mftb() - vcpu->arch.busy_preempt; -+ vcpu->arch.busy_preempt = TB_NIL; -+ } - return; -+ } - - now = mftb(); - -@@ -340,8 +353,21 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) - unsigned long flags; - u64 now; - -- if (cpu_has_feature(CPU_FTR_ARCH_300)) -+ if (cpu_has_feature(CPU_FTR_ARCH_300)) { -+ /* -+ * In the P9 path, RUNNABLE is not preemptible -+ * (nor takes host interrupts) -+ */ -+ WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE); -+ /* -+ * Account stolen time when preempted while the vcpu task is -+ * running in the kernel (but not in qemu, which is INACTIVE). -+ */ -+ if (task_is_running(current) && -+ vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) -+ vcpu->arch.busy_preempt = mftb(); - return; -+ } - - now = mftb(); - -@@ -740,6 +766,18 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, - vcpu->arch.dtl.dirty = true; - } - -+static void kvmppc_create_dtl_entry_p9(struct kvm_vcpu *vcpu, -+ struct kvmppc_vcore *vc, -+ u64 now) -+{ -+ unsigned long stolen; -+ -+ stolen = vc->stolen_tb - vcpu->arch.stolen_logged; -+ vcpu->arch.stolen_logged = vc->stolen_tb; -+ -+ __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now, stolen); -+} -+ - static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, - struct kvmppc_vcore *vc) - { -@@ -4534,7 +4572,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - vc = vcpu->arch.vcore; - vcpu->arch.ceded = 0; - vcpu->arch.run_task = current; -- vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; - vcpu->arch.last_inst = KVM_INST_FETCH_FAILED; - - /* See if the MMU is ready to go */ -@@ -4561,6 +4598,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - /* flags save not required, but irq_pmu has no disable/enable API */ - powerpc_local_irq_pmu_save(flags); - -+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; -+ - if (signal_pending(current)) - goto sigpend; - if (need_resched() || !kvm->arch.mmu_ready) -@@ -4605,7 +4644,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - - tb = mftb(); - -- __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); -+ kvmppc_create_dtl_entry_p9(vcpu, vc, tb + vc->tb_offset); - - trace_kvm_guest_enter(vcpu); - -@@ -4631,6 +4670,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - - vcpu->cpu = -1; - vcpu->arch.thread_cpu = -1; -+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; - - context_tracking_guest_exit(); - if (!vtime_accounting_enabled_this_cpu()) { -@@ -4708,6 +4748,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, - out: - vcpu->cpu = -1; - vcpu->arch.thread_cpu = -1; -+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; - powerpc_local_irq_pmu_restore(flags); - preempt_enable(); - goto done; --- -2.35.1 - diff --git a/kvm-6.0/kvm-vmx-inject-pf-on-encls-as-emulated-pf.patch b/kvm-6.0/kvm-vmx-inject-pf-on-encls-as-emulated-pf.patch deleted file mode 100644 index 1488e3f7c7e..00000000000 --- 
a/kvm-6.0/kvm-vmx-inject-pf-on-encls-as-emulated-pf.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 60973cdaf3fd26815a9f305379f542e6f93b166a Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:15:59 +0000 -Subject: KVM: VMX: Inject #PF on ENCLS as "emulated" #PF - -From: Sean Christopherson - -[ Upstream commit bfcb08a0b9e99b959814a329fabace22c3df046d ] - -Treat #PFs that occur during emulation of ENCLS as, wait for it, emulated -page faults. Practically speaking, this is a glorified nop as the -exception is never of the nested flavor, and it's extremely unlikely the -guest is relying on the side effect of an implicit INVLPG on the faulting -address. - -Fixes: 70210c044b4e ("KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions") -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-13-seanjc@google.com -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/vmx/sgx.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c -index aba8cebdc587..8f95c7c01433 100644 ---- a/arch/x86/kvm/vmx/sgx.c -+++ b/arch/x86/kvm/vmx/sgx.c -@@ -129,7 +129,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) - ex.address = gva; - ex.error_code_valid = true; - ex.nested_page_fault = false; -- kvm_inject_page_fault(vcpu, &ex); -+ kvm_inject_emulated_page_fault(vcpu, &ex); - } else { - kvm_inject_gp(vcpu, 0); - } --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-check-for-existing-hyper-v-vcpu-in-kvm_hv_vc.patch b/kvm-6.0/kvm-x86-check-for-existing-hyper-v-vcpu-in-kvm_hv_vc.patch deleted file mode 100644 index b23938f76c5..00000000000 --- a/kvm-6.0/kvm-x86-check-for-existing-hyper-v-vcpu-in-kvm_hv_vc.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 2af948a8e9becc744b8a2f1d0718d6e255df8b46 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 15:37:08 +0200 -Subject: KVM: x86: Check for existing Hyper-V vCPU in kvm_hv_vcpu_init() - -From: Sean Christopherson - -[ Upstream commit 1cac8d9f6bd25df3713103e44e2d9ca0c2e03c33 ] - -When potentially allocating/initializing the Hyper-V vCPU struct, check -for an existing instance in kvm_hv_vcpu_init() instead of requiring -callers to perform the check. Relying on callers to do the check is -risky as it's all too easy for KVM to overwrite vcpu->arch.hyperv and -leak memory, and it adds additional burden on callers without much -benefit. - -No functional change intended. 
- -Signed-off-by: Sean Christopherson -Signed-off-by: Vitaly Kuznetsov -Signed-off-by: Sean Christopherson -Reviewed-by: Wei Liu -Link: https://lore.kernel.org/r/20220830133737.1539624-5-vkuznets@redhat.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 3be29eb7b525 ("KVM: x86: Report error when setting CPUID if Hyper-V allocation fails") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/hyperv.c | 27 ++++++++++++--------------- - 1 file changed, 12 insertions(+), 15 deletions(-) - -diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c -index 611c349a08bf..8aadd31ed058 100644 ---- a/arch/x86/kvm/hyperv.c -+++ b/arch/x86/kvm/hyperv.c -@@ -936,9 +936,12 @@ static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) - - static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) - { -- struct kvm_vcpu_hv *hv_vcpu; -+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - int i; - -+ if (hv_vcpu) -+ return 0; -+ - hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT); - if (!hv_vcpu) - return -ENOMEM; -@@ -962,11 +965,9 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) - struct kvm_vcpu_hv_synic *synic; - int r; - -- if (!to_hv_vcpu(vcpu)) { -- r = kvm_hv_vcpu_init(vcpu); -- if (r) -- return r; -- } -+ r = kvm_hv_vcpu_init(vcpu); -+ if (r) -+ return r; - - synic = to_hv_synic(vcpu); - -@@ -1660,10 +1661,8 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) - if (!host && !vcpu->arch.hyperv_enabled) - return 1; - -- if (!to_hv_vcpu(vcpu)) { -- if (kvm_hv_vcpu_init(vcpu)) -- return 1; -- } -+ if (kvm_hv_vcpu_init(vcpu)) -+ return 1; - - if (kvm_hv_msr_partition_wide(msr)) { - int r; -@@ -1683,10 +1682,8 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) - if (!host && !vcpu->arch.hyperv_enabled) - return 1; - -- if (!to_hv_vcpu(vcpu)) { -- if (kvm_hv_vcpu_init(vcpu)) -- return 1; -- } -+ if (kvm_hv_vcpu_init(vcpu)) -+ return 1; - - if (kvm_hv_msr_partition_wide(msr)) { - int r; -@@ -2000,7 +1997,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) - return; - } - -- if (!to_hv_vcpu(vcpu) && kvm_hv_vcpu_init(vcpu)) -+ if (kvm_hv_vcpu_init(vcpu)) - return; - - hv_vcpu = to_hv_vcpu(vcpu); --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-do-proper-cleanup-if-kvm_x86_ops-vm_init-fai.patch b/kvm-6.0/kvm-x86-do-proper-cleanup-if-kvm_x86_ops-vm_init-fai.patch deleted file mode 100644 index c5ec512cb3d..00000000000 --- a/kvm-6.0/kvm-x86-do-proper-cleanup-if-kvm_x86_ops-vm_init-fai.patch +++ /dev/null @@ -1,52 +0,0 @@ -From e1f7f2457b1342553570bfcaeadae1496f75eec1 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 29 Jul 2022 15:43:29 -0700 -Subject: kvm: x86: Do proper cleanup if kvm_x86_ops->vm_init() fails - -From: Junaid Shahid - -[ Upstream commit b24ede22538b4d984cbe20532bbcb303692e7f52 ] - -If vm_init() fails [which can happen, for instance, if a memory -allocation fails during avic_vm_init()], we need to cleanup some -state in order to avoid resource leaks. 
-Signed-off-by: Junaid Shahid -Link: https://lore.kernel.org/r/20220729224329.323378-1-junaids@google.com -Signed-off-by: Sean Christopherson -Stable-dep-of: 5a2a961be2ad ("KVM: fix memoryleak in kvm_init()") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/x86.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index e2435090f225..14cb589683a1 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -12103,6 +12103,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) - if (ret) - goto out_page_track; - -+ ret = static_call(kvm_x86_vm_init)(kvm); -+ if (ret) -+ goto out_uninit_mmu; -+ - INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); - INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); - atomic_set(&kvm->arch.noncoherent_dma_count, 0); -@@ -12138,8 +12142,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) - kvm_hv_init_vm(kvm); - kvm_xen_init_vm(kvm); - -- return static_call(kvm_x86_vm_init)(kvm); -+ return 0; - -+out_uninit_mmu: -+ kvm_mmu_uninit_vm(kvm); - out_page_track: - kvm_page_track_cleanup(kvm); - out: --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-evaluate-ability-to-inject-smi-nmi-irq-after.patch b/kvm-6.0/kvm-x86-evaluate-ability-to-inject-smi-nmi-irq-after.patch deleted file mode 100644 index 0b96b6678e5..00000000000 --- a/kvm-6.0/kvm-x86-evaluate-ability-to-inject-smi-nmi-irq-after.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 37892c242b5293bddc508ec7fa3c598104fc29c7 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:16:05 +0000 -Subject: KVM: x86: Evaluate ability to inject SMI/NMI/IRQ after potential - VM-Exit - -From: Sean Christopherson - -[ Upstream commit 28360f88706837fc3f1ac8944b45b4a630a71c75 ] - -Determine whether or not new events can be injected after checking nested -events. If a VM-Exit occurred during nested event handling, any previous -event that needed re-injection is gone from KVM's perspective; the event -is captured in the vmc*12 VM-Exit information, but doesn't exist in terms -of what needs to be done for entry to L1. - -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-19-seanjc@google.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/x86.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 15229a5ad9ff..01d59f93d93e 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -9683,7 +9683,7 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) - - static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) - { -- bool can_inject = !kvm_event_needs_reinjection(vcpu); -+ bool can_inject; - int r; - - /* -@@ -9748,7 +9748,13 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) - if (r < 0) - goto out; - -- /* try to inject new event if pending */ -+ /* -+ * New events, other than exceptions, cannot be injected if KVM needs -+ * to re-inject a previous event. See above comments on re-injecting -+ * for why pending exceptions get priority.
-+ */ -+ can_inject = !kvm_event_needs_reinjection(vcpu); -+ - if (vcpu->arch.exception.pending) { - /* - * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-formalize-blocking-of-nested-pending-excepti.patch b/kvm-6.0/kvm-x86-formalize-blocking-of-nested-pending-excepti.patch deleted file mode 100644 index 4b9e12576b8..00000000000 --- a/kvm-6.0/kvm-x86-formalize-blocking-of-nested-pending-excepti.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 3b49b279b88de56dc9d042feb7b8bf101a21ea30 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:16:02 +0000 -Subject: KVM: x86: Formalize blocking of nested pending exceptions - -From: Sean Christopherson - -[ Upstream commit 72c14e00bdc445e96045c28d04bba45cbe69cf95 ] - -Capture nested_run_pending as block_pending_exceptions so that the logic -of why exceptions are blocked only needs to be documented once instead of -at every place that employs the logic. - -No functional change intended. - -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-16-seanjc@google.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/svm/nested.c | 26 ++++++++++++++++---------- - arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++---------- - 2 files changed, 33 insertions(+), 20 deletions(-) - -diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c -index 8f991592d277..a6111392985c 100644 ---- a/arch/x86/kvm/svm/nested.c -+++ b/arch/x86/kvm/svm/nested.c -@@ -1356,10 +1356,22 @@ static inline bool nested_exit_on_init(struct vcpu_svm *svm) - - static int svm_check_nested_events(struct kvm_vcpu *vcpu) - { -- struct vcpu_svm *svm = to_svm(vcpu); -- bool block_nested_events = -- kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending; - struct kvm_lapic *apic = vcpu->arch.apic; -+ struct vcpu_svm *svm = to_svm(vcpu); -+ /* -+ * Only a pending nested run blocks a pending exception. If there is a -+ * previously injected event, the pending exception occurred while said -+ * event was being delivered and thus needs to be handled. -+ */ -+ bool block_nested_exceptions = svm->nested.nested_run_pending; -+ /* -+ * New events (not exceptions) are only recognized at instruction -+ * boundaries. If an event needs reinjection, then KVM is handling a -+ * VM-Exit that occurred _during_ instruction execution; new events are -+ * blocked until the instruction completes. -+ */ -+ bool block_nested_events = block_nested_exceptions || -+ kvm_event_needs_reinjection(vcpu); - - if (lapic_in_kernel(vcpu) && - test_bit(KVM_APIC_INIT, &apic->pending_events)) { -@@ -1372,13 +1384,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) - } - - if (vcpu->arch.exception.pending) { -- /* -- * Only a pending nested run can block a pending exception. -- * Otherwise an injected NMI/interrupt should either be -- * lost or delivered to the nested hypervisor in the EXITINTINFO -- * vmcb field, while delivering the pending exception. 
-- */ -- if (svm->nested.nested_run_pending) -+ if (block_nested_exceptions) - return -EBUSY; - if (!nested_exit_on_exception(svm)) - return 0; -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index 83239d47fc0f..7655b5acbbcd 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -3904,11 +3904,23 @@ static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) - - static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - { -+ struct kvm_lapic *apic = vcpu->arch.apic; - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long exit_qual; -- bool block_nested_events = -- vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); -- struct kvm_lapic *apic = vcpu->arch.apic; -+ /* -+ * Only a pending nested run blocks a pending exception. If there is a -+ * previously injected event, the pending exception occurred while said -+ * event was being delivered and thus needs to be handled. -+ */ -+ bool block_nested_exceptions = vmx->nested.nested_run_pending; -+ /* -+ * New events (not exceptions) are only recognized at instruction -+ * boundaries. If an event needs reinjection, then KVM is handling a -+ * VM-Exit that occurred _during_ instruction execution; new events are -+ * blocked until the instruction completes. -+ */ -+ bool block_nested_events = block_nested_exceptions || -+ kvm_event_needs_reinjection(vcpu); - - if (lapic_in_kernel(vcpu) && - test_bit(KVM_APIC_INIT, &apic->pending_events)) { -@@ -3947,15 +3959,10 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - * for TSS T flag #DBs). KVM also doesn't save/restore pending MTF - * across SMI/RSM as it should; that needs to be addressed in order to - * prioritize SMI over MTF and trap-like #DBs. -- * -- * Note that only a pending nested run can block a pending exception. -- * Otherwise an injected NMI/interrupt should either be -- * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO, -- * while delivering the pending exception. - */ - if (vcpu->arch.exception.pending && - !(vmx_get_pending_dbg_trap(vcpu) & ~DR6_BT)) { -- if (vmx->nested.nested_run_pending) -+ if (block_nested_exceptions) - return -EBUSY; - if (!nested_vmx_check_exception(vcpu, &exit_qual)) - goto no_vmexit; -@@ -3972,7 +3979,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - } - - if (vcpu->arch.exception.pending) { -- if (vmx->nested.nested_run_pending) -+ if (block_nested_exceptions) - return -EBUSY; - if (!nested_vmx_check_exception(vcpu, &exit_qual)) - goto no_vmexit; --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-hoist-nested-event-checks-above-event-inject.patch b/kvm-6.0/kvm-x86-hoist-nested-event-checks-above-event-inject.patch deleted file mode 100644 index 420a0c2d487..00000000000 --- a/kvm-6.0/kvm-x86-hoist-nested-event-checks-above-event-inject.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 7a5eb73b397d1336923a66280c1a818b1479792e Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:16:04 +0000 -Subject: KVM: x86: Hoist nested event checks above event injection logic - -From: Sean Christopherson - -[ Upstream commit 6c593b5276e6ce411dcdf03e2f7d4b93c2e7138e ] - -Perform nested event checks before re-injecting exceptions/events into -L2. If a pending exception causes VM-Exit to L1, re-injecting events -into vmcs02 is premature and wasted effort. Take care to ensure events -that need to be re-injected are still re-injected if checking for nested -events "fails", i.e. 
if KVM needs to force an immediate entry+exit to -complete the to-be-re-injected event. - -Keep the "can_inject" logic the same for now; it too can be pushed below -the nested checks, but is a slightly riskier change (see past bugs about -events not being properly purged on nested VM-Exit). - -Add and/or modify comments to better document the various interactions. -Of note is the comment regarding "blocking" previously injected NMIs and -IRQs if an exception is pending. The old comment isn't wrong strictly -speaking, but it failed to capture the reason why the logic even exists. - -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-18-seanjc@google.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/x86.c | 89 +++++++++++++++++++++++++++------------------- - 1 file changed, 53 insertions(+), 36 deletions(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 14182b5b2c93..15229a5ad9ff 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -9683,53 +9683,70 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) - - static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) - { -+ bool can_inject = !kvm_event_needs_reinjection(vcpu); - int r; -- bool can_inject = true; - -- /* try to reinject previous events if any */ -+ /* -+ * Process nested events first, as nested VM-Exit supercedes event -+ * re-injection. If there's an event queued for re-injection, it will -+ * be saved into the appropriate vmc{b,s}12 fields on nested VM-Exit. -+ */ -+ if (is_guest_mode(vcpu)) -+ r = kvm_check_nested_events(vcpu); -+ else -+ r = 0; - -- if (vcpu->arch.exception.injected) { -- kvm_inject_exception(vcpu); -- can_inject = false; -- } - /* -- * Do not inject an NMI or interrupt if there is a pending -- * exception. Exceptions and interrupts are recognized at -- * instruction boundaries, i.e. the start of an instruction. -- * Trap-like exceptions, e.g. #DB, have higher priority than -- * NMIs and interrupts, i.e. traps are recognized before an -- * NMI/interrupt that's pending on the same instruction. -- * Fault-like exceptions, e.g. #GP and #PF, are the lowest -- * priority, but are only generated (pended) during instruction -- * execution, i.e. a pending fault-like exception means the -- * fault occurred on the *previous* instruction and must be -- * serviced prior to recognizing any new events in order to -- * fully complete the previous instruction. -+ * Re-inject exceptions and events *especially* if immediate entry+exit -+ * to/from L2 is needed, as any event that has already been injected -+ * into L2 needs to complete its lifecycle before injecting a new event. -+ * -+ * Don't re-inject an NMI or interrupt if there is a pending exception. -+ * This collision arises if an exception occurred while vectoring the -+ * injected event, KVM intercepted said exception, and KVM ultimately -+ * determined the fault belongs to the guest and queues the exception -+ * for injection back into the guest. -+ * -+ * "Injected" interrupts can also collide with pending exceptions if -+ * userspace ignores the "ready for injection" flag and blindly queues -+ * an interrupt. In that case, prioritizing the exception is correct, -+ * as the exception "occurred" before the exit to userspace. Trap-like -+ * exceptions, e.g. most #DBs, have higher priority than interrupts.
-+ * And while fault-like exceptions, e.g. #GP and #PF, are the lowest -+ * priority, they're only generated (pended) during instruction -+ * execution, and interrupts are recognized at instruction boundaries. -+ * Thus a pending fault-like exception means the fault occurred on the -+ * *previous* instruction and must be serviced prior to recognizing any -+ * new events in order to fully complete the previous instruction. - */ -- else if (!vcpu->arch.exception.pending) { -- if (vcpu->arch.nmi_injected) { -- static_call(kvm_x86_inject_nmi)(vcpu); -- can_inject = false; -- } else if (vcpu->arch.interrupt.injected) { -- static_call(kvm_x86_inject_irq)(vcpu, true); -- can_inject = false; -- } -- } -+ if (vcpu->arch.exception.injected) -+ kvm_inject_exception(vcpu); -+ else if (vcpu->arch.exception.pending) -+ ; /* see above */ -+ else if (vcpu->arch.nmi_injected) -+ static_call(kvm_x86_inject_nmi)(vcpu); -+ else if (vcpu->arch.interrupt.injected) -+ static_call(kvm_x86_inject_irq)(vcpu, true); - -+ /* -+ * Exceptions that morph to VM-Exits are handled above, and pending -+ * exceptions on top of injected exceptions that do not VM-Exit should -+ * either morph to #DF or, sadly, override the injected exception. -+ */ - WARN_ON_ONCE(vcpu->arch.exception.injected && - vcpu->arch.exception.pending); - - /* -- * Call check_nested_events() even if we reinjected a previous event -- * in order for caller to determine if it should require immediate-exit -- * from L2 to L1 due to pending L1 events which require exit -- * from L2 to L1. -+ * Bail if immediate entry+exit to/from the guest is needed to complete -+ * nested VM-Enter or event re-injection so that a different pending -+ * event can be serviced (or if KVM needs to exit to userspace). -+ * -+ * Otherwise, continue processing events even if VM-Exit occurred. The -+ * VM-Exit will have cleared exceptions that were meant for L2, but -+ * there may now be events that can be injected into L1. - */ -- if (is_guest_mode(vcpu)) { -- r = kvm_check_nested_events(vcpu); -- if (r < 0) -- goto out; -- } -+ if (r < 0) -+ goto out; - - /* try to inject new event if pending */ - if (vcpu->arch.exception.pending) { --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-make-kvm_queued_exception-a-properly-named-v.patch b/kvm-6.0/kvm-x86-make-kvm_queued_exception-a-properly-named-v.patch deleted file mode 100644 index 22701e9f97b..00000000000 --- a/kvm-6.0/kvm-x86-make-kvm_queued_exception-a-properly-named-v.patch +++ /dev/null @@ -1,555 +0,0 @@ -From 35646ab067697782bc4fe48ae07c7b0515e6446d Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:16:01 +0000 -Subject: KVM: x86: Make kvm_queued_exception a properly named, visible struct - -From: Sean Christopherson - -[ Upstream commit d4963e319f1f7851a098df6610a27f9f4cf6d42a ] - -Move the definition of "struct kvm_queued_exception" out of kvm_vcpu_arch -in anticipation of adding a second instance in kvm_vcpu_arch to handle -exceptions that occur when vectoring an injected exception and are -morphed to VM-Exit instead of leading to #DF. - -Opportunistically take advantage of the churn to rename "nr" to "vector". - -No functional change intended. 
- -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-15-seanjc@google.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time") -Signed-off-by: Sasha Levin ---- - arch/x86/include/asm/kvm_host.h | 23 +++++----- - arch/x86/kvm/svm/nested.c | 47 ++++++++++--------- - arch/x86/kvm/svm/svm.c | 14 +++--- - arch/x86/kvm/vmx/nested.c | 42 +++++++++-------- - arch/x86/kvm/vmx/vmx.c | 20 ++++----- - arch/x86/kvm/x86.c | 80 ++++++++++++++++----------------- - arch/x86/kvm/x86.h | 3 +- - 7 files changed, 113 insertions(+), 116 deletions(-) - -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index aa381ab69a19..36e4fde359a7 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -639,6 +639,17 @@ struct kvm_vcpu_xen { - struct timer_list poll_timer; - }; - -+struct kvm_queued_exception { -+ bool pending; -+ bool injected; -+ bool has_error_code; -+ u8 vector; -+ u32 error_code; -+ unsigned long payload; -+ bool has_payload; -+ u8 nested_apf; -+}; -+ - struct kvm_vcpu_arch { - /* - * rip and regs accesses must go through -@@ -738,16 +749,8 @@ struct kvm_vcpu_arch { - - u8 event_exit_inst_len; - -- struct kvm_queued_exception { -- bool pending; -- bool injected; -- bool has_error_code; -- u8 nr; -- u32 error_code; -- unsigned long payload; -- bool has_payload; -- u8 nested_apf; -- } exception; -+ /* Exceptions to be injected to the guest. */ -+ struct kvm_queued_exception exception; - - struct kvm_queued_interrupt { - bool injected; -diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c -index 76dcc8a3e849..8f991592d277 100644 ---- a/arch/x86/kvm/svm/nested.c -+++ b/arch/x86/kvm/svm/nested.c -@@ -468,7 +468,7 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm, - unsigned int nr; - - if (vcpu->arch.exception.injected) { -- nr = vcpu->arch.exception.nr; -+ nr = vcpu->arch.exception.vector; - exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; - - if (vcpu->arch.exception.has_error_code) { -@@ -1306,42 +1306,45 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu) - - static bool nested_exit_on_exception(struct vcpu_svm *svm) - { -- unsigned int nr = svm->vcpu.arch.exception.nr; -+ unsigned int vector = svm->vcpu.arch.exception.vector; - -- return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr)); -+ return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector)); - } - --static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) -+static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) - { -- unsigned int nr = svm->vcpu.arch.exception.nr; -+ struct kvm_queued_exception *ex = &vcpu->arch.exception; -+ struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; - -- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; -+ vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector; - vmcb->control.exit_code_hi = 0; - -- if (svm->vcpu.arch.exception.has_error_code) -- vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code; -+ if (ex->has_error_code) -+ vmcb->control.exit_info_1 = ex->error_code; - - /* - * EXITINFO2 is undefined for all exception intercepts other - * than #PF. 
- */ -- if (nr == PF_VECTOR) { -- if (svm->vcpu.arch.exception.nested_apf) -- vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; -- else if (svm->vcpu.arch.exception.has_payload) -- vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; -+ if (ex->vector == PF_VECTOR) { -+ if (ex->nested_apf) -+ vmcb->control.exit_info_2 = vcpu->arch.apf.nested_apf_token; -+ else if (ex->has_payload) -+ vmcb->control.exit_info_2 = ex->payload; - else -- vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; -- } else if (nr == DB_VECTOR) { -+ vmcb->control.exit_info_2 = vcpu->arch.cr2; -+ } else if (ex->vector == DB_VECTOR) { - /* See inject_pending_event. */ -- kvm_deliver_exception_payload(&svm->vcpu); -- if (svm->vcpu.arch.dr7 & DR7_GD) { -- svm->vcpu.arch.dr7 &= ~DR7_GD; -- kvm_update_dr7(&svm->vcpu); -+ kvm_deliver_exception_payload(vcpu, ex); -+ -+ if (vcpu->arch.dr7 & DR7_GD) { -+ vcpu->arch.dr7 &= ~DR7_GD; -+ kvm_update_dr7(vcpu); - } -- } else -- WARN_ON(svm->vcpu.arch.exception.has_payload); -+ } else { -+ WARN_ON(ex->has_payload); -+ } - - nested_svm_vmexit(svm); - } -@@ -1379,7 +1382,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) - return -EBUSY; - if (!nested_exit_on_exception(svm)) - return 0; -- nested_svm_inject_exception_vmexit(svm); -+ nested_svm_inject_exception_vmexit(vcpu); - return 0; - } - -diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c -index f3813dbacb9f..b96c091f6c3d 100644 ---- a/arch/x86/kvm/svm/svm.c -+++ b/arch/x86/kvm/svm/svm.c -@@ -463,22 +463,20 @@ static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu) - - static void svm_queue_exception(struct kvm_vcpu *vcpu) - { -+ struct kvm_queued_exception *ex = &vcpu->arch.exception; - struct vcpu_svm *svm = to_svm(vcpu); -- unsigned nr = vcpu->arch.exception.nr; -- bool has_error_code = vcpu->arch.exception.has_error_code; -- u32 error_code = vcpu->arch.exception.error_code; - -- kvm_deliver_exception_payload(vcpu); -+ kvm_deliver_exception_payload(vcpu, ex); - -- if (kvm_exception_is_soft(nr) && -+ if (kvm_exception_is_soft(ex->vector) && - svm_update_soft_interrupt_rip(vcpu)) - return; - -- svm->vmcb->control.event_inj = nr -+ svm->vmcb->control.event_inj = ex->vector - | SVM_EVTINJ_VALID -- | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) -+ | (ex->has_error_code ? SVM_EVTINJ_VALID_ERR : 0) - | SVM_EVTINJ_TYPE_EXEPT; -- svm->vmcb->control.event_inj_err = error_code; -+ svm->vmcb->control.event_inj_err = ex->error_code; - } - - static void svm_init_erratum_383(void) -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index 0aa40ea496a8..83239d47fc0f 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -446,29 +446,27 @@ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, - */ - static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) - { -+ struct kvm_queued_exception *ex = &vcpu->arch.exception; - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); -- unsigned int nr = vcpu->arch.exception.nr; -- bool has_payload = vcpu->arch.exception.has_payload; -- unsigned long payload = vcpu->arch.exception.payload; - -- if (nr == PF_VECTOR) { -- if (vcpu->arch.exception.nested_apf) { -+ if (ex->vector == PF_VECTOR) { -+ if (ex->nested_apf) { - *exit_qual = vcpu->arch.apf.nested_apf_token; - return 1; - } -- if (nested_vmx_is_page_fault_vmexit(vmcs12, -- vcpu->arch.exception.error_code)) { -- *exit_qual = has_payload ? 
payload : vcpu->arch.cr2; -+ if (nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code)) { -+ *exit_qual = ex->has_payload ? ex->payload : vcpu->arch.cr2; - return 1; - } -- } else if (vmcs12->exception_bitmap & (1u << nr)) { -- if (nr == DB_VECTOR) { -- if (!has_payload) { -- payload = vcpu->arch.dr6; -- payload &= ~DR6_BT; -- payload ^= DR6_ACTIVE_LOW; -+ } else if (vmcs12->exception_bitmap & (1u << ex->vector)) { -+ if (ex->vector == DB_VECTOR) { -+ if (ex->has_payload) { -+ *exit_qual = ex->payload; -+ } else { -+ *exit_qual = vcpu->arch.dr6; -+ *exit_qual &= ~DR6_BT; -+ *exit_qual ^= DR6_ACTIVE_LOW; - } -- *exit_qual = payload; - } else - *exit_qual = 0; - return 1; -@@ -3723,7 +3721,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, - is_double_fault(exit_intr_info))) { - vmcs12->idt_vectoring_info_field = 0; - } else if (vcpu->arch.exception.injected) { -- nr = vcpu->arch.exception.nr; -+ nr = vcpu->arch.exception.vector; - idt_vectoring = nr | VECTORING_INFO_VALID_MASK; - - if (kvm_exception_is_soft(nr)) { -@@ -3827,11 +3825,11 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, - unsigned long exit_qual) - { -+ struct kvm_queued_exception *ex = &vcpu->arch.exception; -+ u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); -- unsigned int nr = vcpu->arch.exception.nr; -- u32 intr_info = nr | INTR_INFO_VALID_MASK; - -- if (vcpu->arch.exception.has_error_code) { -+ if (ex->has_error_code) { - /* - * Intel CPUs do not generate error codes with bits 31:16 set, - * and more importantly VMX disallows setting bits 31:16 in the -@@ -3841,11 +3839,11 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, - * generate "full" 32-bit error codes, so KVM allows userspace - * to inject exception error codes with bits 31:16 set. - */ -- vmcs12->vm_exit_intr_error_code = (u16)vcpu->arch.exception.error_code; -+ vmcs12->vm_exit_intr_error_code = (u16)ex->error_code; - intr_info |= INTR_INFO_DELIVER_CODE_MASK; - } - -- if (kvm_exception_is_soft(nr)) -+ if (kvm_exception_is_soft(ex->vector)) - intr_info |= INTR_TYPE_SOFT_EXCEPTION; - else - intr_info |= INTR_TYPE_HARD_EXCEPTION; -@@ -3876,7 +3874,7 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, - static inline unsigned long vmx_get_pending_dbg_trap(struct kvm_vcpu *vcpu) - { - if (!vcpu->arch.exception.pending || -- vcpu->arch.exception.nr != DB_VECTOR) -+ vcpu->arch.exception.vector != DB_VECTOR) - return 0; - - /* General Detect #DBs are always fault-like. 
*/ -diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c -index 7f3581960eb5..0f68ed966944 100644 ---- a/arch/x86/kvm/vmx/vmx.c -+++ b/arch/x86/kvm/vmx/vmx.c -@@ -1659,7 +1659,7 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) - */ - if (nested_cpu_has_mtf(vmcs12) && - (!vcpu->arch.exception.pending || -- vcpu->arch.exception.nr == DB_VECTOR)) -+ vcpu->arch.exception.vector == DB_VECTOR)) - vmx->nested.mtf_pending = true; - else - vmx->nested.mtf_pending = false; -@@ -1686,15 +1686,13 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu) - - static void vmx_queue_exception(struct kvm_vcpu *vcpu) - { -+ struct kvm_queued_exception *ex = &vcpu->arch.exception; -+ u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; - struct vcpu_vmx *vmx = to_vmx(vcpu); -- unsigned nr = vcpu->arch.exception.nr; -- bool has_error_code = vcpu->arch.exception.has_error_code; -- u32 error_code = vcpu->arch.exception.error_code; -- u32 intr_info = nr | INTR_INFO_VALID_MASK; - -- kvm_deliver_exception_payload(vcpu); -+ kvm_deliver_exception_payload(vcpu, ex); - -- if (has_error_code) { -+ if (ex->has_error_code) { - /* - * Despite the error code being architecturally defined as 32 - * bits, and the VMCS field being 32 bits, Intel CPUs and thus -@@ -1705,21 +1703,21 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) - * the upper bits to avoid VM-Fail, losing information that - * does't really exist is preferable to killing the VM. - */ -- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)error_code); -+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code); - intr_info |= INTR_INFO_DELIVER_CODE_MASK; - } - - if (vmx->rmode.vm86_active) { - int inc_eip = 0; -- if (kvm_exception_is_soft(nr)) -+ if (kvm_exception_is_soft(ex->vector)) - inc_eip = vcpu->arch.event_exit_inst_len; -- kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); -+ kvm_inject_realmode_interrupt(vcpu, ex->vector, inc_eip); - return; - } - - WARN_ON_ONCE(vmx->emulation_required); - -- if (kvm_exception_is_soft(nr)) { -+ if (kvm_exception_is_soft(ex->vector)) { - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); - intr_info |= INTR_TYPE_SOFT_EXCEPTION; -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 14cb589683a1..14182b5b2c93 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -556,16 +556,13 @@ static int exception_type(int vector) - return EXCPT_FAULT; - } - --void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) -+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, -+ struct kvm_queued_exception *ex) - { -- unsigned nr = vcpu->arch.exception.nr; -- bool has_payload = vcpu->arch.exception.has_payload; -- unsigned long payload = vcpu->arch.exception.payload; -- -- if (!has_payload) -+ if (!ex->has_payload) - return; - -- switch (nr) { -+ switch (ex->vector) { - case DB_VECTOR: - /* - * "Certain debug exceptions may clear bit 0-3. The -@@ -590,8 +587,8 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) - * So they need to be flipped for DR6. 
- */ - vcpu->arch.dr6 |= DR6_ACTIVE_LOW; -- vcpu->arch.dr6 |= payload; -- vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW; -+ vcpu->arch.dr6 |= ex->payload; -+ vcpu->arch.dr6 ^= ex->payload & DR6_ACTIVE_LOW; - - /* - * The #DB payload is defined as compatible with the 'pending -@@ -602,12 +599,12 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) - vcpu->arch.dr6 &= ~BIT(12); - break; - case PF_VECTOR: -- vcpu->arch.cr2 = payload; -+ vcpu->arch.cr2 = ex->payload; - break; - } - -- vcpu->arch.exception.has_payload = false; -- vcpu->arch.exception.payload = 0; -+ ex->has_payload = false; -+ ex->payload = 0; - } - EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); - -@@ -646,17 +643,18 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, - vcpu->arch.exception.injected = false; - } - vcpu->arch.exception.has_error_code = has_error; -- vcpu->arch.exception.nr = nr; -+ vcpu->arch.exception.vector = nr; - vcpu->arch.exception.error_code = error_code; - vcpu->arch.exception.has_payload = has_payload; - vcpu->arch.exception.payload = payload; - if (!is_guest_mode(vcpu)) -- kvm_deliver_exception_payload(vcpu); -+ kvm_deliver_exception_payload(vcpu, -+ &vcpu->arch.exception); - return; - } - - /* to check exception */ -- prev_nr = vcpu->arch.exception.nr; -+ prev_nr = vcpu->arch.exception.vector; - if (prev_nr == DF_VECTOR) { - /* triple fault -> shutdown */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); -@@ -674,7 +672,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, - vcpu->arch.exception.pending = true; - vcpu->arch.exception.injected = false; - vcpu->arch.exception.has_error_code = true; -- vcpu->arch.exception.nr = DF_VECTOR; -+ vcpu->arch.exception.vector = DF_VECTOR; - vcpu->arch.exception.error_code = 0; - vcpu->arch.exception.has_payload = false; - vcpu->arch.exception.payload = 0; -@@ -5023,25 +5021,24 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, - static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) - { -+ struct kvm_queued_exception *ex = &vcpu->arch.exception; -+ - process_nmi(vcpu); - - if (kvm_check_request(KVM_REQ_SMI, vcpu)) - process_smi(vcpu); - - /* -- * In guest mode, payload delivery should be deferred, -- * so that the L1 hypervisor can intercept #PF before -- * CR2 is modified (or intercept #DB before DR6 is -- * modified under nVMX). Unless the per-VM capability, -- * KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of -- * an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we -- * opportunistically defer the exception payload, deliver it if the -- * capability hasn't been requested before processing a -- * KVM_GET_VCPU_EVENTS. -+ * In guest mode, payload delivery should be deferred if the exception -+ * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1 -+ * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability, -+ * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not -+ * propagate the payload and so it cannot be safely deferred. Deliver -+ * the payload if the capability hasn't been requested. 
- */ - if (!vcpu->kvm->arch.exception_payload_enabled && -- vcpu->arch.exception.pending && vcpu->arch.exception.has_payload) -- kvm_deliver_exception_payload(vcpu); -+ ex->pending && ex->has_payload) -+ kvm_deliver_exception_payload(vcpu, ex); - - /* - * The API doesn't provide the instruction length for software -@@ -5049,26 +5046,25 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, - * isn't advanced, we should expect to encounter the exception - * again. - */ -- if (kvm_exception_is_soft(vcpu->arch.exception.nr)) { -+ if (kvm_exception_is_soft(ex->vector)) { - events->exception.injected = 0; - events->exception.pending = 0; - } else { -- events->exception.injected = vcpu->arch.exception.injected; -- events->exception.pending = vcpu->arch.exception.pending; -+ events->exception.injected = ex->injected; -+ events->exception.pending = ex->pending; - /* - * For ABI compatibility, deliberately conflate - * pending and injected exceptions when - * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled. - */ - if (!vcpu->kvm->arch.exception_payload_enabled) -- events->exception.injected |= -- vcpu->arch.exception.pending; -+ events->exception.injected |= ex->pending; - } -- events->exception.nr = vcpu->arch.exception.nr; -- events->exception.has_error_code = vcpu->arch.exception.has_error_code; -- events->exception.error_code = vcpu->arch.exception.error_code; -- events->exception_has_payload = vcpu->arch.exception.has_payload; -- events->exception_payload = vcpu->arch.exception.payload; -+ events->exception.nr = ex->vector; -+ events->exception.has_error_code = ex->has_error_code; -+ events->exception.error_code = ex->error_code; -+ events->exception_has_payload = ex->has_payload; -+ events->exception_payload = ex->payload; - - events->interrupt.injected = - vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; -@@ -5140,7 +5136,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, - process_nmi(vcpu); - vcpu->arch.exception.injected = events->exception.injected; - vcpu->arch.exception.pending = events->exception.pending; -- vcpu->arch.exception.nr = events->exception.nr; -+ vcpu->arch.exception.vector = events->exception.nr; - vcpu->arch.exception.has_error_code = events->exception.has_error_code; - vcpu->arch.exception.error_code = events->exception.error_code; - vcpu->arch.exception.has_payload = events->exception_has_payload; -@@ -9675,7 +9671,7 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu) - - static void kvm_inject_exception(struct kvm_vcpu *vcpu) - { -- trace_kvm_inj_exception(vcpu->arch.exception.nr, -+ trace_kvm_inj_exception(vcpu->arch.exception.vector, - vcpu->arch.exception.has_error_code, - vcpu->arch.exception.error_code, - vcpu->arch.exception.injected); -@@ -9747,12 +9743,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) - * describe the behavior of General Detect #DBs, which are - * fault-like. They do _not_ set RF, a la code breakpoints. 
- */ -- if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) -+ if (exception_type(vcpu->arch.exception.vector) == EXCPT_FAULT) - __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | - X86_EFLAGS_RF); - -- if (vcpu->arch.exception.nr == DB_VECTOR) { -- kvm_deliver_exception_payload(vcpu); -+ if (vcpu->arch.exception.vector == DB_VECTOR) { -+ kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception); - if (vcpu->arch.dr7 & DR7_GD) { - vcpu->arch.dr7 &= ~DR7_GD; - kvm_update_dr7(vcpu); -diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h -index 1926d2cb8e79..4147d27f9fbc 100644 ---- a/arch/x86/kvm/x86.h -+++ b/arch/x86/kvm/x86.h -@@ -286,7 +286,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, - - int handle_ud(struct kvm_vcpu *vcpu); - --void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu); -+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, -+ struct kvm_queued_exception *ex); - - void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); - u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-mmu-fix-memoryleak-in-kvm_mmu_vendor_module_.patch b/kvm-6.0/kvm-x86-mmu-fix-memoryleak-in-kvm_mmu_vendor_module_.patch deleted file mode 100644 index 6a72f1e54cc..00000000000 --- a/kvm-6.0/kvm-x86-mmu-fix-memoryleak-in-kvm_mmu_vendor_module_.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 80c076cb1e7ff649cd729910c9f9058780e124cc Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 23 Aug 2022 14:32:37 +0800 -Subject: KVM: x86/mmu: fix memoryleak in kvm_mmu_vendor_module_init() - -From: Miaohe Lin - -[ Upstream commit d7c9bfb9caaffd496ae44b258ec7c793677d3eeb ] - -When register_shrinker() fails, KVM doesn't release the percpu counter -kvm_total_used_mmu_pages leading to memoryleak. Fix this issue by calling -percpu_counter_destroy() when register_shrinker() fails. - -Fixes: ab271bd4dfd5 ("x86: kvm: propagate register_shrinker return code") -Signed-off-by: Miaohe Lin -Link: https://lore.kernel.org/r/20220823063237.47299-1-linmiaohe@huawei.com -[sean: tweak shortlog and changelog] -Signed-off-by: Sean Christopherson -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/mmu.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c -index 3552e6af3684..858bc53cfab4 100644 ---- a/arch/x86/kvm/mmu/mmu.c -+++ b/arch/x86/kvm/mmu/mmu.c -@@ -6704,10 +6704,12 @@ int kvm_mmu_vendor_module_init(void) - - ret = register_shrinker(&mmu_shrinker, "x86-mmu"); - if (ret) -- goto out; -+ goto out_shrinker; - - return 0; - -+out_shrinker: -+ percpu_counter_destroy(&kvm_total_used_mmu_pages); - out: - mmu_destroy_caches(); - return ret; --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-morph-pending-exceptions-to-pending-vm-exits.patch b/kvm-6.0/kvm-x86-morph-pending-exceptions-to-pending-vm-exits.patch deleted file mode 100644 index bb4414f57a2..00000000000 --- a/kvm-6.0/kvm-x86-morph-pending-exceptions-to-pending-vm-exits.patch +++ /dev/null @@ -1,754 +0,0 @@ -From 2c2075dbd009341c0223762348ffd9d61e289200 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 23:16:08 +0000 -Subject: KVM: x86: Morph pending exceptions to pending VM-Exits at queue time - -From: Sean Christopherson - -[ Upstream commit 7709aba8f71613ae5d18d8c00adb54948e6bedb3 ] - -Morph pending exceptions to pending VM-Exits (due to interception) when -the exception is queued instead of waiting until nested events are -checked at VM-Entry. 
This fixes a longstanding bug where KVM fails to -handle an exception that occurs during delivery of a previous exception, -KVM (L0) and L1 both want to intercept the exception (e.g. #PF for shadow -paging), and KVM determines that the exception is in the guest's domain, -i.e. queues the new exception for L2. Deferring the interception check -causes KVM to esclate various combinations of injected+pending exceptions -to double fault (#DF) without consulting L1's interception desires, and -ends up injecting a spurious #DF into L2. - -KVM has fudged around the issue for #PF by special casing emulated #PF -injection for shadow paging, but the underlying issue is not unique to -shadow paging in L0, e.g. if KVM is intercepting #PF because the guest -has a smaller maxphyaddr and L1 (but not L0) is using shadow paging. -Other exceptions are affected as well, e.g. if KVM is intercepting #GP -for one of SVM's workaround or for the VMware backdoor emulation stuff. -The other cases have gone unnoticed because the #DF is spurious if and -only if L1 resolves the exception, e.g. KVM's goofs go unnoticed if L1 -would have injected #DF anyways. - -The hack-a-fix has also led to ugly code, e.g. bailing from the emulator -if #PF injection forced a nested VM-Exit and the emulator finds itself -back in L1. Allowing for direct-to-VM-Exit queueing also neatly solves -the async #PF in L2 mess; no need to set a magic flag and token, simply -queue a #PF nested VM-Exit. - -Deal with event migration by flagging that a pending exception was queued -by userspace and check for interception at the next KVM_RUN, e.g. so that -KVM does the right thing regardless of the order in which userspace -restores nested state vs. event state. - -When "getting" events from userspace, simply drop any pending excpetion -that is destined to be intercepted if there is also an injected exception -to be migrated. Ideally, KVM would migrate both events, but that would -require new ABI, and practically speaking losing the event is unlikely to -be noticed, let alone fatal. The injected exception is captured, RIP -still points at the original faulting instruction, etc... So either the -injection on the target will trigger the same intercepted exception, or -the source of the intercepted exception was transient and/or -non-deterministic, thus dropping it is ok-ish. 
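The heart of the change is a queue-time interception check in kvm_multiple_exception(); a condensed sketch of the logic added by the x86.c hunk below, using the is_exception_vmexit() hook and kvm_queue_exception_vmexit() helper that this patch introduces:

        /*
         * At queue time: if the exception is destined for L2, is not a
         * re-injection, and L1 wants to intercept it, record a pending
         * VM-Exit instead of a pending exception.
         */
        if (!reinject && is_guest_mode(vcpu) &&
            kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
                kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
                                           has_payload, payload);
                return;
        }

With the decision made up front, check_nested_events() only has to deliver the already-queued vcpu->arch.exception_vmexit; it no longer re-derives whether L1 intercepts the vector.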
- -Fixes: a04aead144fd ("KVM: nSVM: fix running nested guests when npt=0") -Fixes: feaf0c7dc473 ("KVM: nVMX: Do not generate #DF if #PF happens during exception delivery into L2") -Cc: Jim Mattson -Signed-off-by: Sean Christopherson -Reviewed-by: Maxim Levitsky -Link: https://lore.kernel.org/r/20220830231614.3580124-22-seanjc@google.com -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/include/asm/kvm_host.h | 12 ++- - arch/x86/kvm/svm/nested.c | 45 +++------ - arch/x86/kvm/vmx/nested.c | 109 ++++++++++------------ - arch/x86/kvm/vmx/vmx.c | 6 +- - arch/x86/kvm/x86.c | 159 ++++++++++++++++++++++---------- - arch/x86/kvm/x86.h | 7 ++ - 6 files changed, 188 insertions(+), 150 deletions(-) - -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index 36e4fde359a7..bad74c8fbc65 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -647,7 +647,6 @@ struct kvm_queued_exception { - u32 error_code; - unsigned long payload; - bool has_payload; -- u8 nested_apf; - }; - - struct kvm_vcpu_arch { -@@ -749,8 +748,12 @@ struct kvm_vcpu_arch { - - u8 event_exit_inst_len; - -+ bool exception_from_userspace; -+ - /* Exceptions to be injected to the guest. */ - struct kvm_queued_exception exception; -+ /* Exception VM-Exits to be synthesized to L1. */ -+ struct kvm_queued_exception exception_vmexit; - - struct kvm_queued_interrupt { - bool injected; -@@ -861,7 +864,6 @@ struct kvm_vcpu_arch { - u32 id; - bool send_user_only; - u32 host_apf_flags; -- unsigned long nested_apf_token; - bool delivery_as_pf_vmexit; - bool pageready_pending; - } apf; -@@ -1637,9 +1639,9 @@ struct kvm_x86_ops { - - struct kvm_x86_nested_ops { - void (*leave_nested)(struct kvm_vcpu *vcpu); -+ bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, -+ u32 error_code); - int (*check_events)(struct kvm_vcpu *vcpu); -- bool (*handle_page_fault_workaround)(struct kvm_vcpu *vcpu, -- struct x86_exception *fault); - bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); - void (*triple_fault)(struct kvm_vcpu *vcpu); - int (*get_state)(struct kvm_vcpu *vcpu, -@@ -1866,7 +1868,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay - void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); - void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); - void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); --bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, -+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault); - bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); - bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); -diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c -index a6111392985c..405075286965 100644 ---- a/arch/x86/kvm/svm/nested.c -+++ b/arch/x86/kvm/svm/nested.c -@@ -55,28 +55,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, - nested_svm_vmexit(svm); - } - --static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu, -- struct x86_exception *fault) --{ -- struct vcpu_svm *svm = to_svm(vcpu); -- struct vmcb *vmcb = svm->vmcb; -- -- WARN_ON(!is_guest_mode(vcpu)); -- -- if (vmcb12_is_intercept(&svm->nested.ctl, -- INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && -- !WARN_ON_ONCE(svm->nested.nested_run_pending)) { -- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; -- vmcb->control.exit_code_hi = 0; -- vmcb->control.exit_info_1 = fault->error_code; -- 
vmcb->control.exit_info_2 = fault->address; -- nested_svm_vmexit(svm); -- return true; -- } -- -- return false; --} -- - static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) - { - struct vcpu_svm *svm = to_svm(vcpu); -@@ -1304,16 +1282,17 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu) - return 0; - } - --static bool nested_exit_on_exception(struct vcpu_svm *svm) -+static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector, -+ u32 error_code) - { -- unsigned int vector = svm->vcpu.arch.exception.vector; -+ struct vcpu_svm *svm = to_svm(vcpu); - - return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector)); - } - - static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) - { -- struct kvm_queued_exception *ex = &vcpu->arch.exception; -+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; - struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; - -@@ -1328,9 +1307,7 @@ static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) - * than #PF. - */ - if (ex->vector == PF_VECTOR) { -- if (ex->nested_apf) -- vmcb->control.exit_info_2 = vcpu->arch.apf.nested_apf_token; -- else if (ex->has_payload) -+ if (ex->has_payload) - vmcb->control.exit_info_2 = ex->payload; - else - vmcb->control.exit_info_2 = vcpu->arch.cr2; -@@ -1383,15 +1360,19 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) - return 0; - } - -- if (vcpu->arch.exception.pending) { -+ if (vcpu->arch.exception_vmexit.pending) { - if (block_nested_exceptions) - return -EBUSY; -- if (!nested_exit_on_exception(svm)) -- return 0; - nested_svm_inject_exception_vmexit(vcpu); - return 0; - } - -+ if (vcpu->arch.exception.pending) { -+ if (block_nested_exceptions) -+ return -EBUSY; -+ return 0; -+ } -+ - if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { - if (block_nested_events) - return -EBUSY; -@@ -1729,8 +1710,8 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) - - struct kvm_x86_nested_ops svm_nested_ops = { - .leave_nested = svm_leave_nested, -+ .is_exception_vmexit = nested_svm_is_exception_vmexit, - .check_events = svm_check_nested_events, -- .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround, - .triple_fault = nested_svm_triple_fault, - .get_nested_state_pages = svm_get_nested_state_pages, - .get_state = svm_get_nested_state, -diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index dfd5e13e5202..4bb3ccf82d63 100644 ---- a/arch/x86/kvm/vmx/nested.c -+++ b/arch/x86/kvm/vmx/nested.c -@@ -439,59 +439,22 @@ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, - return inequality ^ bit; - } - -- --/* -- * KVM wants to inject page-faults which it got to the guest. This function -- * checks whether in a nested guest, we need to inject them to L1 or L2. -- */ --static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) --{ -- struct kvm_queued_exception *ex = &vcpu->arch.exception; -- struct vmcs12 *vmcs12 = get_vmcs12(vcpu); -- -- if (ex->vector == PF_VECTOR) { -- if (ex->nested_apf) { -- *exit_qual = vcpu->arch.apf.nested_apf_token; -- return 1; -- } -- if (nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code)) { -- *exit_qual = ex->has_payload ? 
ex->payload : vcpu->arch.cr2; -- return 1; -- } -- } else if (vmcs12->exception_bitmap & (1u << ex->vector)) { -- if (ex->vector == DB_VECTOR) { -- if (ex->has_payload) { -- *exit_qual = ex->payload; -- } else { -- *exit_qual = vcpu->arch.dr6; -- *exit_qual &= ~DR6_BT; -- *exit_qual ^= DR6_ACTIVE_LOW; -- } -- } else -- *exit_qual = 0; -- return 1; -- } -- -- return 0; --} -- --static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu, -- struct x86_exception *fault) -+static bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector, -+ u32 error_code) - { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - -- WARN_ON(!is_guest_mode(vcpu)); -+ /* -+ * Drop bits 31:16 of the error code when performing the #PF mask+match -+ * check. All VMCS fields involved are 32 bits, but Intel CPUs never -+ * set bits 31:16 and VMX disallows setting bits 31:16 in the injected -+ * error code. Including the to-be-dropped bits in the check might -+ * result in an "impossible" or missed exit from L1's perspective. -+ */ -+ if (vector == PF_VECTOR) -+ return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code); - -- if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && -- !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) { -- vmcs12->vm_exit_intr_error_code = fault->error_code; -- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, -- PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | -- INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, -- fault->address); -- return true; -- } -- return false; -+ return (vmcs12->exception_bitmap & (1u << vector)); - } - - static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, -@@ -3822,12 +3785,24 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - return -ENXIO; - } - --static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, -- unsigned long exit_qual) -+static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu) - { -- struct kvm_queued_exception *ex = &vcpu->arch.exception; -+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; - u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); -+ unsigned long exit_qual; -+ -+ if (ex->has_payload) { -+ exit_qual = ex->payload; -+ } else if (ex->vector == PF_VECTOR) { -+ exit_qual = vcpu->arch.cr2; -+ } else if (ex->vector == DB_VECTOR) { -+ exit_qual = vcpu->arch.dr6; -+ exit_qual &= ~DR6_BT; -+ exit_qual ^= DR6_ACTIVE_LOW; -+ } else { -+ exit_qual = 0; -+ } - - if (ex->has_error_code) { - /* -@@ -3917,7 +3892,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - { - struct kvm_lapic *apic = vcpu->arch.apic; - struct vcpu_vmx *vmx = to_vmx(vcpu); -- unsigned long exit_qual; - /* - * Only a pending nested run blocks a pending exception. If there is a - * previously injected event, the pending exception occurred while said -@@ -3971,14 +3945,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - * across SMI/RSM as it should; that needs to be addressed in order to - * prioritize SMI over MTF and trap-like #DBs. 
- */ -+ if (vcpu->arch.exception_vmexit.pending && -+ !vmx_is_low_priority_db_trap(&vcpu->arch.exception_vmexit)) { -+ if (block_nested_exceptions) -+ return -EBUSY; -+ -+ nested_vmx_inject_exception_vmexit(vcpu); -+ return 0; -+ } -+ - if (vcpu->arch.exception.pending && - !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) { - if (block_nested_exceptions) - return -EBUSY; -- if (!nested_vmx_check_exception(vcpu, &exit_qual)) -- goto no_vmexit; -- nested_vmx_inject_exception_vmexit(vcpu, exit_qual); -- return 0; -+ goto no_vmexit; - } - - if (vmx->nested.mtf_pending) { -@@ -3989,15 +3969,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) - return 0; - } - -- if (vcpu->arch.exception.pending) { -+ if (vcpu->arch.exception_vmexit.pending) { - if (block_nested_exceptions) - return -EBUSY; -- if (!nested_vmx_check_exception(vcpu, &exit_qual)) -- goto no_vmexit; -- nested_vmx_inject_exception_vmexit(vcpu, exit_qual); -+ -+ nested_vmx_inject_exception_vmexit(vcpu); - return 0; - } - -+ if (vcpu->arch.exception.pending) { -+ if (block_nested_exceptions) -+ return -EBUSY; -+ goto no_vmexit; -+ } -+ - if (nested_vmx_preemption_timer_pending(vcpu)) { - if (block_nested_events) - return -EBUSY; -@@ -6868,8 +6853,8 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) - - struct kvm_x86_nested_ops vmx_nested_ops = { - .leave_nested = vmx_leave_nested, -+ .is_exception_vmexit = nested_vmx_is_exception_vmexit, - .check_events = vmx_check_nested_events, -- .handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround, - .hv_timer_pending = nested_vmx_preemption_timer_pending, - .triple_fault = nested_vmx_triple_fault, - .get_state = vmx_get_nested_state, -diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c -index 0f68ed966944..9c2b8e2b2a28 100644 ---- a/arch/x86/kvm/vmx/vmx.c -+++ b/arch/x86/kvm/vmx/vmx.c -@@ -1659,7 +1659,9 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) - */ - if (nested_cpu_has_mtf(vmcs12) && - (!vcpu->arch.exception.pending || -- vcpu->arch.exception.vector == DB_VECTOR)) -+ vcpu->arch.exception.vector == DB_VECTOR) && -+ (!vcpu->arch.exception_vmexit.pending || -+ vcpu->arch.exception_vmexit.vector == DB_VECTOR)) - vmx->nested.mtf_pending = true; - else - vmx->nested.mtf_pending = false; -@@ -5718,7 +5720,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu) - struct vcpu_vmx *vmx = to_vmx(vcpu); - - return vmx->emulation_required && !vmx->rmode.vm86_active && -- (vcpu->arch.exception.pending || vcpu->arch.exception.injected); -+ (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected); - } - - static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 01d59f93d93e..8264e41b4fea 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -608,6 +608,21 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, - } - EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); - -+static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector, -+ bool has_error_code, u32 error_code, -+ bool has_payload, unsigned long payload) -+{ -+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; -+ -+ ex->vector = vector; -+ ex->injected = false; -+ ex->pending = true; -+ ex->has_error_code = has_error_code; -+ ex->error_code = error_code; -+ ex->has_payload = has_payload; -+ ex->payload = payload; -+} -+ - static void kvm_multiple_exception(struct kvm_vcpu *vcpu, - unsigned nr, bool 
has_error, u32 error_code, - bool has_payload, unsigned long payload, bool reinject) -@@ -617,18 +632,31 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, - - kvm_make_request(KVM_REQ_EVENT, vcpu); - -+ /* -+ * If the exception is destined for L2 and isn't being reinjected, -+ * morph it to a VM-Exit if L1 wants to intercept the exception. A -+ * previously injected exception is not checked because it was checked -+ * when it was original queued, and re-checking is incorrect if _L1_ -+ * injected the exception, in which case it's exempt from interception. -+ */ -+ if (!reinject && is_guest_mode(vcpu) && -+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) { -+ kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code, -+ has_payload, payload); -+ return; -+ } -+ - if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { - queue: - if (reinject) { - /* -- * On vmentry, vcpu->arch.exception.pending is only -- * true if an event injection was blocked by -- * nested_run_pending. In that case, however, -- * vcpu_enter_guest requests an immediate exit, -- * and the guest shouldn't proceed far enough to -- * need reinjection. -+ * On VM-Entry, an exception can be pending if and only -+ * if event injection was blocked by nested_run_pending. -+ * In that case, however, vcpu_enter_guest() requests an -+ * immediate exit, and the guest shouldn't proceed far -+ * enough to need reinjection. - */ -- WARN_ON_ONCE(vcpu->arch.exception.pending); -+ WARN_ON_ONCE(kvm_is_exception_pending(vcpu)); - vcpu->arch.exception.injected = true; - if (WARN_ON_ONCE(has_payload)) { - /* -@@ -734,20 +762,22 @@ static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err) - void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) - { - ++vcpu->stat.pf_guest; -- vcpu->arch.exception.nested_apf = -- is_guest_mode(vcpu) && fault->async_page_fault; -- if (vcpu->arch.exception.nested_apf) { -- vcpu->arch.apf.nested_apf_token = fault->address; -- kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); -- } else { -+ -+ /* -+ * Async #PF in L2 is always forwarded to L1 as a VM-Exit regardless of -+ * whether or not L1 wants to intercept "regular" #PF. -+ */ -+ if (is_guest_mode(vcpu) && fault->async_page_fault) -+ kvm_queue_exception_vmexit(vcpu, PF_VECTOR, -+ true, fault->error_code, -+ true, fault->address); -+ else - kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code, - fault->address); -- } - } - EXPORT_SYMBOL_GPL(kvm_inject_page_fault); - --/* Returns true if the page fault was immediately morphed into a VM-Exit. */ --bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, -+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault) - { - struct kvm_mmu *fault_mmu; -@@ -765,26 +795,7 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, - kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address, - fault_mmu->root.hpa); - -- /* -- * A workaround for KVM's bad exception handling. If KVM injected an -- * exception into L2, and L2 encountered a #PF while vectoring the -- * injected exception, manually check to see if L1 wants to intercept -- * #PF, otherwise queuing the #PF will lead to #DF or a lost exception. -- * In all other cases, defer the check to nested_ops->check_events(), -- * which will correctly handle priority (this does not). Note, other -- * exceptions, e.g. #GP, are theoretically affected, #PF is simply the -- * most problematic, e.g. 
when L0 and L1 are both intercepting #PF for -- * shadow paging. -- * -- * TODO: Rewrite exception handling to track injected and pending -- * (VM-Exit) exceptions separately. -- */ -- if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) && -- kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault)) -- return true; -- - fault_mmu->inject_page_fault(vcpu, fault); -- return false; - } - EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault); - -@@ -4846,7 +4857,7 @@ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) - return (kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu) && - !kvm_event_needs_reinjection(vcpu) && -- !vcpu->arch.exception.pending); -+ !kvm_is_exception_pending(vcpu)); - } - - static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, -@@ -5021,13 +5032,27 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, - static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) - { -- struct kvm_queued_exception *ex = &vcpu->arch.exception; -+ struct kvm_queued_exception *ex; - - process_nmi(vcpu); - - if (kvm_check_request(KVM_REQ_SMI, vcpu)) - process_smi(vcpu); - -+ /* -+ * KVM's ABI only allows for one exception to be migrated. Luckily, -+ * the only time there can be two queued exceptions is if there's a -+ * non-exiting _injected_ exception, and a pending exiting exception. -+ * In that case, ignore the VM-Exiting exception as it's an extension -+ * of the injected exception. -+ */ -+ if (vcpu->arch.exception_vmexit.pending && -+ !vcpu->arch.exception.pending && -+ !vcpu->arch.exception.injected) -+ ex = &vcpu->arch.exception_vmexit; -+ else -+ ex = &vcpu->arch.exception; -+ - /* - * In guest mode, payload delivery should be deferred if the exception - * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1 -@@ -5134,6 +5159,19 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, - return -EINVAL; - - process_nmi(vcpu); -+ -+ /* -+ * Flag that userspace is stuffing an exception, the next KVM_RUN will -+ * morph the exception to a VM-Exit if appropriate. Do this only for -+ * pending exceptions, already-injected exceptions are not subject to -+ * intercpetion. Note, userspace that conflates pending and injected -+ * is hosed, and will incorrectly convert an injected exception into a -+ * pending exception, which in turn may cause a spurious VM-Exit. 
-+ */ -+ vcpu->arch.exception_from_userspace = events->exception.pending; -+ -+ vcpu->arch.exception_vmexit.pending = false; -+ - vcpu->arch.exception.injected = events->exception.injected; - vcpu->arch.exception.pending = events->exception.pending; - vcpu->arch.exception.vector = events->exception.nr; -@@ -8164,18 +8202,17 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) - } - } - --static bool inject_emulated_exception(struct kvm_vcpu *vcpu) -+static void inject_emulated_exception(struct kvm_vcpu *vcpu) - { - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; -- if (ctxt->exception.vector == PF_VECTOR) -- return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception); - -- if (ctxt->exception.error_code_valid) -+ if (ctxt->exception.vector == PF_VECTOR) -+ kvm_inject_emulated_page_fault(vcpu, &ctxt->exception); -+ else if (ctxt->exception.error_code_valid) - kvm_queue_exception_e(vcpu, ctxt->exception.vector, - ctxt->exception.error_code); - else - kvm_queue_exception(vcpu, ctxt->exception.vector); -- return false; - } - - static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu) -@@ -8773,8 +8810,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - - if (ctxt->have_exception) { - r = 1; -- if (inject_emulated_exception(vcpu)) -- return r; -+ inject_emulated_exception(vcpu); - } else if (vcpu->arch.pio.count) { - if (!vcpu->arch.pio.in) { - /* FIXME: return into emulator if single-stepping. */ -@@ -9721,7 +9757,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) - */ - if (vcpu->arch.exception.injected) - kvm_inject_exception(vcpu); -- else if (vcpu->arch.exception.pending) -+ else if (kvm_is_exception_pending(vcpu)) - ; /* see above */ - else if (vcpu->arch.nmi_injected) - static_call(kvm_x86_inject_nmi)(vcpu); -@@ -9748,6 +9784,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) - if (r < 0) - goto out; - -+ /* -+ * A pending exception VM-Exit should either result in nested VM-Exit -+ * or force an immediate re-entry and exit to/from L2, and exception -+ * VM-Exits cannot be injected (flag should _never_ be set). -+ */ -+ WARN_ON_ONCE(vcpu->arch.exception_vmexit.injected || -+ vcpu->arch.exception_vmexit.pending); -+ - /* - * New events, other than exceptions, cannot be injected if KVM needs - * to re-inject a previous event. See above comments on re-injecting -@@ -9847,7 +9891,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) - kvm_x86_ops.nested_ops->hv_timer_pending(vcpu)) - *req_immediate_exit = true; - -- WARN_ON(vcpu->arch.exception.pending); -+ WARN_ON(kvm_is_exception_pending(vcpu)); - return 0; - - out: -@@ -10866,6 +10910,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) - - int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) - { -+ struct kvm_queued_exception *ex = &vcpu->arch.exception; - struct kvm_run *kvm_run = vcpu->run; - int r; - -@@ -10924,6 +10969,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) - } - } - -+ /* -+ * If userspace set a pending exception and L2 is active, convert it to -+ * a pending VM-Exit if L1 wants to intercept the exception. 
-+ */ -+ if (vcpu->arch.exception_from_userspace && is_guest_mode(vcpu) && -+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, ex->vector, -+ ex->error_code)) { -+ kvm_queue_exception_vmexit(vcpu, ex->vector, -+ ex->has_error_code, ex->error_code, -+ ex->has_payload, ex->payload); -+ ex->injected = false; -+ ex->pending = false; -+ } -+ vcpu->arch.exception_from_userspace = false; -+ - if (unlikely(vcpu->arch.complete_userspace_io)) { - int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; - vcpu->arch.complete_userspace_io = NULL; -@@ -11030,6 +11090,7 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) - kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); - - vcpu->arch.exception.pending = false; -+ vcpu->arch.exception_vmexit.pending = false; - - kvm_make_request(KVM_REQ_EVENT, vcpu); - } -@@ -11410,7 +11471,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - - if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { - r = -EBUSY; -- if (vcpu->arch.exception.pending) -+ if (kvm_is_exception_pending(vcpu)) - goto out; - if (dbg->control & KVM_GUESTDBG_INJECT_DB) - kvm_queue_exception(vcpu, DB_VECTOR); -@@ -12643,7 +12704,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) - if (vcpu->arch.pv.pv_unhalted) - return true; - -- if (vcpu->arch.exception.pending) -+ if (kvm_is_exception_pending(vcpu)) - return true; - - if (kvm_test_request(KVM_REQ_NMI, vcpu) || -@@ -12898,7 +12959,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) - { - if (unlikely(!lapic_in_kernel(vcpu) || - kvm_event_needs_reinjection(vcpu) || -- vcpu->arch.exception.pending)) -+ kvm_is_exception_pending(vcpu))) - return false; - - if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu)) -diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h -index 4147d27f9fbc..256745d1a2c3 100644 ---- a/arch/x86/kvm/x86.h -+++ b/arch/x86/kvm/x86.h -@@ -82,10 +82,17 @@ static inline unsigned int __shrink_ple_window(unsigned int val, - void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); - int kvm_check_nested_events(struct kvm_vcpu *vcpu); - -+static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu) -+{ -+ return vcpu->arch.exception.pending || -+ vcpu->arch.exception_vmexit.pending; -+} -+ - static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) - { - vcpu->arch.exception.pending = false; - vcpu->arch.exception.injected = false; -+ vcpu->arch.exception_vmexit.pending = false; - } - - static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-report-error-when-setting-cpuid-if-hyper-v-a.patch b/kvm-6.0/kvm-x86-report-error-when-setting-cpuid-if-hyper-v-a.patch deleted file mode 100644 index d30317e1bfa..00000000000 --- a/kvm-6.0/kvm-x86-report-error-when-setting-cpuid-if-hyper-v-a.patch +++ /dev/null @@ -1,173 +0,0 @@ -From 1abbad519136449cb6a4dd537e30dbf56cb3ff9a Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 15:37:09 +0200 -Subject: KVM: x86: Report error when setting CPUID if Hyper-V allocation fails - -From: Sean Christopherson - -[ Upstream commit 3be29eb7b5251a772e2033761a9b67981fdfb0f7 ] - -Return -ENOMEM back to userspace if allocating the Hyper-V vCPU struct -fails when enabling Hyper-V in guest CPUID. Silently ignoring failure -means that KVM will not have an up-to-date CPUID cache if allocating the -struct succeeds later on, e.g. when activating SynIC. 
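Concretely, kvm_set_cpuid() now performs the Hyper-V allocation up front and propagates any failure, before kvm_check_cpuid() and before the new CPUID takes effect; a trimmed excerpt of the cpuid.c hunk below:

        if (kvm_cpuid_has_hyperv(e2, nent)) {
                r = kvm_hv_vcpu_init(vcpu);
                if (r)
                        return r;       /* -ENOMEM on allocation failure */
        }

        r = kvm_check_cpuid(vcpu, e2, nent);
        if (r)
                return r;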
- -Rejecting the CPUID operation also guarantess that vcpu->arch.hyperv is -non-NULL if hyperv_enabled is true, which will allow for additional -cleanup, e.g. in the eVMCS code. - -Note, the initialization needs to be done before CPUID is set, and more -subtly before kvm_check_cpuid(), which potentially enables dynamic -XFEATURES. Sadly, there's no easy way to avoid exposing Hyper-V details -to CPUID or vice versa. Expose kvm_hv_vcpu_init() and the Hyper-V CPUID -signature to CPUID instead of exposing cpuid_entry2_find() outside of -CPUID code. It's hard to envision kvm_hv_vcpu_init() being misused, -whereas cpuid_entry2_find() absolutely shouldn't be used outside of core -CPUID code. - -Fixes: 10d7bf1e46dc ("KVM: x86: hyper-v: Cache guest CPUID leaves determining features availability") -Signed-off-by: Sean Christopherson -Signed-off-by: Vitaly Kuznetsov -Signed-off-by: Sean Christopherson -Link: https://lore.kernel.org/r/20220830133737.1539624-6-vkuznets@redhat.com -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/cpuid.c | 18 +++++++++++++++++- - arch/x86/kvm/hyperv.c | 30 ++++++++++++++---------------- - arch/x86/kvm/hyperv.h | 6 +++++- - 3 files changed, 36 insertions(+), 18 deletions(-) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 2796dde06302..7065462378e2 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -311,6 +311,15 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) - } - EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime); - -+static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent) -+{ -+ struct kvm_cpuid_entry2 *entry; -+ -+ entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE, -+ KVM_CPUID_INDEX_NOT_SIGNIFICANT); -+ return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX; -+} -+ - static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) - { - struct kvm_lapic *apic = vcpu->arch.apic; -@@ -346,7 +355,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) - vcpu->arch.cr4_guest_rsvd_bits = - __cr4_reserved_bits(guest_cpuid_has, vcpu); - -- kvm_hv_set_cpuid(vcpu); -+ kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries, -+ vcpu->arch.cpuid_nent)); - - /* Invoke the vendor callback only after the above state is updated. 
*/ - static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu); -@@ -409,6 +419,12 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, - return 0; - } - -+ if (kvm_cpuid_has_hyperv(e2, nent)) { -+ r = kvm_hv_vcpu_init(vcpu); -+ if (r) -+ return r; -+ } -+ - r = kvm_check_cpuid(vcpu, e2, nent); - if (r) - return r; -diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c -index 8aadd31ed058..bf4729e8cc80 100644 ---- a/arch/x86/kvm/hyperv.c -+++ b/arch/x86/kvm/hyperv.c -@@ -38,9 +38,6 @@ - #include "irq.h" - #include "fpu.h" - --/* "Hv#1" signature */ --#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 -- - #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) - - static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, -@@ -934,7 +931,7 @@ static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) - stimer_prepare_msg(stimer); - } - --static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) -+int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) - { - struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - int i; -@@ -1984,26 +1981,27 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) - return HV_STATUS_SUCCESS; - } - --void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) -+void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled) - { -+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - struct kvm_cpuid_entry2 *entry; -- struct kvm_vcpu_hv *hv_vcpu; - -- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE); -- if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) { -- vcpu->arch.hyperv_enabled = true; -- } else { -- vcpu->arch.hyperv_enabled = false; -- return; -- } -+ vcpu->arch.hyperv_enabled = hyperv_enabled; - -- if (kvm_hv_vcpu_init(vcpu)) -+ if (!hv_vcpu) { -+ /* -+ * KVM should have already allocated kvm_vcpu_hv if Hyper-V is -+ * enabled in CPUID. 
-+ */ -+ WARN_ON_ONCE(vcpu->arch.hyperv_enabled); - return; -- -- hv_vcpu = to_hv_vcpu(vcpu); -+ } - - memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache)); - -+ if (!vcpu->arch.hyperv_enabled) -+ return; -+ - entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); - if (entry) { - hv_vcpu->cpuid_cache.features_eax = entry->eax; -diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h -index da2737f2a956..1030b1b50552 100644 ---- a/arch/x86/kvm/hyperv.h -+++ b/arch/x86/kvm/hyperv.h -@@ -23,6 +23,9 @@ - - #include - -+/* "Hv#1" signature */ -+#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 -+ - /* - * The #defines related to the synthetic debugger are required by KDNet, but - * they are not documented in the Hyper-V TLFS because the synthetic debugger -@@ -141,7 +144,8 @@ void kvm_hv_request_tsc_page_update(struct kvm *kvm); - - void kvm_hv_init_vm(struct kvm *kvm); - void kvm_hv_destroy_vm(struct kvm *kvm); --void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu); -+int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); -+void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled); - int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce); - int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); - int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, --- -2.35.1 - diff --git a/kvm-6.0/kvm-x86-zero-out-entire-hyper-v-cpuid-cache-before-p.patch b/kvm-6.0/kvm-x86-zero-out-entire-hyper-v-cpuid-cache-before-p.patch deleted file mode 100644 index cc93598be0c..00000000000 --- a/kvm-6.0/kvm-x86-zero-out-entire-hyper-v-cpuid-cache-before-p.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 062c933fa4de64d23cc794af74f175ad605bc167 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 30 Aug 2022 15:37:07 +0200 -Subject: KVM: x86: Zero out entire Hyper-V CPUID cache before processing - entries - -From: Vitaly Kuznetsov - -[ Upstream commit ce2196b831b1e9f8982b2904fc3e8658cc0e6573 ] - -Wipe the whole 'hv_vcpu->cpuid_cache' with memset() instead of having to -zero each particular member when the corresponding CPUID entry was not -found. - -No functional change intended. 
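The resulting pattern, excerpted from the hyperv.c hunk below: a single memset() up front replaces the per-field zeroing that each not-found branch used to do:

        memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache));

        entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
        if (entry) {
                hv_vcpu->cpuid_cache.features_eax = entry->eax;
                hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
                hv_vcpu->cpuid_cache.features_edx = entry->edx;
        }
        /* Same shape for the ENLIGHTMENT_INFO and SYNDBG leaves; the
         * "else { ... = 0; }" arms are simply gone. */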
- -Signed-off-by: Vitaly Kuznetsov -[sean: split to separate patch] -Signed-off-by: Sean Christopherson -Reviewed-by: Wei Liu -Link: https://lore.kernel.org/r/20220830133737.1539624-4-vkuznets@redhat.com -Signed-off-by: Paolo Bonzini -Stable-dep-of: 3be29eb7b525 ("KVM: x86: Report error when setting CPUID if Hyper-V allocation fails") -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/hyperv.c | 11 ++--------- - 1 file changed, 2 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c -index ed804447589c..611c349a08bf 100644 ---- a/arch/x86/kvm/hyperv.c -+++ b/arch/x86/kvm/hyperv.c -@@ -2005,31 +2005,24 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) - - hv_vcpu = to_hv_vcpu(vcpu); - -+ memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache)); -+ - entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); - if (entry) { - hv_vcpu->cpuid_cache.features_eax = entry->eax; - hv_vcpu->cpuid_cache.features_ebx = entry->ebx; - hv_vcpu->cpuid_cache.features_edx = entry->edx; -- } else { -- hv_vcpu->cpuid_cache.features_eax = 0; -- hv_vcpu->cpuid_cache.features_ebx = 0; -- hv_vcpu->cpuid_cache.features_edx = 0; - } - - entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO); - if (entry) { - hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax; - hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx; -- } else { -- hv_vcpu->cpuid_cache.enlightenments_eax = 0; -- hv_vcpu->cpuid_cache.enlightenments_ebx = 0; - } - - entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); - if (entry) - hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax; -- else -- hv_vcpu->cpuid_cache.syndbg_cap_eax = 0; - } - - int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce) --- -2.35.1 - diff --git a/kvm-6.0/series b/kvm-6.0/series deleted file mode 100644 index 8ce90645a91..00000000000 --- a/kvm-6.0/series +++ /dev/null @@ -1,21 +0,0 @@ -kvm-x86-mmu-fix-memoryleak-in-kvm_mmu_vendor_module_.patch -kvm-x86-do-proper-cleanup-if-kvm_x86_ops-vm_init-fai.patch -kvm-fix-memoryleak-in-kvm_init.patch -kvm-x86-zero-out-entire-hyper-v-cpuid-cache-before-p.patch -kvm-x86-check-for-existing-hyper-v-vcpu-in-kvm_hv_vc.patch -kvm-x86-report-error-when-setting-cpuid-if-hyper-v-a.patch -kvm-nvmx-treat-general-detect-db-dr7.gd-1-as-fault-l.patch -kvm-nvmx-prioritize-tss-t-flag-dbs-over-monitor-trap.patch -kvm-nvmx-ignore-sipi-that-arrives-in-l2-when-vcpu-is.patch -kvm-vmx-inject-pf-on-encls-as-emulated-pf.patch -kvm-nvmx-unconditionally-clear-mtf_pending-on-nested.patch -kvm-x86-make-kvm_queued_exception-a-properly-named-v.patch -kvm-x86-formalize-blocking-of-nested-pending-excepti.patch -kvm-x86-hoist-nested-event-checks-above-event-inject.patch -kvm-x86-evaluate-ability-to-inject-smi-nmi-irq-after.patch -kvm-nvmx-add-a-helper-to-identify-low-priority-db-tr.patch -kvm-x86-morph-pending-exceptions-to-pending-vm-exits.patch -kvm-ppc-book3s-hv-fix-decrementer-migration.patch -kvm-ppc-book3s-hv-p9-fix-irq-disabling-in-tick-accou.patch -kvm-ppc-book3s-hv-p9-clear-vcpu-cpu-fields-before-en.patch -kvm-ppc-book3s-hv-p9-restore-stolen-time-logging-in-.patch