From: Greg Kroah-Hartman
Date: Fri, 14 May 2021 14:22:20 +0000 (+0200)
Subject: 5.10-stable patches
X-Git-Tag: v5.4.120~100
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c9bf52db0d573e55052d6ae61f6a2de53ba4fa87;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
    kvm-vmx-invoke-nmi-non-ist-entry-instead-of-ist-entry.patch
    kvm-x86-mmu-remove-the-defunct-update_pte-paging-hook.patch
---

diff --git a/queue-5.10/kvm-vmx-invoke-nmi-non-ist-entry-instead-of-ist-entry.patch b/queue-5.10/kvm-vmx-invoke-nmi-non-ist-entry-instead-of-ist-entry.patch
new file mode 100644
index 00000000000..515a292b95f
--- /dev/null
+++ b/queue-5.10/kvm-vmx-invoke-nmi-non-ist-entry-instead-of-ist-entry.patch
@@ -0,0 +1,144 @@
+From a217a6593cec8b315d4c2f344bae33660b39b703 Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan
+Date: Tue, 4 May 2021 21:50:14 +0200
+Subject: KVM/VMX: Invoke NMI non-IST entry instead of IST entry
+
+From: Lai Jiangshan
+
+commit a217a6593cec8b315d4c2f344bae33660b39b703 upstream.
+
+In VMX, the host NMI handler needs to be invoked after NMI VM-Exit.
+Before commit 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect
+call instead of INTn"), this was done by INTn ("int $2"). But INTn
+microcode is relatively expensive, so the commit reworked NMI VM-Exit
+handling to invoke the kernel handler by function call.
+
+But this missed a detail. The NMI entry point for direct invocation is
+fetched from the IDT table and called on the kernel stack. But on 64-bit
+the NMI entry installed in the IDT expects to be invoked on the IST stack.
+It relies on the "NMI executing" variable on the IST stack to work
+correctly, which is at a fixed position in the IST stack. When the entry
+point is unexpectedly called on the kernel stack, the RSP-addressed "NMI
+executing" variable is obviously also on the kernel stack and is
+"uninitialized" and can cause the NMI entry code to run in the wrong way.
+
+Provide a non-ist entry point for VMX which shares the C-function with
+the regular NMI entry and invoke the new asm entry point instead.
+
+On 32-bit this just maps to the regular NMI entry point as 32-bit has no
+ISTs and is not affected.
+
+[ tglx: Made it independent for backporting, massaged changelog ]
+
+Fixes: 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect call instead of INTn")
+Signed-off-by: Lai Jiangshan
+Signed-off-by: Thomas Gleixner
+Tested-by: Lai Jiangshan
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/87r1imi8i1.ffs@nanos.tec.linutronix.de
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/idtentry.h |   15 +++++++++++++++
+ arch/x86/kernel/nmi.c           |   10 ++++++++++
+ arch/x86/kvm/vmx/vmx.c          |   16 +++++++++-------
+ 3 files changed, 34 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/idtentry.h
++++ b/arch/x86/include/asm/idtentry.h
+@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_ma
+ #endif
+
+ /* NMI */
++
++#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
++/*
++ * Special NOIST entry point for VMX which invokes this on the kernel
++ * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
++ * 'executing' marker.
++ *
++ * On 32bit this just uses the regular NMI entry point because 32-bit does
++ * not have ISTs.
++ */
++DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_noist);
++#else
++#define asm_exc_nmi_noist asm_exc_nmi
++#endif
++
+ DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
+ #ifdef CONFIG_XEN_PV
+ DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
+--- a/arch/x86/kernel/nmi.c
++++ b/arch/x86/kernel/nmi.c
+@@ -524,6 +524,16 @@ nmi_restart:
+ 		mds_user_clear_cpu_buffers();
+ }
+
++#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
++DEFINE_IDTENTRY_RAW(exc_nmi_noist)
++{
++	exc_nmi(regs);
++}
++#endif
++#if IS_MODULE(CONFIG_KVM_INTEL)
++EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
++#endif
++
+ void stop_nmi(void)
+ {
+ 	ignore_nmis++;
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -36,6 +36,7 @@
+ #include
+ #include
+ #include
++#include <asm/idtentry.h>
+ #include
+ #include
+ #include
+@@ -6354,18 +6355,17 @@ static void vmx_apicv_post_state_restore
+
+ void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
+
+-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
++static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
++					unsigned long entry)
+ {
+-	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+-	gate_desc *desc = (gate_desc *)host_idt_base + vector;
+-
+ 	kvm_before_interrupt(vcpu);
+-	vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
++	vmx_do_interrupt_nmi_irqoff(entry);
+ 	kvm_after_interrupt(vcpu);
+ }
+
+ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
+ {
++	const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
+ 	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
+
+ 	/* if exit due to PF check for async PF */
+@@ -6376,18 +6376,20 @@ static void handle_exception_nmi_irqoff(
+ 		kvm_machine_check();
+ 	/* We need to handle NMIs before interrupts are enabled */
+ 	else if (is_nmi(intr_info))
+-		handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
++		handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
+ }
+
+ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
+ {
+ 	u32 intr_info = vmx_get_intr_info(vcpu);
++	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
++	gate_desc *desc = (gate_desc *)host_idt_base + vector;
+
+ 	if (WARN_ONCE(!is_external_intr(intr_info),
+ 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
+ 		return;
+
+-	handle_interrupt_nmi_irqoff(vcpu, intr_info);
++	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+ }
+
+ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
diff --git a/queue-5.10/kvm-x86-mmu-remove-the-defunct-update_pte-paging-hook.patch b/queue-5.10/kvm-x86-mmu-remove-the-defunct-update_pte-paging-hook.patch
new file mode 100644
index 00000000000..6ebe394e3c4
--- /dev/null
+++ b/queue-5.10/kvm-x86-mmu-remove-the-defunct-update_pte-paging-hook.patch
@@ -0,0 +1,178 @@
+From c5e2184d1544f9e56140791eff1a351bea2e63b9 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Thu, 14 Jan 2021 16:40:51 -0800
+Subject: KVM: x86/mmu: Remove the defunct update_pte() paging hook
+
+From: Sean Christopherson
+
+commit c5e2184d1544f9e56140791eff1a351bea2e63b9 upstream.
+
+Remove the update_pte() shadow paging logic, which was obsoleted by
+commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
+removed. As pointed out by Yu, KVM never write protects leaf page
+tables for the purposes of shadow paging, and instead marks their
+associated shadow page as unsync so that the guest can write PTEs at
+will.
+
+The update_pte() path, which predates the unsync logic, optimizes COW
+scenarios by refreshing leaf SPTEs when they are written, as opposed to
+zapping the SPTE, restarting the guest, and installing the new SPTE on
+the subsequent fault. Since KVM no longer write-protects leaf page
+tables, update_pte() is unreachable and can be dropped.
+
+Reported-by: Yu Zhang
+Signed-off-by: Sean Christopherson
+Message-Id: <20210115004051.4099250-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/kvm_host.h |    3 --
+ arch/x86/kvm/mmu/mmu.c          |   49 +---------------------------------------
+ arch/x86/kvm/x86.c              |    1 -
+ 3 files changed, 2 insertions(+), 51 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -358,8 +358,6 @@ struct kvm_mmu {
+ 	int (*sync_page)(struct kvm_vcpu *vcpu,
+ 			 struct kvm_mmu_page *sp);
+ 	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
+-	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+-			   u64 *spte, const void *pte);
+ 	hpa_t root_hpa;
+ 	gpa_t root_pgd;
+ 	union kvm_mmu_role mmu_role;
+@@ -1019,7 +1017,6 @@ struct kvm_arch {
+ struct kvm_vm_stat {
+ 	ulong mmu_shadow_zapped;
+ 	ulong mmu_pte_write;
+-	ulong mmu_pte_updated;
+ 	ulong mmu_pde_zapped;
+ 	ulong mmu_flooded;
+ 	ulong mmu_recycled;
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -1715,13 +1715,6 @@ static int nonpaging_sync_page(struct kv
+ 	return 0;
+ }
+
+-static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
+-				 struct kvm_mmu_page *sp, u64 *spte,
+-				 const void *pte)
+-{
+-	WARN_ON(1);
+-}
+-
+ #define KVM_PAGE_ARRAY_NR 16
+
+ struct kvm_mmu_pages {
+@@ -3820,7 +3813,6 @@ static void nonpaging_init_context(struc
+ 	context->gva_to_gpa = nonpaging_gva_to_gpa;
+ 	context->sync_page = nonpaging_sync_page;
+ 	context->invlpg = NULL;
+-	context->update_pte = nonpaging_update_pte;
+ 	context->root_level = 0;
+ 	context->shadow_root_level = PT32E_ROOT_LEVEL;
+ 	context->direct_map = true;
+@@ -4402,7 +4394,6 @@ static void paging64_init_context_common
+ 	context->gva_to_gpa = paging64_gva_to_gpa;
+ 	context->sync_page = paging64_sync_page;
+ 	context->invlpg = paging64_invlpg;
+-	context->update_pte = paging64_update_pte;
+ 	context->shadow_root_level = level;
+ 	context->direct_map = false;
+ }
+@@ -4431,7 +4422,6 @@ static void paging32_init_context(struct
+ 	context->gva_to_gpa = paging32_gva_to_gpa;
+ 	context->sync_page = paging32_sync_page;
+ 	context->invlpg = paging32_invlpg;
+-	context->update_pte = paging32_update_pte;
+ 	context->shadow_root_level = PT32E_ROOT_LEVEL;
+ 	context->direct_map = false;
+ }
+@@ -4513,7 +4503,6 @@ static void init_kvm_tdp_mmu(struct kvm_
+ 	context->page_fault = kvm_tdp_page_fault;
+ 	context->sync_page = nonpaging_sync_page;
+ 	context->invlpg = NULL;
+-	context->update_pte = nonpaging_update_pte;
+ 	context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
+ 	context->direct_map = true;
+ 	context->get_guest_pgd = get_cr3;
+@@ -4690,7 +4679,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_
+ 	context->gva_to_gpa = ept_gva_to_gpa;
+ 	context->sync_page = ept_sync_page;
+ 	context->invlpg = ept_invlpg;
+-	context->update_pte = ept_update_pte;
+ 	context->root_level = level;
+ 	context->direct_map = false;
+ 	context->mmu_role.as_u64 = new_role.as_u64;
+@@ -4838,19 +4826,6 @@ void kvm_mmu_unload(struct kvm_vcp
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_unload);
+
+-static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
+-				  struct kvm_mmu_page *sp, u64 *spte,
+-				  const void *new)
+-{
+-	if (sp->role.level != PG_LEVEL_4K) {
+-		++vcpu->kvm->stat.mmu_pde_zapped;
+-		return;
+-	}
+-
+-	++vcpu->kvm->stat.mmu_pte_updated;
+-	vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
+-}
+-
+ static bool need_remote_flush(u64 old, u64 new)
+ {
+ 	if (!is_shadow_present_pte(old))
+@@ -4966,22 +4941,6 @@ static u64 *get_written_sptes(struct kvm
+ 	return spte;
+ }
+
+-/*
+- * Ignore various flags when determining if a SPTE can be immediately
+- * overwritten for the current MMU.
+- * - level: explicitly checked in mmu_pte_write_new_pte(), and will never
+- *          match the current MMU role, as MMU's level tracks the root level.
+- * - access: updated based on the new guest PTE
+- * - quadrant: handled by get_written_sptes()
+- * - invalid: always false (loop only walks valid shadow pages)
+- */
+-static const union kvm_mmu_page_role role_ign = {
+-	.level = 0xf,
+-	.access = 0x7,
+-	.quadrant = 0x3,
+-	.invalid = 0x1,
+-};
+-
+ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+ 			      const u8 *new, int bytes,
+ 			      struct kvm_page_track_notifier_node *node)
+@@ -5032,14 +4991,10 @@ static void kvm_mmu_pte_write(struct kvm
+
+ 	local_flush = true;
+ 	while (npte--) {
+-		u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
+-
+ 		entry = *spte;
+ 		mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
+-		if (gentry &&
+-		    !((sp->role.word ^ base_role) & ~role_ign.word) &&
+-		    rmap_can_add(vcpu))
+-			mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
++		if (gentry && sp->role.level != PG_LEVEL_4K)
++			++vcpu->kvm->stat.mmu_pde_zapped;
+ 		if (need_remote_flush(entry, *spte))
+ 			remote_flush = true;
+ 		++spte;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -233,7 +233,6 @@ struct kvm_stats_debugfs_item debugfs_en
+ 	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ 	VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
+ 	VM_STAT("mmu_pte_write", mmu_pte_write),
+-	VM_STAT("mmu_pte_updated", mmu_pte_updated),
+ 	VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
+ 	VM_STAT("mmu_flooded", mmu_flooded),
+ 	VM_STAT("mmu_recycled", mmu_recycled),
diff --git a/queue-5.10/series b/queue-5.10/series
index 2acbcdbf6d5..3af15e86372 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -2,3 +2,5 @@ keys-trusted-fix-memory-leak-on-object-td.patch
 tpm-fix-error-return-code-in-tpm2_get_cc_attrs_tbl.patch
 tpm-tpm_tis-extend-locality-handling-to-tpm2-in-tpm_tis_gen_interrupt.patch
 tpm-tpm_tis-reserve-locality-in-tpm_tis_resume.patch
+kvm-x86-mmu-remove-the-defunct-update_pte-paging-hook.patch
+kvm-vmx-invoke-nmi-non-ist-entry-instead-of-ist-entry.patch