--- /dev/null
+From a217a6593cec8b315d4c2f344bae33660b39b703 Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+Date: Tue, 4 May 2021 21:50:14 +0200
+Subject: KVM/VMX: Invoke NMI non-IST entry instead of IST entry
+
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+
+commit a217a6593cec8b315d4c2f344bae33660b39b703 upstream.
+
+In VMX, the host NMI handler needs to be invoked after NMI VM-Exit.
+Before commit 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect
+call instead of INTn"), this was done by INTn ("int $2"). But INTn
+microcode is relatively expensive, so the commit reworked NMI VM-Exit
+handling to invoke the kernel handler by function call.
+
+But this missed a detail. The NMI entry point for direct invocation is
+fetched from the IDT table and called on the kernel stack. On 64-bit,
+however, the NMI entry installed in the IDT expects to be invoked on the
+IST stack: to work correctly it relies on the "NMI executing" variable,
+which sits at a fixed position on the IST stack. When the entry point is
+unexpectedly called on the kernel stack, the RSP-addressed "NMI executing"
+slot is also on the kernel stack and therefore uninitialized, which can
+cause the NMI entry code to misbehave.
+
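+As a rough illustration (a minimal user-space sketch, not the kernel
+entry code; the stack layout, STACK_SIZE and nmi_like_handler() are made
+up for this example), the pattern that breaks is that a reentrancy
+marker addressed at a fixed offset is only meaningful on a dedicated
+stack whose contents are known, not on whatever stack the caller happens
+to be running on:
+
+	#include <stdio.h>
+	#include <string.h>
+
+	#define STACK_SIZE 64
+
+	struct stack {
+		unsigned long slots[STACK_SIZE];
+	};
+
+	/* Expects slots[STACK_SIZE - 1] to hold an "executing" flag. */
+	static void nmi_like_handler(struct stack *stk)
+	{
+		unsigned long *executing = &stk->slots[STACK_SIZE - 1];
+
+		if (*executing) {
+			puts("treated as nested entry (flag already set)");
+			return;
+		}
+		*executing = 1;
+		puts("handler body runs");
+		*executing = 0;
+	}
+
+	int main(void)
+	{
+		struct stack ist = { 0 };	/* dedicated stack, flag known clear */
+		struct stack other;		/* arbitrary stack, flag is garbage */
+
+		memset(&other, 0xa5, sizeof(other));
+		nmi_like_handler(&ist);		/* behaves as intended */
+		nmi_like_handler(&other);	/* misfires on stale data */
+		return 0;
+	}
+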
+Provide a non-IST entry point for VMX which shares the C function with
+the regular NMI entry, and invoke the new asm entry point instead.
+
+On 32-bit this just maps to the regular NMI entry point as 32-bit has no
+ISTs and is not affected.
+
+[ tglx: Made it independent for backporting, massaged changelog ]
+
+Fixes: 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect call instead of INTn")
+Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/87r1imi8i1.ffs@nanos.tec.linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/idtentry.h | 15 +++++++++++++++
+ arch/x86/kernel/nmi.c | 10 ++++++++++
+ arch/x86/kvm/vmx/vmx.c | 16 +++++++++-------
+ 3 files changed, 34 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/idtentry.h
++++ b/arch/x86/include/asm/idtentry.h
+@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_ma
+ #endif
+
+ /* NMI */
++
++#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
++/*
++ * Special NOIST entry point for VMX which invokes this on the kernel
++ * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
++ * 'executing' marker.
++ *
++ * On 32-bit this just uses the regular NMI entry point because 32-bit does
++ * not have ISTs.
++ */
++DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_noist);
++#else
++#define asm_exc_nmi_noist asm_exc_nmi
++#endif
++
+ DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
+ #ifdef CONFIG_XEN_PV
+ DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
+--- a/arch/x86/kernel/nmi.c
++++ b/arch/x86/kernel/nmi.c
+@@ -524,6 +524,16 @@ nmi_restart:
+ mds_user_clear_cpu_buffers();
+ }
+
++#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
++DEFINE_IDTENTRY_RAW(exc_nmi_noist)
++{
++ exc_nmi(regs);
++}
++#endif
++#if IS_MODULE(CONFIG_KVM_INTEL)
++EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
++#endif
++
+ void stop_nmi(void)
+ {
+ ignore_nmis++;
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -36,6 +36,7 @@
+ #include <asm/debugreg.h>
+ #include <asm/desc.h>
+ #include <asm/fpu/internal.h>
++#include <asm/idtentry.h>
+ #include <asm/io.h>
+ #include <asm/irq_remapping.h>
+ #include <asm/kexec.h>
+@@ -6354,18 +6355,17 @@ static void vmx_apicv_post_state_restore
+
+ void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
+
+-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
++static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
++ unsigned long entry)
+ {
+- unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+- gate_desc *desc = (gate_desc *)host_idt_base + vector;
+-
+ kvm_before_interrupt(vcpu);
+- vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
++ vmx_do_interrupt_nmi_irqoff(entry);
+ kvm_after_interrupt(vcpu);
+ }
+
+ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
+ {
++ const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
+ u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
+
+ /* if exit due to PF check for async PF */
+@@ -6376,18 +6376,20 @@ static void handle_exception_nmi_irqoff(
+ kvm_machine_check();
+ /* We need to handle NMIs before interrupts are enabled */
+ else if (is_nmi(intr_info))
+- handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
++ handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
+ }
+
+ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
+ {
+ u32 intr_info = vmx_get_intr_info(vcpu);
++ unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
++ gate_desc *desc = (gate_desc *)host_idt_base + vector;
+
+ if (WARN_ONCE(!is_external_intr(intr_info),
+ "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
+ return;
+
+- handle_interrupt_nmi_irqoff(vcpu, intr_info);
++ handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+ }
+
+ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
--- /dev/null
+From c5e2184d1544f9e56140791eff1a351bea2e63b9 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 14 Jan 2021 16:40:51 -0800
+Subject: KVM: x86/mmu: Remove the defunct update_pte() paging hook
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c5e2184d1544f9e56140791eff1a351bea2e63b9 upstream.
+
+Remove the update_pte() shadow paging logic, which was obsoleted by
+commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
+removed. As pointed out by Yu, KVM never write-protects leaf page
+tables for the purposes of shadow paging, and instead marks their
+associated shadow page as unsync so that the guest can write PTEs at
+will.
+
+The update_pte() path, which predates the unsync logic, optimizes COW
+scenarios by refreshing leaf SPTEs when they are written, as opposed to
+zapping the SPTE, restarting the guest, and installing the new SPTE on
+the subsequent fault. Since KVM no longer write-protects leaf page
+tables, update_pte() is unreachable and can be dropped.
+
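+As a rough sketch of the two strategies (a stand-alone toy model, not
+KVM code; guest_pte[], shadow_spte[] and translate() are invented for
+illustration), the now-dead path refreshed the derived entry at write
+time, while the surviving path simply zaps it and lets the next access
+fault and rebuild it:
+
+	#include <stdio.h>
+
+	#define NR_PTES 4
+
+	static unsigned long guest_pte[NR_PTES];
+	static unsigned long shadow_spte[NR_PTES];	/* 0 == not present */
+
+	static unsigned long translate(unsigned long gpte)
+	{
+		return gpte ? (gpte | 0x1) : 0;	/* stand-in for SPTE construction */
+	}
+
+	/* update_pte()-style: refresh the shadow entry at write time. */
+	static void write_pte_refresh(int i, unsigned long val)
+	{
+		guest_pte[i] = val;
+		shadow_spte[i] = translate(val);
+	}
+
+	/* Current model: zap; the next access "faults" and rebuilds it. */
+	static void write_pte_zap(int i, unsigned long val)
+	{
+		guest_pte[i] = val;
+		shadow_spte[i] = 0;
+	}
+
+	static unsigned long access_pte(int i)
+	{
+		if (!shadow_spte[i])
+			shadow_spte[i] = translate(guest_pte[i]);
+		return shadow_spte[i];
+	}
+
+	int main(void)
+	{
+		write_pte_refresh(0, 0x1000);
+		write_pte_zap(1, 0x2000);
+		printf("refresh: %#lx, zap+refault: %#lx\n",
+		       access_pte(0), access_pte(1));
+		return 0;
+	}
+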
+Reported-by: Yu Zhang <yu.c.zhang@intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210115004051.4099250-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h | 3 --
+ arch/x86/kvm/mmu/mmu.c | 49 +---------------------------------------
+ arch/x86/kvm/x86.c | 1
+ 3 files changed, 2 insertions(+), 51 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -358,8 +358,6 @@ struct kvm_mmu {
+ int (*sync_page)(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp);
+ void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
+- void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+- u64 *spte, const void *pte);
+ hpa_t root_hpa;
+ gpa_t root_pgd;
+ union kvm_mmu_role mmu_role;
+@@ -1019,7 +1017,6 @@ struct kvm_arch {
+ struct kvm_vm_stat {
+ ulong mmu_shadow_zapped;
+ ulong mmu_pte_write;
+- ulong mmu_pte_updated;
+ ulong mmu_pde_zapped;
+ ulong mmu_flooded;
+ ulong mmu_recycled;
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -1715,13 +1715,6 @@ static int nonpaging_sync_page(struct kv
+ return 0;
+ }
+
+-static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
+- struct kvm_mmu_page *sp, u64 *spte,
+- const void *pte)
+-{
+- WARN_ON(1);
+-}
+-
+ #define KVM_PAGE_ARRAY_NR 16
+
+ struct kvm_mmu_pages {
+@@ -3820,7 +3813,6 @@ static void nonpaging_init_context(struc
+ context->gva_to_gpa = nonpaging_gva_to_gpa;
+ context->sync_page = nonpaging_sync_page;
+ context->invlpg = NULL;
+- context->update_pte = nonpaging_update_pte;
+ context->root_level = 0;
+ context->shadow_root_level = PT32E_ROOT_LEVEL;
+ context->direct_map = true;
+@@ -4402,7 +4394,6 @@ static void paging64_init_context_common
+ context->gva_to_gpa = paging64_gva_to_gpa;
+ context->sync_page = paging64_sync_page;
+ context->invlpg = paging64_invlpg;
+- context->update_pte = paging64_update_pte;
+ context->shadow_root_level = level;
+ context->direct_map = false;
+ }
+@@ -4431,7 +4422,6 @@ static void paging32_init_context(struct
+ context->gva_to_gpa = paging32_gva_to_gpa;
+ context->sync_page = paging32_sync_page;
+ context->invlpg = paging32_invlpg;
+- context->update_pte = paging32_update_pte;
+ context->shadow_root_level = PT32E_ROOT_LEVEL;
+ context->direct_map = false;
+ }
+@@ -4513,7 +4503,6 @@ static void init_kvm_tdp_mmu(struct kvm_
+ context->page_fault = kvm_tdp_page_fault;
+ context->sync_page = nonpaging_sync_page;
+ context->invlpg = NULL;
+- context->update_pte = nonpaging_update_pte;
+ context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
+ context->direct_map = true;
+ context->get_guest_pgd = get_cr3;
+@@ -4690,7 +4679,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_
+ context->gva_to_gpa = ept_gva_to_gpa;
+ context->sync_page = ept_sync_page;
+ context->invlpg = ept_invlpg;
+- context->update_pte = ept_update_pte;
+ context->root_level = level;
+ context->direct_map = false;
+ context->mmu_role.as_u64 = new_role.as_u64;
+@@ -4838,19 +4826,6 @@ void kvm_mmu_unload(struct kvm_vcpu *vcp
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_unload);
+
+-static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
+- struct kvm_mmu_page *sp, u64 *spte,
+- const void *new)
+-{
+- if (sp->role.level != PG_LEVEL_4K) {
+- ++vcpu->kvm->stat.mmu_pde_zapped;
+- return;
+- }
+-
+- ++vcpu->kvm->stat.mmu_pte_updated;
+- vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
+-}
+-
+ static bool need_remote_flush(u64 old, u64 new)
+ {
+ if (!is_shadow_present_pte(old))
+@@ -4966,22 +4941,6 @@ static u64 *get_written_sptes(struct kvm
+ return spte;
+ }
+
+-/*
+- * Ignore various flags when determining if a SPTE can be immediately
+- * overwritten for the current MMU.
+- * - level: explicitly checked in mmu_pte_write_new_pte(), and will never
+- * match the current MMU role, as MMU's level tracks the root level.
+- * - access: updated based on the new guest PTE
+- * - quadrant: handled by get_written_sptes()
+- * - invalid: always false (loop only walks valid shadow pages)
+- */
+-static const union kvm_mmu_page_role role_ign = {
+- .level = 0xf,
+- .access = 0x7,
+- .quadrant = 0x3,
+- .invalid = 0x1,
+-};
+-
+ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+ const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node)
+@@ -5032,14 +4991,10 @@ static void kvm_mmu_pte_write(struct kvm
+
+ local_flush = true;
+ while (npte--) {
+- u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
+-
+ entry = *spte;
+ mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
+- if (gentry &&
+- !((sp->role.word ^ base_role) & ~role_ign.word) &&
+- rmap_can_add(vcpu))
+- mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
++ if (gentry && sp->role.level != PG_LEVEL_4K)
++ ++vcpu->kvm->stat.mmu_pde_zapped;
+ if (need_remote_flush(entry, *spte))
+ remote_flush = true;
+ ++spte;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -233,7 +233,6 @@ struct kvm_stats_debugfs_item debugfs_en
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
+ VM_STAT("mmu_pte_write", mmu_pte_write),
+- VM_STAT("mmu_pte_updated", mmu_pte_updated),
+ VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
+ VM_STAT("mmu_flooded", mmu_flooded),
+ VM_STAT("mmu_recycled", mmu_recycled),