From: Greg Kroah-Hartman Date: Sun, 5 Dec 2021 12:44:11 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v4.4.294~41 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=32db3ea8e2ad5b74e1b9b3a3f0b968f59d5cf321;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch kvm-mmu-shadow-nested-paging-does-not-have-pku.patch kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch tracing-histograms-string-compares-should-not-care-about-signed-values.patch --- diff --git a/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch b/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch new file mode 100644 index 00000000000..a723fde5128 --- /dev/null +++ b/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch @@ -0,0 +1,54 @@ +From 1f80d15020d7f130194821feb1432b67648c632d Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Thu, 25 Nov 2021 15:20:14 +0000 +Subject: KVM: arm64: Avoid setting the upper 32 bits of TCR_EL2 and CPTR_EL2 to 1 + +From: Catalin Marinas + +commit 1f80d15020d7f130194821feb1432b67648c632d upstream. + +Having a signed (1 << 31) constant for TCR_EL2_RES1 and CPTR_EL2_TCPAC +causes the upper 32-bit to be set to 1 when assigning them to a 64-bit +variable. Bit 32 in TCR_EL2 is no longer RES0 in ARMv8.7: with FEAT_LPA2 +it changes the meaning of bits 49:48 and 9:8 in the stage 1 EL2 page +table entries. As a result of the sign-extension, a non-VHE kernel can +no longer boot on a model with ARMv8.7 enabled. + +CPTR_EL2 still has the top 32 bits RES0 but we should preempt any future +problems + +Make these top bit constants unsigned as per commit df655b75c43f +("arm64: KVM: Avoid setting the upper 32 bits of VTCR_EL2 to 1"). 
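+
+To make the sign-extension concrete (illustrative snippet, not part of the
+upstream commit):
+
+	u64 bad  = (1 << 31) | (1 << 23);  /* int result is negative, so the */
+	                                   /* u64 conversion sets bits 63:32 */
+	u64 good = (1U << 31) | (1 << 23); /* unsigned: bits 63:32 stay zero */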
+ +Signed-off-by: Catalin Marinas +Reported-by: Chris January +Cc: +Cc: Will Deacon +Cc: Marc Zyngier +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20211125152014.2806582-1-catalin.marinas@arm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/kvm_arm.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm64/include/asm/kvm_arm.h ++++ b/arch/arm64/include/asm/kvm_arm.h +@@ -91,7 +91,7 @@ + #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) + + /* TCR_EL2 Registers bits */ +-#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) ++#define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) + #define TCR_EL2_TBI (1 << 20) + #define TCR_EL2_PS_SHIFT 16 + #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) +@@ -276,7 +276,7 @@ + #define CPTR_EL2_TFP_SHIFT 10 + + /* Hyp Coprocessor Trap Register */ +-#define CPTR_EL2_TCPAC (1 << 31) ++#define CPTR_EL2_TCPAC (1U << 31) + #define CPTR_EL2_TAM (1 << 30) + #define CPTR_EL2_TTA (1 << 20) + #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) diff --git a/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch b/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch new file mode 100644 index 00000000000..575ae8f549c --- /dev/null +++ b/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch @@ -0,0 +1,148 @@ +From bda44d844758c70c8dc1478e6fc9c25efa90c5a7 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 4 Nov 2021 00:25:02 +0000 +Subject: KVM: Ensure local memslot copies operate on up-to-date arch-specific data + +From: Sean Christopherson + +commit bda44d844758c70c8dc1478e6fc9c25efa90c5a7 upstream. + +When modifying memslots, snapshot the "old" memslot and copy it to the +"new" memslot's arch data after (re)acquiring slots_arch_lock. x86 can +change a memslot's arch data while memslot updates are in-progress so +long as it holds slots_arch_lock, thus snapshotting a memslot without +holding the lock can result in the consumption of stale data. + +Fixes: b10a038e84d1 ("KVM: mmu: Add slots_arch_lock for memslot arch fields") +Cc: stable@vger.kernel.org +Cc: Ben Gardon +Signed-off-by: Sean Christopherson +Message-Id: <20211104002531.1176691-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 47 +++++++++++++++++++++++++++++++---------------- + 1 file changed, 31 insertions(+), 16 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1523,11 +1523,10 @@ static struct kvm_memslots *kvm_dup_mems + + static int kvm_set_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, +- struct kvm_memory_slot *old, + struct kvm_memory_slot *new, int as_id, + enum kvm_mr_change change) + { +- struct kvm_memory_slot *slot; ++ struct kvm_memory_slot *slot, old; + struct kvm_memslots *slots; + int r; + +@@ -1558,7 +1557,7 @@ static int kvm_set_memslot(struct kvm *k + * Note, the INVALID flag needs to be in the appropriate entry + * in the freshly allocated memslots, not in @old or @new. 
+ */ +- slot = id_to_memslot(slots, old->id); ++ slot = id_to_memslot(slots, new->id); + slot->flags |= KVM_MEMSLOT_INVALID; + + /* +@@ -1589,6 +1588,26 @@ static int kvm_set_memslot(struct kvm *k + kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id)); + } + ++ /* ++ * Make a full copy of the old memslot, the pointer will become stale ++ * when the memslots are re-sorted by update_memslots(), and the old ++ * memslot needs to be referenced after calling update_memslots(), e.g. ++ * to free its resources and for arch specific behavior. This needs to ++ * happen *after* (re)acquiring slots_arch_lock. ++ */ ++ slot = id_to_memslot(slots, new->id); ++ if (slot) { ++ old = *slot; ++ } else { ++ WARN_ON_ONCE(change != KVM_MR_CREATE); ++ memset(&old, 0, sizeof(old)); ++ old.id = new->id; ++ old.as_id = as_id; ++ } ++ ++ /* Copy the arch-specific data, again after (re)acquiring slots_arch_lock. */ ++ memcpy(&new->arch, &old.arch, sizeof(old.arch)); ++ + r = kvm_arch_prepare_memory_region(kvm, new, mem, change); + if (r) + goto out_slots; +@@ -1596,14 +1615,18 @@ static int kvm_set_memslot(struct kvm *k + update_memslots(slots, new, change); + slots = install_new_memslots(kvm, as_id, slots); + +- kvm_arch_commit_memory_region(kvm, mem, old, new, change); ++ kvm_arch_commit_memory_region(kvm, mem, &old, new, change); ++ ++ /* Free the old memslot's metadata. Note, this is the full copy!!! */ ++ if (change == KVM_MR_DELETE) ++ kvm_free_memslot(kvm, &old); + + kvfree(slots); + return 0; + + out_slots: + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { +- slot = id_to_memslot(slots, old->id); ++ slot = id_to_memslot(slots, new->id); + slot->flags &= ~KVM_MEMSLOT_INVALID; + slots = install_new_memslots(kvm, as_id, slots); + } else { +@@ -1618,7 +1641,6 @@ static int kvm_delete_memslot(struct kvm + struct kvm_memory_slot *old, int as_id) + { + struct kvm_memory_slot new; +- int r; + + if (!old->npages) + return -EINVAL; +@@ -1631,12 +1653,7 @@ static int kvm_delete_memslot(struct kvm + */ + new.as_id = as_id; + +- r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE); +- if (r) +- return r; +- +- kvm_free_memslot(kvm, old); +- return 0; ++ return kvm_set_memslot(kvm, mem, &new, as_id, KVM_MR_DELETE); + } + + /* +@@ -1711,7 +1728,6 @@ int __kvm_set_memory_region(struct kvm * + if (!old.npages) { + change = KVM_MR_CREATE; + new.dirty_bitmap = NULL; +- memset(&new.arch, 0, sizeof(new.arch)); + } else { /* Modify an existing slot. */ + if ((new.userspace_addr != old.userspace_addr) || + (new.npages != old.npages) || +@@ -1725,9 +1741,8 @@ int __kvm_set_memory_region(struct kvm * + else /* Nothing to change. */ + return 0; + +- /* Copy dirty_bitmap and arch from the current memslot. */ ++ /* Copy dirty_bitmap from the current memslot. 
*/ + new.dirty_bitmap = old.dirty_bitmap; +- memcpy(&new.arch, &old.arch, sizeof(new.arch)); + } + + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { +@@ -1753,7 +1768,7 @@ int __kvm_set_memory_region(struct kvm * + bitmap_set(new.dirty_bitmap, 0, new.npages); + } + +- r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change); ++ r = kvm_set_memslot(kvm, mem, &new, as_id, change); + if (r) + goto out_bitmap; + diff --git a/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch b/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch new file mode 100644 index 00000000000..41181435221 --- /dev/null +++ b/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch @@ -0,0 +1,39 @@ +From 28f091bc2f8c23b7eac2402956b692621be7f9f4 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 13:01:37 -0500 +Subject: KVM: MMU: shadow nested paging does not have PKU + +From: Paolo Bonzini + +commit 28f091bc2f8c23b7eac2402956b692621be7f9f4 upstream. + +Initialize the mask for PKU permissions as if CR4.PKE=0, avoiding +incorrect interpretations of the nested hypervisor's page tables. + +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -4852,7 +4852,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_ + struct kvm_mmu *context = &vcpu->arch.guest_mmu; + struct kvm_mmu_role_regs regs = { + .cr0 = cr0, +- .cr4 = cr4, ++ .cr4 = cr4 & ~X86_CR4_PKE, + .efer = efer, + }; + union kvm_mmu_role new_role; +@@ -4916,7 +4916,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_ + context->direct_map = false; + + update_permission_bitmask(context, true); +- update_pkru_bitmask(context); ++ context->pkru_mask = 0; + reset_rsvds_bits_mask_ept(vcpu, context, execonly); + reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); + } diff --git a/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch b/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch new file mode 100644 index 00000000000..eaf304d83ec --- /dev/null +++ b/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch @@ -0,0 +1,123 @@ +From 40e5f9080472b614eeedcc5ba678289cd98d70df Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 25 Nov 2021 01:49:43 +0000 +Subject: KVM: nVMX: Abide to KVM_REQ_TLB_FLUSH_GUEST request on nested vmentry/vmexit + +From: Sean Christopherson + +commit 40e5f9080472b614eeedcc5ba678289cd98d70df upstream. + +Like KVM_REQ_TLB_FLUSH_CURRENT, the GUEST variant needs to be serviced at +nested transitions, as KVM doesn't track requests for L1 vs L2. E.g. if +there's a pending flush when a nested VM-Exit occurs, then the flush was +requested in the context of L2 and needs to be handled before switching +to L1, otherwise the flush for L2 would effectiely be lost. + +Opportunistically add a helper to handle CURRENT and GUEST as a pair, the +logic for when they need to be serviced is identical as both requests are +tied to L1 vs. L2, the only difference is the scope of the flush. 
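+
+The helper is deliberately minimal; kvm_service_local_tlb_flush_requests(),
+added in the x86.c hunk below, is just the two checks back to back:
+
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+		kvm_vcpu_flush_tlb_current(vcpu);
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+		kvm_vcpu_flush_tlb_guest(vcpu);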
+ +Reported-by: Lai Jiangshan +Fixes: 07ffaf343e34 ("KVM: nVMX: Sync all PGDs on nested transition with shadow paging") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211125014944.536398-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 8 +++----- + arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++---- + arch/x86/kvm/x86.h | 7 +------ + 3 files changed, 28 insertions(+), 15 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3355,8 +3355,7 @@ enum nvmx_vmentry_status nested_vmx_ente + }; + u32 failed_index; + +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) +- kvm_vcpu_flush_tlb_current(vcpu); ++ kvm_service_local_tlb_flush_requests(vcpu); + + evaluate_pending_interrupts = exec_controls_get(vmx) & + (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); +@@ -4513,9 +4512,8 @@ void nested_vmx_vmexit(struct kvm_vcpu * + (void)nested_get_evmcs_page(vcpu); + } + +- /* Service the TLB flush request for L2 before switching to L1. */ +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) +- kvm_vcpu_flush_tlb_current(vcpu); ++ /* Service pending TLB flush requests for L2 before switching to L1. */ ++ kvm_service_local_tlb_flush_requests(vcpu); + + /* + * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3193,6 +3193,29 @@ static void kvm_vcpu_flush_tlb_guest(str + static_call(kvm_x86_tlb_flush_guest)(vcpu); + } + ++ ++static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) ++{ ++ ++vcpu->stat.tlb_flush; ++ static_call(kvm_x86_tlb_flush_current)(vcpu); ++} ++ ++/* ++ * Service "local" TLB flush requests, which are specific to the current MMU ++ * context. In addition to the generic event handling in vcpu_enter_guest(), ++ * TLB flushes that are targeted at an MMU context also need to be serviced ++ * prior before nested VM-Enter/VM-Exit. ++ */ ++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu) ++{ ++ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) ++ kvm_vcpu_flush_tlb_current(vcpu); ++ ++ if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) ++ kvm_vcpu_flush_tlb_guest(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests); ++ + static void record_steal_time(struct kvm_vcpu *vcpu) + { + struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; +@@ -9530,10 +9553,7 @@ static int vcpu_enter_guest(struct kvm_v + /* Flushing all ASIDs flushes the current ASID... 
*/ + kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) +- kvm_vcpu_flush_tlb_current(vcpu); +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) +- kvm_vcpu_flush_tlb_guest(vcpu); ++ kvm_service_local_tlb_flush_requests(vcpu); + + if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; +--- a/arch/x86/kvm/x86.h ++++ b/arch/x86/kvm/x86.h +@@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_ + + #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL + ++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); + int kvm_check_nested_events(struct kvm_vcpu *vcpu); + + static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) +@@ -185,12 +186,6 @@ static inline bool mmu_is_nested(struct + return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; + } + +-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) +-{ +- ++vcpu->stat.tlb_flush; +- static_call(kvm_x86_tlb_flush_current)(vcpu); +-} +- + static inline int is_pae(struct kvm_vcpu *vcpu) + { + return kvm_read_cr4_bits(vcpu, X86_CR4_PAE); diff --git a/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch b/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch new file mode 100644 index 00000000000..05ffbd97a5a --- /dev/null +++ b/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch @@ -0,0 +1,82 @@ +From 712494de96f35f3e146b36b752c2afe0fdc0f0cc Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 25 Nov 2021 01:49:44 +0000 +Subject: KVM: nVMX: Emulate guest TLB flush on nested VM-Enter with new vpid12 + +From: Sean Christopherson + +commit 712494de96f35f3e146b36b752c2afe0fdc0f0cc upstream. + +Fully emulate a guest TLB flush on nested VM-Enter which changes vpid12, +i.e. L2's VPID, instead of simply doing INVVPID to flush real hardware's +TLB entries for vpid02. From L1's perspective, changing L2's VPID is +effectively a TLB flush unless "hardware" has previously cached entries +for the new vpid12. Because KVM tracks only a single vpid12, KVM doesn't +know if the new vpid12 has been used in the past and so must treat it as +a brand new, never been used VPID, i.e. must assume that the new vpid12 +represents a TLB flush from L1's perspective. + +For example, if L1 and L2 share a CR3, the first VM-Enter to L2 (with a +VPID) is effectively a TLB flush as hardware/KVM has never seen vpid12 +and thus can't have cached entries in the TLB for vpid12. + +Reported-by: Lai Jiangshan +Fixes: 5c614b3583e7 ("KVM: nVMX: nested VPID emulation") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211125014944.536398-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 37 +++++++++++++++++-------------------- + 1 file changed, 17 insertions(+), 20 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -1180,29 +1180,26 @@ static void nested_vmx_transition_tlb_fl + WARN_ON(!enable_vpid); + + /* +- * If VPID is enabled and used by vmc12, but L2 does not have a unique +- * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate +- * a VPID for L2, flush the current context as the effective ASID is +- * common to both L1 and L2. 
+- * +- * Defer the flush so that it runs after vmcs02.EPTP has been set by +- * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid +- * redundant flushes further down the nested pipeline. +- * +- * If a TLB flush isn't required due to any of the above, and vpid12 is +- * changing then the new "virtual" VPID (vpid12) will reuse the same +- * "real" VPID (vpid02), and so needs to be flushed. There's no direct +- * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for +- * all nested vCPUs. Remember, a flush on VM-Enter does not invalidate +- * guest-physical mappings, so there is no need to sync the nEPT MMU. ++ * VPID is enabled and in use by vmcs12. If vpid12 is changing, then ++ * emulate a guest TLB flush as KVM does not track vpid12 history nor ++ * is the VPID incorporated into the MMU context. I.e. KVM must assume ++ * that the new vpid12 has never been used and thus represents a new ++ * guest ASID that cannot have entries in the TLB. + */ +- if (!nested_has_guest_tlb_tag(vcpu)) { +- kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); +- } else if (is_vmenter && +- vmcs12->virtual_processor_id != vmx->nested.last_vpid) { ++ if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) { + vmx->nested.last_vpid = vmcs12->virtual_processor_id; +- vpid_sync_context(nested_get_vpid02(vcpu)); ++ kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); ++ return; + } ++ ++ /* ++ * If VPID is enabled, used by vmc12, and vpid12 is not changing but ++ * does not have a unique TLB tag (ASID), i.e. EPT is disabled and ++ * KVM was unable to allocate a VPID for L2, flush the current context ++ * as the effective ASID is common to both L1 and L2. ++ */ ++ if (!nested_has_guest_tlb_tag(vcpu)) ++ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } + + static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) diff --git a/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch b/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch new file mode 100644 index 00000000000..a6ea76a7164 --- /dev/null +++ b/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch @@ -0,0 +1,81 @@ +From 2b4a5a5d56881ece3c66b9a9a8943a6f41bd7349 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 25 Nov 2021 01:49:43 +0000 +Subject: KVM: nVMX: Flush current VPID (L1 vs. L2) for KVM_REQ_TLB_FLUSH_GUEST + +From: Sean Christopherson + +commit 2b4a5a5d56881ece3c66b9a9a8943a6f41bd7349 upstream. + +Flush the current VPID when handling KVM_REQ_TLB_FLUSH_GUEST instead of +always flushing vpid01. Any TLB flush that is triggered when L2 is +active is scoped to L2's VPID (if it has one), e.g. if L2 toggles CR4.PGE +and L1 doesn't intercept PGE writes, then KVM's emulation of the TLB +flush needs to be applied to L2's VPID. 
+ +Reported-by: Lai Jiangshan +Fixes: 07ffaf343e34 ("KVM: nVMX: Sync all PGDs on nested transition with shadow paging") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211125014944.536398-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -2916,6 +2916,13 @@ static void vmx_flush_tlb_all(struct kvm + } + } + ++static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) ++{ ++ if (is_guest_mode(vcpu)) ++ return nested_get_vpid02(vcpu); ++ return to_vmx(vcpu)->vpid; ++} ++ + static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) + { + struct kvm_mmu *mmu = vcpu->arch.mmu; +@@ -2928,31 +2935,29 @@ static void vmx_flush_tlb_current(struct + if (enable_ept) + ept_sync_context(construct_eptp(vcpu, root_hpa, + mmu->shadow_root_level)); +- else if (!is_guest_mode(vcpu)) +- vpid_sync_context(to_vmx(vcpu)->vpid); + else +- vpid_sync_context(nested_get_vpid02(vcpu)); ++ vpid_sync_context(vmx_get_current_vpid(vcpu)); + } + + static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) + { + /* +- * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in ++ * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in + * vmx_flush_tlb_guest() for an explanation of why this is ok. + */ +- vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr); ++ vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); + } + + static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) + { + /* +- * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0 +- * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit +- * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is ++ * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a ++ * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are ++ * required to flush GVA->{G,H}PA mappings from the TLB if vpid is + * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), + * i.e. no explicit INVVPID is necessary. + */ +- vpid_sync_context(to_vmx(vcpu)->vpid); ++ vpid_sync_context(vmx_get_current_vpid(vcpu)); + } + + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) diff --git a/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch b/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch new file mode 100644 index 00000000000..f182a080baa --- /dev/null +++ b/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch @@ -0,0 +1,89 @@ +From 7e1901f6c86c896acff6609e0176f93f756d8b2a Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 19:43:09 -0500 +Subject: KVM: VMX: prepare sync_pir_to_irr for running with APICv disabled + +From: Paolo Bonzini + +commit 7e1901f6c86c896acff6609e0176f93f756d8b2a upstream. + +If APICv is disabled for this vCPU, assigned devices may still attempt to +post interrupts. In that case, we need to cancel the vmentry and deliver +the interrupt with KVM_REQ_EVENT. Extend the existing code that handles +injection of L1 interrupts into L2 to cover this case as well. + +vmx_hwapic_irr_update is only called when APICv is active so it would be +confusing to add a check for vcpu->arch.apicv_active in there. Instead, +just use vmx_set_rvi directly in vmx_sync_pir_to_irr. 
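+
+The resulting delivery decision in vmx_sync_pir_to_irr() reduces to (see the
+full hunk below; the comments here are summaries, not from the commit):
+
+	if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
+		vmx_set_rvi(max_irr);			/* inject via RVI */
+	else if (got_posted_interrupt)
+		kvm_make_request(KVM_REQ_EVENT, vcpu);	/* nested or APICv off */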
+ +Cc: stable@vger.kernel.org +Reviewed-by: Maxim Levitsky +Reviewed-by: David Matlack +Reviewed-by: Sean Christopherson +Message-Id: <20211123004311.2954158-3-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 39 +++++++++++++++++++++++++-------------- + 1 file changed, 25 insertions(+), 14 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6242,9 +6242,9 @@ static int vmx_sync_pir_to_irr(struct kv + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + int max_irr; +- bool max_irr_updated; ++ bool got_posted_interrupt; + +- if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm)) ++ if (KVM_BUG_ON(!enable_apicv, vcpu->kvm)) + return -EIO; + + if (pi_test_on(&vmx->pi_desc)) { +@@ -6254,22 +6254,33 @@ static int vmx_sync_pir_to_irr(struct kv + * But on x86 this is just a compiler barrier anyway. + */ + smp_mb__after_atomic(); +- max_irr_updated = ++ got_posted_interrupt = + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); +- +- /* +- * If we are running L2 and L1 has a new pending interrupt +- * which can be injected, this may cause a vmexit or it may +- * be injected into L2. Either way, this interrupt will be +- * processed via KVM_REQ_EVENT, not RVI, because we do not use +- * virtual interrupt delivery to inject L1 interrupts into L2. +- */ +- if (is_guest_mode(vcpu) && max_irr_updated) +- kvm_make_request(KVM_REQ_EVENT, vcpu); + } else { + max_irr = kvm_lapic_find_highest_irr(vcpu); ++ got_posted_interrupt = false; + } +- vmx_hwapic_irr_update(vcpu, max_irr); ++ ++ /* ++ * Newly recognized interrupts are injected via either virtual interrupt ++ * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is ++ * disabled in two cases: ++ * ++ * 1) If L2 is running and the vCPU has a new pending interrupt. If L1 ++ * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a ++ * VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected ++ * into L2, but KVM doesn't use virtual interrupt delivery to inject ++ * interrupts into L2, and so KVM_REQ_EVENT is again needed. ++ * ++ * 2) If APICv is disabled for this vCPU, assigned devices may still ++ * attempt to post interrupts. The posted interrupt vector will cause ++ * a VM-Exit and the subsequent entry will call sync_pir_to_irr. ++ */ ++ if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu)) ++ vmx_set_rvi(max_irr); ++ else if (got_posted_interrupt) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ + return max_irr; + } + diff --git a/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch b/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch new file mode 100644 index 00000000000..c2f16256197 --- /dev/null +++ b/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch @@ -0,0 +1,110 @@ +From 37c4dbf337c5c2cdb24365ffae6ed70ac1e74d7a Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 19:43:10 -0500 +Subject: KVM: x86: check PIR even for vCPUs with disabled APICv + +From: Paolo Bonzini + +commit 37c4dbf337c5c2cdb24365ffae6ed70ac1e74d7a upstream. + +The IRTE for an assigned device can trigger a POSTED_INTR_VECTOR even +if APICv is disabled on the vCPU that receives it. In that case, the +interrupt will just cause a vmexit and leave the ON bit set together +with the PIR bit corresponding to the interrupt. + +Right now, the interrupt would not be delivered until APICv is re-enabled. 
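+
+Schematically, the vCPU is left stranded with the posted-interrupt descriptor
+reading (bitmap notation, for illustration only):
+
+	pi_desc.on     == 1	/* notification outstanding             */
+	pi_desc.pir[v] == 1	/* vector v posted, never copied to IRR */
+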
+However, fixing this is just a matter of always doing the PIR->IRR +synchronization, even if the vCPU has temporarily disabled APICv. + +This is not a problem for performance, or if anything it is an +improvement. First, in the common case where vcpu->arch.apicv_active is +true, one fewer check has to be performed. Second, static_call_cond will +elide the function call if APICv is not present or disabled. Finally, +in the case for AMD hardware we can remove the sync_pir_to_irr callback: +it is only needed for apic_has_interrupt_for_ppr, and that function +already has a fallback for !APICv. + +Cc: stable@vger.kernel.org +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Reviewed-by: David Matlack +Message-Id: <20211123004311.2954158-4-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/lapic.c | 2 +- + arch/x86/kvm/svm/svm.c | 1 - + arch/x86/kvm/x86.c | 18 +++++++++--------- + 3 files changed, 10 insertions(+), 11 deletions(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kv + static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) + { + int highest_irr; +- if (apic->vcpu->arch.apicv_active) ++ if (kvm_x86_ops.sync_pir_to_irr) + highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu); + else + highest_irr = apic_find_highest_irr(apic); +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -4592,7 +4592,6 @@ static struct kvm_x86_ops svm_x86_ops __ + .load_eoi_exitmap = svm_load_eoi_exitmap, + .hwapic_irr_update = svm_hwapic_irr_update, + .hwapic_isr_update = svm_hwapic_isr_update, +- .sync_pir_to_irr = kvm_lapic_find_highest_irr, + .apicv_post_state_restore = avic_post_state_restore, + + .set_tss_addr = svm_set_tss_addr, +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4405,8 +4405,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * + static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) + { +- if (vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + + return kvm_apic_get_state(vcpu, s); + } +@@ -9433,8 +9432,7 @@ static void vcpu_scan_ioapic(struct kvm_ + if (irqchip_split(vcpu->kvm)) + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); + else { +- if (vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + } +@@ -9704,10 +9702,12 @@ static int vcpu_enter_guest(struct kvm_v + + /* + * This handles the case where a posted interrupt was +- * notified with kvm_vcpu_kick. ++ * notified with kvm_vcpu_kick. Assigned devices can ++ * use the POSTED_INTR_VECTOR even if APICv is disabled, ++ * so do it even if APICv is disabled on this vCPU. 
+ */ +- if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ if (kvm_lapic_enabled(vcpu)) ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + + if (kvm_vcpu_exit_request(vcpu)) { + vcpu->mode = OUTSIDE_GUEST_MODE; +@@ -9743,8 +9743,8 @@ static int vcpu_enter_guest(struct kvm_v + if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) + break; + +- if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ if (kvm_lapic_enabled(vcpu)) ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + + if (unlikely(kvm_vcpu_exit_request(vcpu))) { + exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; diff --git a/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch b/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch new file mode 100644 index 00000000000..9df9a0bb02e --- /dev/null +++ b/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch @@ -0,0 +1,33 @@ +From 78311a514099932cd8434d5d2194aa94e56ab67c Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 17 Nov 2021 07:35:44 -0500 +Subject: KVM: x86: ignore APICv if LAPIC is not enabled + +From: Paolo Bonzini + +commit 78311a514099932cd8434d5d2194aa94e56ab67c upstream. + +Synchronize the two calls to kvm_x86_sync_pir_to_irr. The one +in the reenter-guest fast path invoked the callback unconditionally +even if LAPIC is present but disabled. In this case, there are +no interrupts to deliver, and therefore posted interrupts can +be ignored. + +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9723,7 +9723,7 @@ static int vcpu_enter_guest(struct kvm_v + if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) + break; + +- if (vcpu->arch.apicv_active) ++ if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) + static_call(kvm_x86_sync_pir_to_irr)(vcpu); + + if (unlikely(kvm_vcpu_exit_request(vcpu))) { diff --git a/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch b/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch new file mode 100644 index 00000000000..3b3445a2317 --- /dev/null +++ b/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch @@ -0,0 +1,61 @@ +From 574c3c55e969096cea770eda3375ff35ccf91702 Mon Sep 17 00:00:00 2001 +From: Ben Gardon +Date: Mon, 15 Nov 2021 13:17:04 -0800 +Subject: KVM: x86/mmu: Fix TLB flush range when handling disconnected pt + +From: Ben Gardon + +commit 574c3c55e969096cea770eda3375ff35ccf91702 upstream. + +When recursively clearing out disconnected pts, the range based TLB +flush in handle_removed_tdp_mmu_page uses the wrong starting GFN, +resulting in the flush mostly missing the affected range. Fix this by +using base_gfn for the flush. + +In response to feedback from David Matlack on the RFC version of this +patch, also move a few definitions into the for loop in the function to +prevent unintended references to them in the future. 
+ +Fixes: a066e61f13cf ("KVM: x86/mmu: Factor out handling of removed page tables") +CC: stable@vger.kernel.org +Signed-off-by: Ben Gardon +Message-Id: <20211115211704.2621644-1-bgardon@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/tdp_mmu.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/mmu/tdp_mmu.c ++++ b/arch/x86/kvm/mmu/tdp_mmu.c +@@ -316,9 +316,6 @@ static void handle_removed_tdp_mmu_page( + struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt)); + int level = sp->role.level; + gfn_t base_gfn = sp->gfn; +- u64 old_child_spte; +- u64 *sptep; +- gfn_t gfn; + int i; + + trace_kvm_mmu_prepare_zap_page(sp); +@@ -326,8 +323,9 @@ static void handle_removed_tdp_mmu_page( + tdp_mmu_unlink_page(kvm, sp, shared); + + for (i = 0; i < PT64_ENT_PER_PAGE; i++) { +- sptep = rcu_dereference(pt) + i; +- gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); ++ u64 *sptep = rcu_dereference(pt) + i; ++ gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); ++ u64 old_child_spte; + + if (shared) { + /* +@@ -373,7 +371,7 @@ static void handle_removed_tdp_mmu_page( + shared); + } + +- kvm_flush_remote_tlbs_with_address(kvm, gfn, ++ kvm_flush_remote_tlbs_with_address(kvm, base_gfn, + KVM_PAGES_PER_HPAGE(level + 1)); + + call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); diff --git a/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch b/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch new file mode 100644 index 00000000000..d9f68d355e6 --- /dev/null +++ b/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch @@ -0,0 +1,90 @@ +From 53b7ca1a359389276c76fbc9e1009d8626a17e40 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 19:43:11 -0500 +Subject: KVM: x86: Use a stable condition around all VT-d PI paths + +From: Paolo Bonzini + +commit 53b7ca1a359389276c76fbc9e1009d8626a17e40 upstream. + +Currently, checks for whether VT-d PI can be used refer to the current +status of the feature in the current vCPU; or they more or less pick +vCPU 0 in case a specific vCPU is not available. + +However, these checks do not attempt to synchronize with changes to +the IRTE. In particular, there is no path that updates the IRTE when +APICv is re-activated on vCPU 0; and there is no path to wakeup a CPU +that has APICv disabled, if the wakeup occurs because of an IRTE +that points to a posted interrupt. + +To fix this, always go through the VT-d PI path as long as there are +assigned devices and APICv is available on both the host and the VM side. +Since the relevant condition was copied over three times, take the hint +and factor it into a separate function. 
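+
+The factored condition, vmx_can_use_vtd_pi() in the hunk below, consults
+only VM-wide state and never the per-vCPU apicv_active flag:
+
+	static bool vmx_can_use_vtd_pi(struct kvm *kvm)
+	{
+		return irqchip_in_kernel(kvm) && enable_apicv &&
+		       kvm_arch_has_assigned_device(kvm) &&
+		       irq_remapping_cap(IRQ_POSTING_CAP);
+	}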
+ +Suggested-by: Sean Christopherson +Cc: stable@vger.kernel.org +Reviewed-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Reviewed-by: David Matlack +Message-Id: <20211123004311.2954158-5-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/posted_intr.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/vmx/posted_intr.c ++++ b/arch/x86/kvm/vmx/posted_intr.c +@@ -5,6 +5,7 @@ + #include + + #include "lapic.h" ++#include "irq.h" + #include "posted_intr.h" + #include "trace.h" + #include "vmx.h" +@@ -77,13 +78,18 @@ after_clear_sn: + pi_set_on(pi_desc); + } + ++static bool vmx_can_use_vtd_pi(struct kvm *kvm) ++{ ++ return irqchip_in_kernel(kvm) && enable_apicv && ++ kvm_arch_has_assigned_device(kvm) && ++ irq_remapping_cap(IRQ_POSTING_CAP); ++} ++ + void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) + { + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + +- if (!kvm_arch_has_assigned_device(vcpu->kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(vcpu)) ++ if (!vmx_can_use_vtd_pi(vcpu->kvm)) + return; + + /* Set SN when the vCPU is preempted */ +@@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu) + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + +- if (!kvm_arch_has_assigned_device(vcpu->kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(vcpu)) ++ if (!vmx_can_use_vtd_pi(vcpu->kvm)) + return 0; + + WARN_ON(irqs_disabled()); +@@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsi + struct vcpu_data vcpu_info; + int idx, ret = 0; + +- if (!kvm_arch_has_assigned_device(kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(kvm->vcpus[0])) ++ if (!vmx_can_use_vtd_pi(kvm)) + return 0; + + idx = srcu_read_lock(&kvm->irq_srcu); diff --git a/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch b/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch new file mode 100644 index 00000000000..7f6c6888eb0 --- /dev/null +++ b/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch @@ -0,0 +1,36 @@ +From 05b29633c7a956d5675f5fbba70db0d26aa5e73e Mon Sep 17 00:00:00 2001 +From: Lai Jiangshan +Date: Wed, 24 Nov 2021 20:20:46 +0800 +Subject: KVM: X86: Use vcpu->arch.walk_mmu for kvm_mmu_invlpg() + +From: Lai Jiangshan + +commit 05b29633c7a956d5675f5fbba70db0d26aa5e73e upstream. + +INVLPG operates on guest virtual address, which are represented by +vcpu->arch.walk_mmu. In nested virtualization scenarios, +kvm_mmu_invlpg() was using the wrong MMU structure; if L2's invlpg were +emulated by L0 (in practice, it hardly happen) when nested two-dimensional +paging is enabled, the call to ->tlb_flush_gva() would be skipped and +the hardware TLB entry would not be invalidated. 
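+
+As a summary (not from the original message): vcpu->arch.mmu describes the
+translation KVM itself maintains (TDP or shadow pages), while
+vcpu->arch.walk_mmu describes the guest's linear-to-guest-physical walk;
+outside nested setups both point at the same context, which is why using
+arch.mmu only misbehaves when L0 emulates INVLPG on behalf of L2.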
+ +Signed-off-by: Lai Jiangshan +Message-Id: <20211124122055.64424-5-jiangshanlai@gmail.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -5369,7 +5369,7 @@ void kvm_mmu_invalidate_gva(struct kvm_v + + void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) + { +- kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE); ++ kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE); + ++vcpu->stat.invlpg; + } + EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); diff --git a/queue-5.15/series b/queue-5.15/series index acce4ac1dd2..db9f26df691 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -69,3 +69,16 @@ drm-amd-display-allow-dsc-on-supported-mst-branch-devices.patch drm-i915-dp-perform-30ms-delay-after-source-oui-write.patch kvm-fix-avic_set_running-for-preemptable-kernels.patch kvm-disallow-user-memslot-with-size-that-exceeds-unsigned-long.patch +kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch +kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch +kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch +kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch +kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch +kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch +kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch +kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch +kvm-mmu-shadow-nested-paging-does-not-have-pku.patch +kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch +kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch +kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch +tracing-histograms-string-compares-should-not-care-about-signed-values.patch diff --git a/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch b/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch new file mode 100644 index 00000000000..db0fba5cb34 --- /dev/null +++ b/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch @@ -0,0 +1,41 @@ +From 450fec13d9170127678f991698ac1a5b05c02e2f Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Tue, 30 Nov 2021 12:31:23 -0500 +Subject: tracing/histograms: String compares should not care about signed values + +From: Steven Rostedt (VMware) + +commit 450fec13d9170127678f991698ac1a5b05c02e2f upstream. + +When comparing two strings for the "onmatch" histogram trigger, fields +that are strings use string comparisons, which do not care about being +signed or not. + +Do not fail to match two string fields if one is unsigned char array and +the other is a signed char array. 
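+
+For example (hypothetical field pair, not taken from the report):
+
+	char comm[16];		/* field type on the synthetic event */
+	unsigned char comm[16];	/* same-named field on the source    */
+
+The type strings differ, so check_synth_field() falls back to comparing size
+and signedness; the sizes match but is_signed does not, so the "onmatch"
+match was rejected even though string values compare identically either way.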
+
+Link: https://lore.kernel.org/all/20211129123043.5cfd687a@gandalf.local.home/
+
+Cc: stable@vger.kernel.org
+Cc: Tom Zanussi
+Cc: Yafang Shao
+Fixes: b05e89ae7cf3b ("tracing: Accept different type for synthetic event fields")
+Reviewed-by: Masami Hiramatsu
+Reported-by: Sven Schnelle
+Signed-off-by: Steven Rostedt (VMware)
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/trace/trace_events_hist.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -3419,7 +3419,7 @@ static int check_synth_field(struct synt
+ 
+ 	if (strcmp(field->type, hist_field->type) != 0) {
+ 		if (field->size != hist_field->size ||
+-		    field->is_signed != hist_field->is_signed)
++		    (!field->is_string && field->is_signed != hist_field->is_signed))
+ 			return -EINVAL;
+ 	}