git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 5 Dec 2021 12:44:11 +0000 (13:44 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 5 Dec 2021 12:44:11 +0000 (13:44 +0100)
added patches:
kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch
kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch
kvm-mmu-shadow-nested-paging-does-not-have-pku.patch
kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch
kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch
kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch
kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch
kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch
kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch
kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch
kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch
kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch
tracing-histograms-string-compares-should-not-care-about-signed-values.patch

14 files changed:
queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch [new file with mode: 0644]
queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch [new file with mode: 0644]
queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch [new file with mode: 0644]
queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch [new file with mode: 0644]
queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch [new file with mode: 0644]
queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch [new file with mode: 0644]
queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch [new file with mode: 0644]
queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch [new file with mode: 0644]
queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch [new file with mode: 0644]
queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch [new file with mode: 0644]
queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch [new file with mode: 0644]

diff --git a/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch b/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch
new file mode 100644 (file)
index 0000000..a723fde
--- /dev/null
@@ -0,0 +1,54 @@
+From 1f80d15020d7f130194821feb1432b67648c632d Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Thu, 25 Nov 2021 15:20:14 +0000
+Subject: KVM: arm64: Avoid setting the upper 32 bits of TCR_EL2 and CPTR_EL2 to 1
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit 1f80d15020d7f130194821feb1432b67648c632d upstream.
+
+Having a signed (1 << 31) constant for TCR_EL2_RES1 and CPTR_EL2_TCPAC
+causes the upper 32 bits to be set to 1 when assigning them to a 64-bit
+variable. Bit 32 in TCR_EL2 is no longer RES0 in ARMv8.7: with FEAT_LPA2
+it changes the meaning of bits 49:48 and 9:8 in the stage 1 EL2 page
+table entries. As a result of the sign-extension, a non-VHE kernel can
+no longer boot on a model with ARMv8.7 enabled.
+
+CPTR_EL2 still has the top 32 bits RES0, but we should preempt any future
+problems.
+
+Make these top bit constants unsigned as per commit df655b75c43f
+("arm64: KVM: Avoid setting the upper 32 bits of VTCR_EL2 to 1").
+
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Reported-by: Chris January <Chris.January@arm.com>
+Cc: <stable@vger.kernel.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20211125152014.2806582-1-catalin.marinas@arm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/kvm_arm.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/include/asm/kvm_arm.h
++++ b/arch/arm64/include/asm/kvm_arm.h
+@@ -91,7 +91,7 @@
+ #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+ /* TCR_EL2 Registers bits */
+-#define TCR_EL2_RES1          ((1 << 31) | (1 << 23))
++#define TCR_EL2_RES1          ((1U << 31) | (1 << 23))
+ #define TCR_EL2_TBI           (1 << 20)
+ #define TCR_EL2_PS_SHIFT      16
+ #define TCR_EL2_PS_MASK               (7 << TCR_EL2_PS_SHIFT)
+@@ -276,7 +276,7 @@
+ #define CPTR_EL2_TFP_SHIFT 10
+ /* Hyp Coprocessor Trap Register */
+-#define CPTR_EL2_TCPAC        (1 << 31)
++#define CPTR_EL2_TCPAC        (1U << 31)
+ #define CPTR_EL2_TAM  (1 << 30)
+ #define CPTR_EL2_TTA  (1 << 20)
+ #define CPTR_EL2_TFP  (1 << CPTR_EL2_TFP_SHIFT)
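
For context only (not part of the patch above): a minimal userspace C program sketching the sign-extension that the fix avoids; a constant built from the signed (1 << 31) sets the upper 32 bits when assigned to a 64-bit variable, while the unsigned form does not.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* (1 << 31) is a signed int; on common compilers it evaluates to
	 * INT_MIN, so assigning it to a 64-bit variable sign-extends and
	 * sets the upper 32 bits to 1. */
	uint64_t res1_signed   = (1 << 31) | (1 << 23);

	/* (1U << 31) is unsigned, so only bits 31 and 23 end up set. */
	uint64_t res1_unsigned = (1U << 31) | (1 << 23);

	printf("signed:   0x%016llx\n", (unsigned long long)res1_signed);   /* 0xffffffff80800000 */
	printf("unsigned: 0x%016llx\n", (unsigned long long)res1_unsigned); /* 0x0000000080800000 */
	return 0;
}
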
diff --git a/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch b/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch
new file mode 100644 (file)
index 0000000..575ae8f
--- /dev/null
@@ -0,0 +1,148 @@
+From bda44d844758c70c8dc1478e6fc9c25efa90c5a7 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 4 Nov 2021 00:25:02 +0000
+Subject: KVM: Ensure local memslot copies operate on up-to-date arch-specific data
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit bda44d844758c70c8dc1478e6fc9c25efa90c5a7 upstream.
+
+When modifying memslots, snapshot the "old" memslot and copy it to the
+"new" memslot's arch data after (re)acquiring slots_arch_lock.  x86 can
+change a memslot's arch data while memslot updates are in-progress so
+long as it holds slots_arch_lock, thus snapshotting a memslot without
+holding the lock can result in the consumption of stale data.
+
+Fixes: b10a038e84d1 ("KVM: mmu: Add slots_arch_lock for memslot arch fields")
+Cc: stable@vger.kernel.org
+Cc: Ben Gardon <bgardon@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211104002531.1176691-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c |   47 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 31 insertions(+), 16 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1523,11 +1523,10 @@ static struct kvm_memslots *kvm_dup_mems
+ static int kvm_set_memslot(struct kvm *kvm,
+                          const struct kvm_userspace_memory_region *mem,
+-                         struct kvm_memory_slot *old,
+                          struct kvm_memory_slot *new, int as_id,
+                          enum kvm_mr_change change)
+ {
+-      struct kvm_memory_slot *slot;
++      struct kvm_memory_slot *slot, old;
+       struct kvm_memslots *slots;
+       int r;
+@@ -1558,7 +1557,7 @@ static int kvm_set_memslot(struct kvm *k
+                * Note, the INVALID flag needs to be in the appropriate entry
+                * in the freshly allocated memslots, not in @old or @new.
+                */
+-              slot = id_to_memslot(slots, old->id);
++              slot = id_to_memslot(slots, new->id);
+               slot->flags |= KVM_MEMSLOT_INVALID;
+               /*
+@@ -1589,6 +1588,26 @@ static int kvm_set_memslot(struct kvm *k
+               kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
+       }
++      /*
++       * Make a full copy of the old memslot, the pointer will become stale
++       * when the memslots are re-sorted by update_memslots(), and the old
++       * memslot needs to be referenced after calling update_memslots(), e.g.
++       * to free its resources and for arch specific behavior.  This needs to
++       * happen *after* (re)acquiring slots_arch_lock.
++       */
++      slot = id_to_memslot(slots, new->id);
++      if (slot) {
++              old = *slot;
++      } else {
++              WARN_ON_ONCE(change != KVM_MR_CREATE);
++              memset(&old, 0, sizeof(old));
++              old.id = new->id;
++              old.as_id = as_id;
++      }
++
++      /* Copy the arch-specific data, again after (re)acquiring slots_arch_lock. */
++      memcpy(&new->arch, &old.arch, sizeof(old.arch));
++
+       r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
+       if (r)
+               goto out_slots;
+@@ -1596,14 +1615,18 @@ static int kvm_set_memslot(struct kvm *k
+       update_memslots(slots, new, change);
+       slots = install_new_memslots(kvm, as_id, slots);
+-      kvm_arch_commit_memory_region(kvm, mem, old, new, change);
++      kvm_arch_commit_memory_region(kvm, mem, &old, new, change);
++
++      /* Free the old memslot's metadata.  Note, this is the full copy!!! */
++      if (change == KVM_MR_DELETE)
++              kvm_free_memslot(kvm, &old);
+       kvfree(slots);
+       return 0;
+ out_slots:
+       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+-              slot = id_to_memslot(slots, old->id);
++              slot = id_to_memslot(slots, new->id);
+               slot->flags &= ~KVM_MEMSLOT_INVALID;
+               slots = install_new_memslots(kvm, as_id, slots);
+       } else {
+@@ -1618,7 +1641,6 @@ static int kvm_delete_memslot(struct kvm
+                             struct kvm_memory_slot *old, int as_id)
+ {
+       struct kvm_memory_slot new;
+-      int r;
+       if (!old->npages)
+               return -EINVAL;
+@@ -1631,12 +1653,7 @@ static int kvm_delete_memslot(struct kvm
+        */
+       new.as_id = as_id;
+-      r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE);
+-      if (r)
+-              return r;
+-
+-      kvm_free_memslot(kvm, old);
+-      return 0;
++      return kvm_set_memslot(kvm, mem, &new, as_id, KVM_MR_DELETE);
+ }
+ /*
+@@ -1711,7 +1728,6 @@ int __kvm_set_memory_region(struct kvm *
+       if (!old.npages) {
+               change = KVM_MR_CREATE;
+               new.dirty_bitmap = NULL;
+-              memset(&new.arch, 0, sizeof(new.arch));
+       } else { /* Modify an existing slot. */
+               if ((new.userspace_addr != old.userspace_addr) ||
+                   (new.npages != old.npages) ||
+@@ -1725,9 +1741,8 @@ int __kvm_set_memory_region(struct kvm *
+               else /* Nothing to change. */
+                       return 0;
+-              /* Copy dirty_bitmap and arch from the current memslot. */
++              /* Copy dirty_bitmap from the current memslot. */
+               new.dirty_bitmap = old.dirty_bitmap;
+-              memcpy(&new.arch, &old.arch, sizeof(new.arch));
+       }
+       if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+@@ -1753,7 +1768,7 @@ int __kvm_set_memory_region(struct kvm *
+                       bitmap_set(new.dirty_bitmap, 0, new.npages);
+       }
+-      r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
++      r = kvm_set_memslot(kvm, mem, &new, as_id, change);
+       if (r)
+               goto out_bitmap;
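
As background (not taken from the patch): the fix follows the general pattern of copying shared state only after (re)acquiring the lock that writers hold, so the local copy cannot go stale. A minimal userspace sketch of that pattern, with hypothetical names (shared_slot, slot_lock):

#include <pthread.h>

struct slot { long arch_data; };

static struct slot shared_slot;
static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;

/* Writers update shared_slot only while holding slot_lock. */
static void writer_update(long v)
{
	pthread_mutex_lock(&slot_lock);
	shared_slot.arch_data = v;
	pthread_mutex_unlock(&slot_lock);
}

/* A snapshot taken before the lock is (re)acquired could be stale;
 * copying inside the critical section guarantees up-to-date data. */
static struct slot reader_snapshot(void)
{
	struct slot snap;

	pthread_mutex_lock(&slot_lock);
	snap = shared_slot;
	pthread_mutex_unlock(&slot_lock);
	return snap;
}

int main(void)
{
	writer_update(42);
	return reader_snapshot().arch_data == 42 ? 0 : 1;
}
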
diff --git a/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch b/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch
new file mode 100644 (file)
index 0000000..4118143
--- /dev/null
@@ -0,0 +1,39 @@
+From 28f091bc2f8c23b7eac2402956b692621be7f9f4 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 22 Nov 2021 13:01:37 -0500
+Subject: KVM: MMU: shadow nested paging does not have PKU
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 28f091bc2f8c23b7eac2402956b692621be7f9f4 upstream.
+
+Initialize the mask for PKU permissions as if CR4.PKE=0, avoiding
+incorrect interpretations of the nested hypervisor's page tables.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -4852,7 +4852,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_
+       struct kvm_mmu *context = &vcpu->arch.guest_mmu;
+       struct kvm_mmu_role_regs regs = {
+               .cr0 = cr0,
+-              .cr4 = cr4,
++              .cr4 = cr4 & ~X86_CR4_PKE,
+               .efer = efer,
+       };
+       union kvm_mmu_role new_role;
+@@ -4916,7 +4916,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_
+       context->direct_map = false;
+       update_permission_bitmask(context, true);
+-      update_pkru_bitmask(context);
++      context->pkru_mask = 0;
+       reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+       reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
+ }
diff --git a/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch b/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch
new file mode 100644 (file)
index 0000000..eaf304d
--- /dev/null
@@ -0,0 +1,123 @@
+From 40e5f9080472b614eeedcc5ba678289cd98d70df Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 25 Nov 2021 01:49:43 +0000
+Subject: KVM: nVMX: Abide to KVM_REQ_TLB_FLUSH_GUEST request on nested vmentry/vmexit
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 40e5f9080472b614eeedcc5ba678289cd98d70df upstream.
+
+Like KVM_REQ_TLB_FLUSH_CURRENT, the GUEST variant needs to be serviced at
+nested transitions, as KVM doesn't track requests for L1 vs L2.  E.g. if
+there's a pending flush when a nested VM-Exit occurs, then the flush was
+requested in the context of L2 and needs to be handled before switching
+to L1, otherwise the flush for L2 would effectively be lost.
+
+Opportunistically add a helper to handle CURRENT and GUEST as a pair; the
+logic for when they need to be serviced is identical, as both requests are
+tied to L1 vs. L2, and the only difference is the scope of the flush.
+
+Reported-by: Lai Jiangshan <jiangshanlai+lkml@gmail.com>
+Fixes: 07ffaf343e34 ("KVM: nVMX: Sync all PGDs on nested transition with shadow paging")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211125014944.536398-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c |    8 +++-----
+ arch/x86/kvm/x86.c        |   28 ++++++++++++++++++++++++----
+ arch/x86/kvm/x86.h        |    7 +------
+ 3 files changed, 28 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -3355,8 +3355,7 @@ enum nvmx_vmentry_status nested_vmx_ente
+       };
+       u32 failed_index;
+-      if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+-              kvm_vcpu_flush_tlb_current(vcpu);
++      kvm_service_local_tlb_flush_requests(vcpu);
+       evaluate_pending_interrupts = exec_controls_get(vmx) &
+               (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
+@@ -4513,9 +4512,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *
+               (void)nested_get_evmcs_page(vcpu);
+       }
+-      /* Service the TLB flush request for L2 before switching to L1. */
+-      if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+-              kvm_vcpu_flush_tlb_current(vcpu);
++      /* Service pending TLB flush requests for L2 before switching to L1. */
++      kvm_service_local_tlb_flush_requests(vcpu);
+       /*
+        * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3193,6 +3193,29 @@ static void kvm_vcpu_flush_tlb_guest(str
+       static_call(kvm_x86_tlb_flush_guest)(vcpu);
+ }
++
++static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
++{
++      ++vcpu->stat.tlb_flush;
++      static_call(kvm_x86_tlb_flush_current)(vcpu);
++}
++
++/*
++ * Service "local" TLB flush requests, which are specific to the current MMU
++ * context.  In addition to the generic event handling in vcpu_enter_guest(),
++ * TLB flushes that are targeted at an MMU context also need to be serviced
++ * prior before nested VM-Enter/VM-Exit.
++ */
++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
++{
++      if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
++              kvm_vcpu_flush_tlb_current(vcpu);
++
++      if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
++              kvm_vcpu_flush_tlb_guest(vcpu);
++}
++EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
++
+ static void record_steal_time(struct kvm_vcpu *vcpu)
+ {
+       struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+@@ -9530,10 +9553,7 @@ static int vcpu_enter_guest(struct kvm_v
+                       /* Flushing all ASIDs flushes the current ASID... */
+                       kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+               }
+-              if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+-                      kvm_vcpu_flush_tlb_current(vcpu);
+-              if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+-                      kvm_vcpu_flush_tlb_guest(vcpu);
++              kvm_service_local_tlb_flush_requests(vcpu);
+               if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
+                       vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
+--- a/arch/x86/kvm/x86.h
++++ b/arch/x86/kvm/x86.h
+@@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_
+ #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
+ int kvm_check_nested_events(struct kvm_vcpu *vcpu);
+ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
+@@ -185,12 +186,6 @@ static inline bool mmu_is_nested(struct
+       return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
+ }
+-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+-{
+-      ++vcpu->stat.tlb_flush;
+-      static_call(kvm_x86_tlb_flush_current)(vcpu);
+-}
+-
+ static inline int is_pae(struct kvm_vcpu *vcpu)
+ {
+       return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
diff --git a/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch b/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch
new file mode 100644 (file)
index 0000000..05ffbd9
--- /dev/null
@@ -0,0 +1,82 @@
+From 712494de96f35f3e146b36b752c2afe0fdc0f0cc Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 25 Nov 2021 01:49:44 +0000
+Subject: KVM: nVMX: Emulate guest TLB flush on nested VM-Enter with new vpid12
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 712494de96f35f3e146b36b752c2afe0fdc0f0cc upstream.
+
+Fully emulate a guest TLB flush on nested VM-Enter which changes vpid12,
+i.e. L2's VPID, instead of simply doing INVVPID to flush real hardware's
+TLB entries for vpid02.  From L1's perspective, changing L2's VPID is
+effectively a TLB flush unless "hardware" has previously cached entries
+for the new vpid12.  Because KVM tracks only a single vpid12, KVM doesn't
+know if the new vpid12 has been used in the past and so must treat it as
+a brand new, never been used VPID, i.e. must assume that the new vpid12
+represents a TLB flush from L1's perspective.
+
+For example, if L1 and L2 share a CR3, the first VM-Enter to L2 (with a
+VPID) is effectively a TLB flush as hardware/KVM has never seen vpid12
+and thus can't have cached entries in the TLB for vpid12.
+
+Reported-by: Lai Jiangshan <jiangshanlai+lkml@gmail.com>
+Fixes: 5c614b3583e7 ("KVM: nVMX: nested VPID emulation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211125014944.536398-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c |   37 +++++++++++++++++--------------------
+ 1 file changed, 17 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -1180,29 +1180,26 @@ static void nested_vmx_transition_tlb_fl
+       WARN_ON(!enable_vpid);
+       /*
+-       * If VPID is enabled and used by vmc12, but L2 does not have a unique
+-       * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
+-       * a VPID for L2, flush the current context as the effective ASID is
+-       * common to both L1 and L2.
+-       *
+-       * Defer the flush so that it runs after vmcs02.EPTP has been set by
+-       * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
+-       * redundant flushes further down the nested pipeline.
+-       *
+-       * If a TLB flush isn't required due to any of the above, and vpid12 is
+-       * changing then the new "virtual" VPID (vpid12) will reuse the same
+-       * "real" VPID (vpid02), and so needs to be flushed.  There's no direct
+-       * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
+-       * all nested vCPUs.  Remember, a flush on VM-Enter does not invalidate
+-       * guest-physical mappings, so there is no need to sync the nEPT MMU.
++       * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
++       * emulate a guest TLB flush as KVM does not track vpid12 history nor
++       * is the VPID incorporated into the MMU context.  I.e. KVM must assume
++       * that the new vpid12 has never been used and thus represents a new
++       * guest ASID that cannot have entries in the TLB.
+        */
+-      if (!nested_has_guest_tlb_tag(vcpu)) {
+-              kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+-      } else if (is_vmenter &&
+-                 vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
++      if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+               vmx->nested.last_vpid = vmcs12->virtual_processor_id;
+-              vpid_sync_context(nested_get_vpid02(vcpu));
++              kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
++              return;
+       }
++
++      /*
++       * If VPID is enabled, used by vmc12, and vpid12 is not changing but
++       * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
++       * KVM was unable to allocate a VPID for L2, flush the current context
++       * as the effective ASID is common to both L1 and L2.
++       */
++      if (!nested_has_guest_tlb_tag(vcpu))
++              kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+ static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
diff --git a/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch b/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch
new file mode 100644 (file)
index 0000000..a6ea76a
--- /dev/null
@@ -0,0 +1,81 @@
+From 2b4a5a5d56881ece3c66b9a9a8943a6f41bd7349 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 25 Nov 2021 01:49:43 +0000
+Subject: KVM: nVMX: Flush current VPID (L1 vs. L2) for KVM_REQ_TLB_FLUSH_GUEST
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 2b4a5a5d56881ece3c66b9a9a8943a6f41bd7349 upstream.
+
+Flush the current VPID when handling KVM_REQ_TLB_FLUSH_GUEST instead of
+always flushing vpid01.  Any TLB flush that is triggered when L2 is
+active is scoped to L2's VPID (if it has one), e.g. if L2 toggles CR4.PGE
+and L1 doesn't intercept PGE writes, then KVM's emulation of the TLB
+flush needs to be applied to L2's VPID.
+
+Reported-by: Lai Jiangshan <jiangshanlai+lkml@gmail.com>
+Fixes: 07ffaf343e34 ("KVM: nVMX: Sync all PGDs on nested transition with shadow paging")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211125014944.536398-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c |   23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -2916,6 +2916,13 @@ static void vmx_flush_tlb_all(struct kvm
+       }
+ }
++static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
++{
++      if (is_guest_mode(vcpu))
++              return nested_get_vpid02(vcpu);
++      return to_vmx(vcpu)->vpid;
++}
++
+ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_mmu *mmu = vcpu->arch.mmu;
+@@ -2928,31 +2935,29 @@ static void vmx_flush_tlb_current(struct
+       if (enable_ept)
+               ept_sync_context(construct_eptp(vcpu, root_hpa,
+                                               mmu->shadow_root_level));
+-      else if (!is_guest_mode(vcpu))
+-              vpid_sync_context(to_vmx(vcpu)->vpid);
+       else
+-              vpid_sync_context(nested_get_vpid02(vcpu));
++              vpid_sync_context(vmx_get_current_vpid(vcpu));
+ }
+ static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
+ {
+       /*
+-       * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
++       * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
+        * vmx_flush_tlb_guest() for an explanation of why this is ok.
+        */
+-      vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
++      vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
+ }
+ static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
+ {
+       /*
+-       * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
+-       * or a vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit
+-       * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
++       * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
++       * vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit are
++       * required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+        * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
+        * i.e. no explicit INVVPID is necessary.
+        */
+-      vpid_sync_context(to_vmx(vcpu)->vpid);
++      vpid_sync_context(vmx_get_current_vpid(vcpu));
+ }
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
diff --git a/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch b/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch
new file mode 100644 (file)
index 0000000..f182a08
--- /dev/null
@@ -0,0 +1,89 @@
+From 7e1901f6c86c896acff6609e0176f93f756d8b2a Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 22 Nov 2021 19:43:09 -0500
+Subject: KVM: VMX: prepare sync_pir_to_irr for running with APICv disabled
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 7e1901f6c86c896acff6609e0176f93f756d8b2a upstream.
+
+If APICv is disabled for this vCPU, assigned devices may still attempt to
+post interrupts.  In that case, we need to cancel the vmentry and deliver
+the interrupt with KVM_REQ_EVENT.  Extend the existing code that handles
+injection of L1 interrupts into L2 to cover this case as well.
+
+vmx_hwapic_irr_update is only called when APICv is active so it would be
+confusing to add a check for vcpu->arch.apicv_active in there.  Instead,
+just use vmx_set_rvi directly in vmx_sync_pir_to_irr.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Reviewed-by: David Matlack <dmatlack@google.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211123004311.2954158-3-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c |   39 +++++++++++++++++++++++++--------------
+ 1 file changed, 25 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6242,9 +6242,9 @@ static int vmx_sync_pir_to_irr(struct kv
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int max_irr;
+-      bool max_irr_updated;
++      bool got_posted_interrupt;
+-      if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
++      if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
+               return -EIO;
+       if (pi_test_on(&vmx->pi_desc)) {
+@@ -6254,22 +6254,33 @@ static int vmx_sync_pir_to_irr(struct kv
+                * But on x86 this is just a compiler barrier anyway.
+                */
+               smp_mb__after_atomic();
+-              max_irr_updated =
++              got_posted_interrupt =
+                       kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
+-
+-              /*
+-               * If we are running L2 and L1 has a new pending interrupt
+-               * which can be injected, this may cause a vmexit or it may
+-               * be injected into L2.  Either way, this interrupt will be
+-               * processed via KVM_REQ_EVENT, not RVI, because we do not use
+-               * virtual interrupt delivery to inject L1 interrupts into L2.
+-               */
+-              if (is_guest_mode(vcpu) && max_irr_updated)
+-                      kvm_make_request(KVM_REQ_EVENT, vcpu);
+       } else {
+               max_irr = kvm_lapic_find_highest_irr(vcpu);
++              got_posted_interrupt = false;
+       }
+-      vmx_hwapic_irr_update(vcpu, max_irr);
++
++      /*
++       * Newly recognized interrupts are injected via either virtual interrupt
++       * delivery (RVI) or KVM_REQ_EVENT.  Virtual interrupt delivery is
++       * disabled in two cases:
++       *
++       * 1) If L2 is running and the vCPU has a new pending interrupt.  If L1
++       * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
++       * VM-Exit to L1.  If L1 doesn't want to exit, the interrupt is injected
++       * into L2, but KVM doesn't use virtual interrupt delivery to inject
++       * interrupts into L2, and so KVM_REQ_EVENT is again needed.
++       *
++       * 2) If APICv is disabled for this vCPU, assigned devices may still
++       * attempt to post interrupts.  The posted interrupt vector will cause
++       * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
++       */
++      if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
++              vmx_set_rvi(max_irr);
++      else if (got_posted_interrupt)
++              kvm_make_request(KVM_REQ_EVENT, vcpu);
++
+       return max_irr;
+ }
diff --git a/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch b/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch
new file mode 100644 (file)
index 0000000..c2f1625
--- /dev/null
@@ -0,0 +1,110 @@
+From 37c4dbf337c5c2cdb24365ffae6ed70ac1e74d7a Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 22 Nov 2021 19:43:10 -0500
+Subject: KVM: x86: check PIR even for vCPUs with disabled APICv
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 37c4dbf337c5c2cdb24365ffae6ed70ac1e74d7a upstream.
+
+The IRTE for an assigned device can trigger a POSTED_INTR_VECTOR even
+if APICv is disabled on the vCPU that receives it.  In that case, the
+interrupt will just cause a vmexit and leave the ON bit set together
+with the PIR bit corresponding to the interrupt.
+
+Right now, the interrupt would not be delivered until APICv is re-enabled.
+However, fixing this is just a matter of always doing the PIR->IRR
+synchronization, even if the vCPU has temporarily disabled APICv.
+
+This is not a problem for performance, or if anything it is an
+improvement.  First, in the common case where vcpu->arch.apicv_active is
+true, one fewer check has to be performed.  Second, static_call_cond will
+elide the function call if APICv is not present or disabled.  Finally,
+in the case for AMD hardware we can remove the sync_pir_to_irr callback:
+it is only needed for apic_has_interrupt_for_ppr, and that function
+already has a fallback for !APICv.
+
+Cc: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Reviewed-by: David Matlack <dmatlack@google.com>
+Message-Id: <20211123004311.2954158-4-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c   |    2 +-
+ arch/x86/kvm/svm/svm.c |    1 -
+ arch/x86/kvm/x86.c     |   18 +++++++++---------
+ 3 files changed, 10 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kv
+ static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
+ {
+       int highest_irr;
+-      if (apic->vcpu->arch.apicv_active)
++      if (kvm_x86_ops.sync_pir_to_irr)
+               highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
+       else
+               highest_irr = apic_find_highest_irr(apic);
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4592,7 +4592,6 @@ static struct kvm_x86_ops svm_x86_ops __
+       .load_eoi_exitmap = svm_load_eoi_exitmap,
+       .hwapic_irr_update = svm_hwapic_irr_update,
+       .hwapic_isr_update = svm_hwapic_isr_update,
+-      .sync_pir_to_irr = kvm_lapic_find_highest_irr,
+       .apicv_post_state_restore = avic_post_state_restore,
+       .set_tss_addr = svm_set_tss_addr,
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4405,8 +4405,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *
+ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
+                                   struct kvm_lapic_state *s)
+ {
+-      if (vcpu->arch.apicv_active)
+-              static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++      static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+       return kvm_apic_get_state(vcpu, s);
+ }
+@@ -9433,8 +9432,7 @@ static void vcpu_scan_ioapic(struct kvm_
+       if (irqchip_split(vcpu->kvm))
+               kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
+       else {
+-              if (vcpu->arch.apicv_active)
+-                      static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++              static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+               if (ioapic_in_kernel(vcpu->kvm))
+                       kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+       }
+@@ -9704,10 +9702,12 @@ static int vcpu_enter_guest(struct kvm_v
+       /*
+        * This handles the case where a posted interrupt was
+-       * notified with kvm_vcpu_kick.
++       * notified with kvm_vcpu_kick.  Assigned devices can
++       * use the POSTED_INTR_VECTOR even if APICv is disabled,
++       * so do it even if APICv is disabled on this vCPU.
+        */
+-      if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+-              static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++      if (kvm_lapic_enabled(vcpu))
++              static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+       if (kvm_vcpu_exit_request(vcpu)) {
+               vcpu->mode = OUTSIDE_GUEST_MODE;
+@@ -9743,8 +9743,8 @@ static int vcpu_enter_guest(struct kvm_v
+               if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
+                       break;
+-              if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+-                      static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++              if (kvm_lapic_enabled(vcpu))
++                      static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+               if (unlikely(kvm_vcpu_exit_request(vcpu))) {
+                       exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
diff --git a/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch b/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch
new file mode 100644 (file)
index 0000000..9df9a0b
--- /dev/null
@@ -0,0 +1,33 @@
+From 78311a514099932cd8434d5d2194aa94e56ab67c Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Wed, 17 Nov 2021 07:35:44 -0500
+Subject: KVM: x86: ignore APICv if LAPIC is not enabled
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 78311a514099932cd8434d5d2194aa94e56ab67c upstream.
+
+Synchronize the two calls to kvm_x86_sync_pir_to_irr.  The one
+in the reenter-guest fast path invoked the callback unconditionally
+even if LAPIC is present but disabled.  In this case, there are
+no interrupts to deliver, and therefore posted interrupts can
+be ignored.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9723,7 +9723,7 @@ static int vcpu_enter_guest(struct kvm_v
+               if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
+                       break;
+-              if (vcpu->arch.apicv_active)
++              if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+                       static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+               if (unlikely(kvm_vcpu_exit_request(vcpu))) {
diff --git a/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch b/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch
new file mode 100644 (file)
index 0000000..3b3445a
--- /dev/null
@@ -0,0 +1,61 @@
+From 574c3c55e969096cea770eda3375ff35ccf91702 Mon Sep 17 00:00:00 2001
+From: Ben Gardon <bgardon@google.com>
+Date: Mon, 15 Nov 2021 13:17:04 -0800
+Subject: KVM: x86/mmu: Fix TLB flush range when handling disconnected pt
+
+From: Ben Gardon <bgardon@google.com>
+
+commit 574c3c55e969096cea770eda3375ff35ccf91702 upstream.
+
+When recursively clearing out disconnected pts, the range based TLB
+flush in handle_removed_tdp_mmu_page uses the wrong starting GFN,
+resulting in the flush mostly missing the affected range. Fix this by
+using base_gfn for the flush.
+
+In response to feedback from David Matlack on the RFC version of this
+patch, also move a few definitions into the for loop in the function to
+prevent unintended references to them in the future.
+
+Fixes: a066e61f13cf ("KVM: x86/mmu: Factor out handling of removed page tables")
+CC: stable@vger.kernel.org
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20211115211704.2621644-1-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -316,9 +316,6 @@ static void handle_removed_tdp_mmu_page(
+       struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
+       int level = sp->role.level;
+       gfn_t base_gfn = sp->gfn;
+-      u64 old_child_spte;
+-      u64 *sptep;
+-      gfn_t gfn;
+       int i;
+       trace_kvm_mmu_prepare_zap_page(sp);
+@@ -326,8 +323,9 @@ static void handle_removed_tdp_mmu_page(
+       tdp_mmu_unlink_page(kvm, sp, shared);
+       for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+-              sptep = rcu_dereference(pt) + i;
+-              gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
++              u64 *sptep = rcu_dereference(pt) + i;
++              gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
++              u64 old_child_spte;
+               if (shared) {
+                       /*
+@@ -373,7 +371,7 @@ static void handle_removed_tdp_mmu_page(
+                                   shared);
+       }
+-      kvm_flush_remote_tlbs_with_address(kvm, gfn,
++      kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
+                                          KVM_PAGES_PER_HPAGE(level + 1));
+       call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
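
For background (independent of the kernel code): the bug class fixed here is a post-loop range operation that uses the loop's last per-iteration value instead of the range base. A small standalone C sketch with made-up names (flush_range, base):

#include <stdio.h>

#define NR_ENTRIES       8UL
#define PAGES_PER_ENTRY  512UL

/* Stand-in for a range flush: just reports what would be flushed. */
static void flush_range(unsigned long start, unsigned long npages)
{
	printf("flush %lu pages starting at %#lx\n", npages, start);
}

int main(void)
{
	unsigned long base = 0x100000;

	for (unsigned long i = 0; i < NR_ENTRIES; i++) {
		/* Keeping the per-entry value scoped to the loop prevents it
		 * from being (mis)used after the loop ends. */
		unsigned long entry = base + i * PAGES_PER_ENTRY;
		(void)entry;            /* ... per-entry teardown ... */
	}

	/* Correct: cover the whole region from the base, not from the last
	 * entry processed inside the loop. */
	flush_range(base, NR_ENTRIES * PAGES_PER_ENTRY);
	return 0;
}
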
diff --git a/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch b/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch
new file mode 100644 (file)
index 0000000..d9f68d3
--- /dev/null
@@ -0,0 +1,90 @@
+From 53b7ca1a359389276c76fbc9e1009d8626a17e40 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 22 Nov 2021 19:43:11 -0500
+Subject: KVM: x86: Use a stable condition around all VT-d PI paths
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 53b7ca1a359389276c76fbc9e1009d8626a17e40 upstream.
+
+Currently, checks for whether VT-d PI can be used refer to the current
+status of the feature in the current vCPU; or they more or less pick
+vCPU 0 in case a specific vCPU is not available.
+
+However, these checks do not attempt to synchronize with changes to
+the IRTE.  In particular, there is no path that updates the IRTE when
+APICv is re-activated on vCPU 0; and there is no path to wake up a CPU
+that has APICv disabled, if the wakeup occurs because of an IRTE
+that points to a posted interrupt.
+
+To fix this, always go through the VT-d PI path as long as there are
+assigned devices and APICv is available on both the host and the VM side.
+Since the relevant condition was copied over three times, take the hint
+and factor it into a separate function.
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Reviewed-by: David Matlack <dmatlack@google.com>
+Message-Id: <20211123004311.2954158-5-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/posted_intr.c |   20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/vmx/posted_intr.c
++++ b/arch/x86/kvm/vmx/posted_intr.c
+@@ -5,6 +5,7 @@
+ #include <asm/cpu.h>
+ #include "lapic.h"
++#include "irq.h"
+ #include "posted_intr.h"
+ #include "trace.h"
+ #include "vmx.h"
+@@ -77,13 +78,18 @@ after_clear_sn:
+               pi_set_on(pi_desc);
+ }
++static bool vmx_can_use_vtd_pi(struct kvm *kvm)
++{
++      return irqchip_in_kernel(kvm) && enable_apicv &&
++              kvm_arch_has_assigned_device(kvm) &&
++              irq_remapping_cap(IRQ_POSTING_CAP);
++}
++
+ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+ {
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+-      if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+-              !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+-              !kvm_vcpu_apicv_active(vcpu))
++      if (!vmx_can_use_vtd_pi(vcpu->kvm))
+               return;
+       /* Set SN when the vCPU is preempted */
+@@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
+       struct pi_desc old, new;
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+-      if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+-              !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+-              !kvm_vcpu_apicv_active(vcpu))
++      if (!vmx_can_use_vtd_pi(vcpu->kvm))
+               return 0;
+       WARN_ON(irqs_disabled());
+@@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsi
+       struct vcpu_data vcpu_info;
+       int idx, ret = 0;
+-      if (!kvm_arch_has_assigned_device(kvm) ||
+-          !irq_remapping_cap(IRQ_POSTING_CAP) ||
+-          !kvm_vcpu_apicv_active(kvm->vcpus[0]))
++      if (!vmx_can_use_vtd_pi(kvm))
+               return 0;
+       idx = srcu_read_lock(&kvm->irq_srcu);
diff --git a/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch b/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch
new file mode 100644 (file)
index 0000000..7f6c688
--- /dev/null
@@ -0,0 +1,36 @@
+From 05b29633c7a956d5675f5fbba70db0d26aa5e73e Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+Date: Wed, 24 Nov 2021 20:20:46 +0800
+Subject: KVM: X86: Use vcpu->arch.walk_mmu for kvm_mmu_invlpg()
+
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+
+commit 05b29633c7a956d5675f5fbba70db0d26aa5e73e upstream.
+
+INVLPG operates on guest virtual addresses, which are represented by
+vcpu->arch.walk_mmu.  In nested virtualization scenarios,
+kvm_mmu_invlpg() was using the wrong MMU structure; if L2's invlpg were
+emulated by L0 (in practice, this hardly ever happens) when nested
+two-dimensional paging is enabled, the call to ->tlb_flush_gva() would be
+skipped and the hardware TLB entry would not be invalidated.
+
+Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Message-Id: <20211124122055.64424-5-jiangshanlai@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5369,7 +5369,7 @@ void kvm_mmu_invalidate_gva(struct kvm_v
+ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+ {
+-      kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
++      kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
+       ++vcpu->stat.invlpg;
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
diff --git a/queue-5.15/series b/queue-5.15/series
index acce4ac1dd2a40ab87b0968c79596e4442f599ce..db9f26df691e20a4ab54571e521f734b69dea546 100644 (file)
@@ -69,3 +69,16 @@ drm-amd-display-allow-dsc-on-supported-mst-branch-devices.patch
 drm-i915-dp-perform-30ms-delay-after-source-oui-write.patch
 kvm-fix-avic_set_running-for-preemptable-kernels.patch
 kvm-disallow-user-memslot-with-size-that-exceeds-unsigned-long.patch
+kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch
+kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch
+kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch
+kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch
+kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch
+kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch
+kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch
+kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch
+kvm-mmu-shadow-nested-paging-does-not-have-pku.patch
+kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch
+kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch
+kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch
+tracing-histograms-string-compares-should-not-care-about-signed-values.patch
diff --git a/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch b/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch
new file mode 100644 (file)
index 0000000..db0fba5
--- /dev/null
@@ -0,0 +1,41 @@
+From 450fec13d9170127678f991698ac1a5b05c02e2f Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Tue, 30 Nov 2021 12:31:23 -0500
+Subject: tracing/histograms: String compares should not care about signed values
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 450fec13d9170127678f991698ac1a5b05c02e2f upstream.
+
+When comparing two strings for the "onmatch" histogram trigger, fields
+that are strings use string comparisons, which do not care about being
+signed or not.
+
+Do not fail to match two string fields if one is an unsigned char array and
+the other is a signed char array.
+
+Link: https://lore.kernel.org/all/20211129123043.5cfd687a@gandalf.local.home/
+
+Cc: stable@vger.kernel.org
+Cc: Tom Zanussi <zanussi@kernel.org>
+Cc: Yafang Shao <laoar.shao@gmail.com>
+Fixes: b05e89ae7cf3b ("tracing: Accept different type for synthetic event fields")
+Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
+Reported-by: Sven Schnelle <svens@linux.ibm.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events_hist.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -3419,7 +3419,7 @@ static int check_synth_field(struct synt
+       if (strcmp(field->type, hist_field->type) != 0) {
+               if (field->size != hist_field->size ||
+-                  field->is_signed != hist_field->is_signed)
++                  (!field->is_string && field->is_signed != hist_field->is_signed))
+                       return -EINVAL;
+       }