From: Greg Kroah-Hartman Date: Sun, 5 Dec 2021 12:44:11 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v4.4.294~41 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=32db3ea8e2ad5b74e1b9b3a3f0b968f59d5cf321;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch kvm-mmu-shadow-nested-paging-does-not-have-pku.patch kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch tracing-histograms-string-compares-should-not-care-about-signed-values.patch --- diff --git a/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch b/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch new file mode 100644 index 00000000000..a723fde5128 --- /dev/null +++ b/queue-5.15/kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch @@ -0,0 +1,54 @@ +From 1f80d15020d7f130194821feb1432b67648c632d Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Thu, 25 Nov 2021 15:20:14 +0000 +Subject: KVM: arm64: Avoid setting the upper 32 bits of TCR_EL2 and CPTR_EL2 to 1 + +From: Catalin Marinas + +commit 1f80d15020d7f130194821feb1432b67648c632d upstream. + +Having a signed (1 << 31) constant for TCR_EL2_RES1 and CPTR_EL2_TCPAC +causes the upper 32-bit to be set to 1 when assigning them to a 64-bit +variable. Bit 32 in TCR_EL2 is no longer RES0 in ARMv8.7: with FEAT_LPA2 +it changes the meaning of bits 49:48 and 9:8 in the stage 1 EL2 page +table entries. As a result of the sign-extension, a non-VHE kernel can +no longer boot on a model with ARMv8.7 enabled. + +CPTR_EL2 still has the top 32 bits RES0 but we should preempt any future +problems + +Make these top bit constants unsigned as per commit df655b75c43f +("arm64: KVM: Avoid setting the upper 32 bits of VTCR_EL2 to 1"). 
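+
+To make the sign-extension concrete (illustrative snippet, not part of the
+upstream commit):
+
+	u64 bad  = (1 << 31) | (1 << 23);  /* int result is negative, so the */
+	                                   /* u64 conversion sets bits 63:32 */
+	u64 good = (1U << 31) | (1 << 23); /* unsigned: bits 63:32 stay zero */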
+ +Signed-off-by: Catalin Marinas +Reported-by: Chris January +Cc: +Cc: Will Deacon +Cc: Marc Zyngier +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20211125152014.2806582-1-catalin.marinas@arm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/kvm_arm.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm64/include/asm/kvm_arm.h ++++ b/arch/arm64/include/asm/kvm_arm.h +@@ -91,7 +91,7 @@ + #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) + + /* TCR_EL2 Registers bits */ +-#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) ++#define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) + #define TCR_EL2_TBI (1 << 20) + #define TCR_EL2_PS_SHIFT 16 + #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) +@@ -276,7 +276,7 @@ + #define CPTR_EL2_TFP_SHIFT 10 + + /* Hyp Coprocessor Trap Register */ +-#define CPTR_EL2_TCPAC (1 << 31) ++#define CPTR_EL2_TCPAC (1U << 31) + #define CPTR_EL2_TAM (1 << 30) + #define CPTR_EL2_TTA (1 << 20) + #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) diff --git a/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch b/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch new file mode 100644 index 00000000000..575ae8f549c --- /dev/null +++ b/queue-5.15/kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch @@ -0,0 +1,148 @@ +From bda44d844758c70c8dc1478e6fc9c25efa90c5a7 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 4 Nov 2021 00:25:02 +0000 +Subject: KVM: Ensure local memslot copies operate on up-to-date arch-specific data + +From: Sean Christopherson + +commit bda44d844758c70c8dc1478e6fc9c25efa90c5a7 upstream. + +When modifying memslots, snapshot the "old" memslot and copy it to the +"new" memslot's arch data after (re)acquiring slots_arch_lock. x86 can +change a memslot's arch data while memslot updates are in-progress so +long as it holds slots_arch_lock, thus snapshotting a memslot without +holding the lock can result in the consumption of stale data. + +Fixes: b10a038e84d1 ("KVM: mmu: Add slots_arch_lock for memslot arch fields") +Cc: stable@vger.kernel.org +Cc: Ben Gardon +Signed-off-by: Sean Christopherson +Message-Id: <20211104002531.1176691-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 47 +++++++++++++++++++++++++++++++---------------- + 1 file changed, 31 insertions(+), 16 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1523,11 +1523,10 @@ static struct kvm_memslots *kvm_dup_mems + + static int kvm_set_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, +- struct kvm_memory_slot *old, + struct kvm_memory_slot *new, int as_id, + enum kvm_mr_change change) + { +- struct kvm_memory_slot *slot; ++ struct kvm_memory_slot *slot, old; + struct kvm_memslots *slots; + int r; + +@@ -1558,7 +1557,7 @@ static int kvm_set_memslot(struct kvm *k + * Note, the INVALID flag needs to be in the appropriate entry + * in the freshly allocated memslots, not in @old or @new. 
+ */ +- slot = id_to_memslot(slots, old->id); ++ slot = id_to_memslot(slots, new->id); + slot->flags |= KVM_MEMSLOT_INVALID; + + /* +@@ -1589,6 +1588,26 @@ static int kvm_set_memslot(struct kvm *k + kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id)); + } + ++ /* ++ * Make a full copy of the old memslot, the pointer will become stale ++ * when the memslots are re-sorted by update_memslots(), and the old ++ * memslot needs to be referenced after calling update_memslots(), e.g. ++ * to free its resources and for arch specific behavior. This needs to ++ * happen *after* (re)acquiring slots_arch_lock. ++ */ ++ slot = id_to_memslot(slots, new->id); ++ if (slot) { ++ old = *slot; ++ } else { ++ WARN_ON_ONCE(change != KVM_MR_CREATE); ++ memset(&old, 0, sizeof(old)); ++ old.id = new->id; ++ old.as_id = as_id; ++ } ++ ++ /* Copy the arch-specific data, again after (re)acquiring slots_arch_lock. */ ++ memcpy(&new->arch, &old.arch, sizeof(old.arch)); ++ + r = kvm_arch_prepare_memory_region(kvm, new, mem, change); + if (r) + goto out_slots; +@@ -1596,14 +1615,18 @@ static int kvm_set_memslot(struct kvm *k + update_memslots(slots, new, change); + slots = install_new_memslots(kvm, as_id, slots); + +- kvm_arch_commit_memory_region(kvm, mem, old, new, change); ++ kvm_arch_commit_memory_region(kvm, mem, &old, new, change); ++ ++ /* Free the old memslot's metadata. Note, this is the full copy!!! */ ++ if (change == KVM_MR_DELETE) ++ kvm_free_memslot(kvm, &old); + + kvfree(slots); + return 0; + + out_slots: + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { +- slot = id_to_memslot(slots, old->id); ++ slot = id_to_memslot(slots, new->id); + slot->flags &= ~KVM_MEMSLOT_INVALID; + slots = install_new_memslots(kvm, as_id, slots); + } else { +@@ -1618,7 +1641,6 @@ static int kvm_delete_memslot(struct kvm + struct kvm_memory_slot *old, int as_id) + { + struct kvm_memory_slot new; +- int r; + + if (!old->npages) + return -EINVAL; +@@ -1631,12 +1653,7 @@ static int kvm_delete_memslot(struct kvm + */ + new.as_id = as_id; + +- r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE); +- if (r) +- return r; +- +- kvm_free_memslot(kvm, old); +- return 0; ++ return kvm_set_memslot(kvm, mem, &new, as_id, KVM_MR_DELETE); + } + + /* +@@ -1711,7 +1728,6 @@ int __kvm_set_memory_region(struct kvm * + if (!old.npages) { + change = KVM_MR_CREATE; + new.dirty_bitmap = NULL; +- memset(&new.arch, 0, sizeof(new.arch)); + } else { /* Modify an existing slot. */ + if ((new.userspace_addr != old.userspace_addr) || + (new.npages != old.npages) || +@@ -1725,9 +1741,8 @@ int __kvm_set_memory_region(struct kvm * + else /* Nothing to change. */ + return 0; + +- /* Copy dirty_bitmap and arch from the current memslot. */ ++ /* Copy dirty_bitmap from the current memslot. 
*/ + new.dirty_bitmap = old.dirty_bitmap; +- memcpy(&new.arch, &old.arch, sizeof(new.arch)); + } + + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { +@@ -1753,7 +1768,7 @@ int __kvm_set_memory_region(struct kvm * + bitmap_set(new.dirty_bitmap, 0, new.npages); + } + +- r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change); ++ r = kvm_set_memslot(kvm, mem, &new, as_id, change); + if (r) + goto out_bitmap; + diff --git a/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch b/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch new file mode 100644 index 00000000000..41181435221 --- /dev/null +++ b/queue-5.15/kvm-mmu-shadow-nested-paging-does-not-have-pku.patch @@ -0,0 +1,39 @@ +From 28f091bc2f8c23b7eac2402956b692621be7f9f4 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 13:01:37 -0500 +Subject: KVM: MMU: shadow nested paging does not have PKU + +From: Paolo Bonzini + +commit 28f091bc2f8c23b7eac2402956b692621be7f9f4 upstream. + +Initialize the mask for PKU permissions as if CR4.PKE=0, avoiding +incorrect interpretations of the nested hypervisor's page tables. + +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -4852,7 +4852,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_ + struct kvm_mmu *context = &vcpu->arch.guest_mmu; + struct kvm_mmu_role_regs regs = { + .cr0 = cr0, +- .cr4 = cr4, ++ .cr4 = cr4 & ~X86_CR4_PKE, + .efer = efer, + }; + union kvm_mmu_role new_role; +@@ -4916,7 +4916,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_ + context->direct_map = false; + + update_permission_bitmask(context, true); +- update_pkru_bitmask(context); ++ context->pkru_mask = 0; + reset_rsvds_bits_mask_ept(vcpu, context, execonly); + reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); + } diff --git a/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch b/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch new file mode 100644 index 00000000000..eaf304d83ec --- /dev/null +++ b/queue-5.15/kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch @@ -0,0 +1,123 @@ +From 40e5f9080472b614eeedcc5ba678289cd98d70df Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 25 Nov 2021 01:49:43 +0000 +Subject: KVM: nVMX: Abide to KVM_REQ_TLB_FLUSH_GUEST request on nested vmentry/vmexit + +From: Sean Christopherson + +commit 40e5f9080472b614eeedcc5ba678289cd98d70df upstream. + +Like KVM_REQ_TLB_FLUSH_CURRENT, the GUEST variant needs to be serviced at +nested transitions, as KVM doesn't track requests for L1 vs L2. E.g. if +there's a pending flush when a nested VM-Exit occurs, then the flush was +requested in the context of L2 and needs to be handled before switching +to L1, otherwise the flush for L2 would effectiely be lost. + +Opportunistically add a helper to handle CURRENT and GUEST as a pair, the +logic for when they need to be serviced is identical as both requests are +tied to L1 vs. L2, the only difference is the scope of the flush. 
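+
+The helper is deliberately minimal; kvm_service_local_tlb_flush_requests(),
+added in the x86.c hunk below, is just the two checks back to back:
+
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+		kvm_vcpu_flush_tlb_current(vcpu);
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+		kvm_vcpu_flush_tlb_guest(vcpu);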
+ +Reported-by: Lai Jiangshan +Fixes: 07ffaf343e34 ("KVM: nVMX: Sync all PGDs on nested transition with shadow paging") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211125014944.536398-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 8 +++----- + arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++---- + arch/x86/kvm/x86.h | 7 +------ + 3 files changed, 28 insertions(+), 15 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3355,8 +3355,7 @@ enum nvmx_vmentry_status nested_vmx_ente + }; + u32 failed_index; + +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) +- kvm_vcpu_flush_tlb_current(vcpu); ++ kvm_service_local_tlb_flush_requests(vcpu); + + evaluate_pending_interrupts = exec_controls_get(vmx) & + (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); +@@ -4513,9 +4512,8 @@ void nested_vmx_vmexit(struct kvm_vcpu * + (void)nested_get_evmcs_page(vcpu); + } + +- /* Service the TLB flush request for L2 before switching to L1. */ +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) +- kvm_vcpu_flush_tlb_current(vcpu); ++ /* Service pending TLB flush requests for L2 before switching to L1. */ ++ kvm_service_local_tlb_flush_requests(vcpu); + + /* + * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3193,6 +3193,29 @@ static void kvm_vcpu_flush_tlb_guest(str + static_call(kvm_x86_tlb_flush_guest)(vcpu); + } + ++ ++static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) ++{ ++ ++vcpu->stat.tlb_flush; ++ static_call(kvm_x86_tlb_flush_current)(vcpu); ++} ++ ++/* ++ * Service "local" TLB flush requests, which are specific to the current MMU ++ * context. In addition to the generic event handling in vcpu_enter_guest(), ++ * TLB flushes that are targeted at an MMU context also need to be serviced ++ * prior before nested VM-Enter/VM-Exit. ++ */ ++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu) ++{ ++ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) ++ kvm_vcpu_flush_tlb_current(vcpu); ++ ++ if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) ++ kvm_vcpu_flush_tlb_guest(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests); ++ + static void record_steal_time(struct kvm_vcpu *vcpu) + { + struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; +@@ -9530,10 +9553,7 @@ static int vcpu_enter_guest(struct kvm_v + /* Flushing all ASIDs flushes the current ASID... 
*/ + kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) +- kvm_vcpu_flush_tlb_current(vcpu); +- if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) +- kvm_vcpu_flush_tlb_guest(vcpu); ++ kvm_service_local_tlb_flush_requests(vcpu); + + if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; +--- a/arch/x86/kvm/x86.h ++++ b/arch/x86/kvm/x86.h +@@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_ + + #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL + ++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); + int kvm_check_nested_events(struct kvm_vcpu *vcpu); + + static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) +@@ -185,12 +186,6 @@ static inline bool mmu_is_nested(struct + return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; + } + +-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) +-{ +- ++vcpu->stat.tlb_flush; +- static_call(kvm_x86_tlb_flush_current)(vcpu); +-} +- + static inline int is_pae(struct kvm_vcpu *vcpu) + { + return kvm_read_cr4_bits(vcpu, X86_CR4_PAE); diff --git a/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch b/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch new file mode 100644 index 00000000000..05ffbd97a5a --- /dev/null +++ b/queue-5.15/kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch @@ -0,0 +1,82 @@ +From 712494de96f35f3e146b36b752c2afe0fdc0f0cc Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 25 Nov 2021 01:49:44 +0000 +Subject: KVM: nVMX: Emulate guest TLB flush on nested VM-Enter with new vpid12 + +From: Sean Christopherson + +commit 712494de96f35f3e146b36b752c2afe0fdc0f0cc upstream. + +Fully emulate a guest TLB flush on nested VM-Enter which changes vpid12, +i.e. L2's VPID, instead of simply doing INVVPID to flush real hardware's +TLB entries for vpid02. From L1's perspective, changing L2's VPID is +effectively a TLB flush unless "hardware" has previously cached entries +for the new vpid12. Because KVM tracks only a single vpid12, KVM doesn't +know if the new vpid12 has been used in the past and so must treat it as +a brand new, never been used VPID, i.e. must assume that the new vpid12 +represents a TLB flush from L1's perspective. + +For example, if L1 and L2 share a CR3, the first VM-Enter to L2 (with a +VPID) is effectively a TLB flush as hardware/KVM has never seen vpid12 +and thus can't have cached entries in the TLB for vpid12. + +Reported-by: Lai Jiangshan +Fixes: 5c614b3583e7 ("KVM: nVMX: nested VPID emulation") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211125014944.536398-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 37 +++++++++++++++++-------------------- + 1 file changed, 17 insertions(+), 20 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -1180,29 +1180,26 @@ static void nested_vmx_transition_tlb_fl + WARN_ON(!enable_vpid); + + /* +- * If VPID is enabled and used by vmc12, but L2 does not have a unique +- * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate +- * a VPID for L2, flush the current context as the effective ASID is +- * common to both L1 and L2. 
+- * +- * Defer the flush so that it runs after vmcs02.EPTP has been set by +- * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid +- * redundant flushes further down the nested pipeline. +- * +- * If a TLB flush isn't required due to any of the above, and vpid12 is +- * changing then the new "virtual" VPID (vpid12) will reuse the same +- * "real" VPID (vpid02), and so needs to be flushed. There's no direct +- * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for +- * all nested vCPUs. Remember, a flush on VM-Enter does not invalidate +- * guest-physical mappings, so there is no need to sync the nEPT MMU. ++ * VPID is enabled and in use by vmcs12. If vpid12 is changing, then ++ * emulate a guest TLB flush as KVM does not track vpid12 history nor ++ * is the VPID incorporated into the MMU context. I.e. KVM must assume ++ * that the new vpid12 has never been used and thus represents a new ++ * guest ASID that cannot have entries in the TLB. + */ +- if (!nested_has_guest_tlb_tag(vcpu)) { +- kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); +- } else if (is_vmenter && +- vmcs12->virtual_processor_id != vmx->nested.last_vpid) { ++ if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) { + vmx->nested.last_vpid = vmcs12->virtual_processor_id; +- vpid_sync_context(nested_get_vpid02(vcpu)); ++ kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); ++ return; + } ++ ++ /* ++ * If VPID is enabled, used by vmc12, and vpid12 is not changing but ++ * does not have a unique TLB tag (ASID), i.e. EPT is disabled and ++ * KVM was unable to allocate a VPID for L2, flush the current context ++ * as the effective ASID is common to both L1 and L2. ++ */ ++ if (!nested_has_guest_tlb_tag(vcpu)) ++ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } + + static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) diff --git a/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch b/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch new file mode 100644 index 00000000000..a6ea76a7164 --- /dev/null +++ b/queue-5.15/kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch @@ -0,0 +1,81 @@ +From 2b4a5a5d56881ece3c66b9a9a8943a6f41bd7349 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 25 Nov 2021 01:49:43 +0000 +Subject: KVM: nVMX: Flush current VPID (L1 vs. L2) for KVM_REQ_TLB_FLUSH_GUEST + +From: Sean Christopherson + +commit 2b4a5a5d56881ece3c66b9a9a8943a6f41bd7349 upstream. + +Flush the current VPID when handling KVM_REQ_TLB_FLUSH_GUEST instead of +always flushing vpid01. Any TLB flush that is triggered when L2 is +active is scoped to L2's VPID (if it has one), e.g. if L2 toggles CR4.PGE +and L1 doesn't intercept PGE writes, then KVM's emulation of the TLB +flush needs to be applied to L2's VPID. 
+ +Reported-by: Lai Jiangshan +Fixes: 07ffaf343e34 ("KVM: nVMX: Sync all PGDs on nested transition with shadow paging") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211125014944.536398-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -2916,6 +2916,13 @@ static void vmx_flush_tlb_all(struct kvm + } + } + ++static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) ++{ ++ if (is_guest_mode(vcpu)) ++ return nested_get_vpid02(vcpu); ++ return to_vmx(vcpu)->vpid; ++} ++ + static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) + { + struct kvm_mmu *mmu = vcpu->arch.mmu; +@@ -2928,31 +2935,29 @@ static void vmx_flush_tlb_current(struct + if (enable_ept) + ept_sync_context(construct_eptp(vcpu, root_hpa, + mmu->shadow_root_level)); +- else if (!is_guest_mode(vcpu)) +- vpid_sync_context(to_vmx(vcpu)->vpid); + else +- vpid_sync_context(nested_get_vpid02(vcpu)); ++ vpid_sync_context(vmx_get_current_vpid(vcpu)); + } + + static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) + { + /* +- * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in ++ * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in + * vmx_flush_tlb_guest() for an explanation of why this is ok. + */ +- vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr); ++ vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); + } + + static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) + { + /* +- * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0 +- * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit +- * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is ++ * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a ++ * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are ++ * required to flush GVA->{G,H}PA mappings from the TLB if vpid is + * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), + * i.e. no explicit INVVPID is necessary. + */ +- vpid_sync_context(to_vmx(vcpu)->vpid); ++ vpid_sync_context(vmx_get_current_vpid(vcpu)); + } + + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) diff --git a/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch b/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch new file mode 100644 index 00000000000..f182a080baa --- /dev/null +++ b/queue-5.15/kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch @@ -0,0 +1,89 @@ +From 7e1901f6c86c896acff6609e0176f93f756d8b2a Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 19:43:09 -0500 +Subject: KVM: VMX: prepare sync_pir_to_irr for running with APICv disabled + +From: Paolo Bonzini + +commit 7e1901f6c86c896acff6609e0176f93f756d8b2a upstream. + +If APICv is disabled for this vCPU, assigned devices may still attempt to +post interrupts. In that case, we need to cancel the vmentry and deliver +the interrupt with KVM_REQ_EVENT. Extend the existing code that handles +injection of L1 interrupts into L2 to cover this case as well. + +vmx_hwapic_irr_update is only called when APICv is active so it would be +confusing to add a check for vcpu->arch.apicv_active in there. Instead, +just use vmx_set_rvi directly in vmx_sync_pir_to_irr. 
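+
+The resulting delivery decision in vmx_sync_pir_to_irr() reduces to (see the
+full hunk below; the comments here are summaries, not from the commit):
+
+	if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
+		vmx_set_rvi(max_irr);			/* inject via RVI */
+	else if (got_posted_interrupt)
+		kvm_make_request(KVM_REQ_EVENT, vcpu);	/* nested or APICv off */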
+ +Cc: stable@vger.kernel.org +Reviewed-by: Maxim Levitsky +Reviewed-by: David Matlack +Reviewed-by: Sean Christopherson +Message-Id: <20211123004311.2954158-3-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 39 +++++++++++++++++++++++++-------------- + 1 file changed, 25 insertions(+), 14 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6242,9 +6242,9 @@ static int vmx_sync_pir_to_irr(struct kv + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + int max_irr; +- bool max_irr_updated; ++ bool got_posted_interrupt; + +- if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm)) ++ if (KVM_BUG_ON(!enable_apicv, vcpu->kvm)) + return -EIO; + + if (pi_test_on(&vmx->pi_desc)) { +@@ -6254,22 +6254,33 @@ static int vmx_sync_pir_to_irr(struct kv + * But on x86 this is just a compiler barrier anyway. + */ + smp_mb__after_atomic(); +- max_irr_updated = ++ got_posted_interrupt = + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); +- +- /* +- * If we are running L2 and L1 has a new pending interrupt +- * which can be injected, this may cause a vmexit or it may +- * be injected into L2. Either way, this interrupt will be +- * processed via KVM_REQ_EVENT, not RVI, because we do not use +- * virtual interrupt delivery to inject L1 interrupts into L2. +- */ +- if (is_guest_mode(vcpu) && max_irr_updated) +- kvm_make_request(KVM_REQ_EVENT, vcpu); + } else { + max_irr = kvm_lapic_find_highest_irr(vcpu); ++ got_posted_interrupt = false; + } +- vmx_hwapic_irr_update(vcpu, max_irr); ++ ++ /* ++ * Newly recognized interrupts are injected via either virtual interrupt ++ * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is ++ * disabled in two cases: ++ * ++ * 1) If L2 is running and the vCPU has a new pending interrupt. If L1 ++ * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a ++ * VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected ++ * into L2, but KVM doesn't use virtual interrupt delivery to inject ++ * interrupts into L2, and so KVM_REQ_EVENT is again needed. ++ * ++ * 2) If APICv is disabled for this vCPU, assigned devices may still ++ * attempt to post interrupts. The posted interrupt vector will cause ++ * a VM-Exit and the subsequent entry will call sync_pir_to_irr. ++ */ ++ if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu)) ++ vmx_set_rvi(max_irr); ++ else if (got_posted_interrupt) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ + return max_irr; + } + diff --git a/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch b/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch new file mode 100644 index 00000000000..c2f16256197 --- /dev/null +++ b/queue-5.15/kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch @@ -0,0 +1,110 @@ +From 37c4dbf337c5c2cdb24365ffae6ed70ac1e74d7a Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 19:43:10 -0500 +Subject: KVM: x86: check PIR even for vCPUs with disabled APICv + +From: Paolo Bonzini + +commit 37c4dbf337c5c2cdb24365ffae6ed70ac1e74d7a upstream. + +The IRTE for an assigned device can trigger a POSTED_INTR_VECTOR even +if APICv is disabled on the vCPU that receives it. In that case, the +interrupt will just cause a vmexit and leave the ON bit set together +with the PIR bit corresponding to the interrupt. + +Right now, the interrupt would not be delivered until APICv is re-enabled. 
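+
+Schematically, the vCPU is left stranded with the posted-interrupt descriptor
+reading (bitmap notation, for illustration only):
+
+	pi_desc.on     == 1	/* notification outstanding             */
+	pi_desc.pir[v] == 1	/* vector v posted, never copied to IRR */
+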
+However, fixing this is just a matter of always doing the PIR->IRR +synchronization, even if the vCPU has temporarily disabled APICv. + +This is not a problem for performance, or if anything it is an +improvement. First, in the common case where vcpu->arch.apicv_active is +true, one fewer check has to be performed. Second, static_call_cond will +elide the function call if APICv is not present or disabled. Finally, +in the case for AMD hardware we can remove the sync_pir_to_irr callback: +it is only needed for apic_has_interrupt_for_ppr, and that function +already has a fallback for !APICv. + +Cc: stable@vger.kernel.org +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Reviewed-by: David Matlack +Message-Id: <20211123004311.2954158-4-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/lapic.c | 2 +- + arch/x86/kvm/svm/svm.c | 1 - + arch/x86/kvm/x86.c | 18 +++++++++--------- + 3 files changed, 10 insertions(+), 11 deletions(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kv + static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) + { + int highest_irr; +- if (apic->vcpu->arch.apicv_active) ++ if (kvm_x86_ops.sync_pir_to_irr) + highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu); + else + highest_irr = apic_find_highest_irr(apic); +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -4592,7 +4592,6 @@ static struct kvm_x86_ops svm_x86_ops __ + .load_eoi_exitmap = svm_load_eoi_exitmap, + .hwapic_irr_update = svm_hwapic_irr_update, + .hwapic_isr_update = svm_hwapic_isr_update, +- .sync_pir_to_irr = kvm_lapic_find_highest_irr, + .apicv_post_state_restore = avic_post_state_restore, + + .set_tss_addr = svm_set_tss_addr, +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4405,8 +4405,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * + static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) + { +- if (vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + + return kvm_apic_get_state(vcpu, s); + } +@@ -9433,8 +9432,7 @@ static void vcpu_scan_ioapic(struct kvm_ + if (irqchip_split(vcpu->kvm)) + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); + else { +- if (vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + } +@@ -9704,10 +9702,12 @@ static int vcpu_enter_guest(struct kvm_v + + /* + * This handles the case where a posted interrupt was +- * notified with kvm_vcpu_kick. ++ * notified with kvm_vcpu_kick. Assigned devices can ++ * use the POSTED_INTR_VECTOR even if APICv is disabled, ++ * so do it even if APICv is disabled on this vCPU. 
+ */ +- if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ if (kvm_lapic_enabled(vcpu)) ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + + if (kvm_vcpu_exit_request(vcpu)) { + vcpu->mode = OUTSIDE_GUEST_MODE; +@@ -9743,8 +9743,8 @@ static int vcpu_enter_guest(struct kvm_v + if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) + break; + +- if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) +- static_call(kvm_x86_sync_pir_to_irr)(vcpu); ++ if (kvm_lapic_enabled(vcpu)) ++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + + if (unlikely(kvm_vcpu_exit_request(vcpu))) { + exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; diff --git a/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch b/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch new file mode 100644 index 00000000000..9df9a0bb02e --- /dev/null +++ b/queue-5.15/kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch @@ -0,0 +1,33 @@ +From 78311a514099932cd8434d5d2194aa94e56ab67c Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 17 Nov 2021 07:35:44 -0500 +Subject: KVM: x86: ignore APICv if LAPIC is not enabled + +From: Paolo Bonzini + +commit 78311a514099932cd8434d5d2194aa94e56ab67c upstream. + +Synchronize the two calls to kvm_x86_sync_pir_to_irr. The one +in the reenter-guest fast path invoked the callback unconditionally +even if LAPIC is present but disabled. In this case, there are +no interrupts to deliver, and therefore posted interrupts can +be ignored. + +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9723,7 +9723,7 @@ static int vcpu_enter_guest(struct kvm_v + if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) + break; + +- if (vcpu->arch.apicv_active) ++ if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) + static_call(kvm_x86_sync_pir_to_irr)(vcpu); + + if (unlikely(kvm_vcpu_exit_request(vcpu))) { diff --git a/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch b/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch new file mode 100644 index 00000000000..3b3445a2317 --- /dev/null +++ b/queue-5.15/kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch @@ -0,0 +1,61 @@ +From 574c3c55e969096cea770eda3375ff35ccf91702 Mon Sep 17 00:00:00 2001 +From: Ben Gardon +Date: Mon, 15 Nov 2021 13:17:04 -0800 +Subject: KVM: x86/mmu: Fix TLB flush range when handling disconnected pt + +From: Ben Gardon + +commit 574c3c55e969096cea770eda3375ff35ccf91702 upstream. + +When recursively clearing out disconnected pts, the range based TLB +flush in handle_removed_tdp_mmu_page uses the wrong starting GFN, +resulting in the flush mostly missing the affected range. Fix this by +using base_gfn for the flush. + +In response to feedback from David Matlack on the RFC version of this +patch, also move a few definitions into the for loop in the function to +prevent unintended references to them in the future. 
+ +Fixes: a066e61f13cf ("KVM: x86/mmu: Factor out handling of removed page tables") +CC: stable@vger.kernel.org +Signed-off-by: Ben Gardon +Message-Id: <20211115211704.2621644-1-bgardon@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/tdp_mmu.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/mmu/tdp_mmu.c ++++ b/arch/x86/kvm/mmu/tdp_mmu.c +@@ -316,9 +316,6 @@ static void handle_removed_tdp_mmu_page( + struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt)); + int level = sp->role.level; + gfn_t base_gfn = sp->gfn; +- u64 old_child_spte; +- u64 *sptep; +- gfn_t gfn; + int i; + + trace_kvm_mmu_prepare_zap_page(sp); +@@ -326,8 +323,9 @@ static void handle_removed_tdp_mmu_page( + tdp_mmu_unlink_page(kvm, sp, shared); + + for (i = 0; i < PT64_ENT_PER_PAGE; i++) { +- sptep = rcu_dereference(pt) + i; +- gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); ++ u64 *sptep = rcu_dereference(pt) + i; ++ gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); ++ u64 old_child_spte; + + if (shared) { + /* +@@ -373,7 +371,7 @@ static void handle_removed_tdp_mmu_page( + shared); + } + +- kvm_flush_remote_tlbs_with_address(kvm, gfn, ++ kvm_flush_remote_tlbs_with_address(kvm, base_gfn, + KVM_PAGES_PER_HPAGE(level + 1)); + + call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); diff --git a/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch b/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch new file mode 100644 index 00000000000..d9f68d355e6 --- /dev/null +++ b/queue-5.15/kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch @@ -0,0 +1,90 @@ +From 53b7ca1a359389276c76fbc9e1009d8626a17e40 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 22 Nov 2021 19:43:11 -0500 +Subject: KVM: x86: Use a stable condition around all VT-d PI paths + +From: Paolo Bonzini + +commit 53b7ca1a359389276c76fbc9e1009d8626a17e40 upstream. + +Currently, checks for whether VT-d PI can be used refer to the current +status of the feature in the current vCPU; or they more or less pick +vCPU 0 in case a specific vCPU is not available. + +However, these checks do not attempt to synchronize with changes to +the IRTE. In particular, there is no path that updates the IRTE when +APICv is re-activated on vCPU 0; and there is no path to wakeup a CPU +that has APICv disabled, if the wakeup occurs because of an IRTE +that points to a posted interrupt. + +To fix this, always go through the VT-d PI path as long as there are +assigned devices and APICv is available on both the host and the VM side. +Since the relevant condition was copied over three times, take the hint +and factor it into a separate function. 
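+
+The factored condition, vmx_can_use_vtd_pi() in the hunk below, consults
+only VM-wide state and never the per-vCPU apicv_active flag:
+
+	static bool vmx_can_use_vtd_pi(struct kvm *kvm)
+	{
+		return irqchip_in_kernel(kvm) && enable_apicv &&
+		       kvm_arch_has_assigned_device(kvm) &&
+		       irq_remapping_cap(IRQ_POSTING_CAP);
+	}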
+ +Suggested-by: Sean Christopherson +Cc: stable@vger.kernel.org +Reviewed-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Reviewed-by: David Matlack +Message-Id: <20211123004311.2954158-5-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/posted_intr.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/vmx/posted_intr.c ++++ b/arch/x86/kvm/vmx/posted_intr.c +@@ -5,6 +5,7 @@ + #include + + #include "lapic.h" ++#include "irq.h" + #include "posted_intr.h" + #include "trace.h" + #include "vmx.h" +@@ -77,13 +78,18 @@ after_clear_sn: + pi_set_on(pi_desc); + } + ++static bool vmx_can_use_vtd_pi(struct kvm *kvm) ++{ ++ return irqchip_in_kernel(kvm) && enable_apicv && ++ kvm_arch_has_assigned_device(kvm) && ++ irq_remapping_cap(IRQ_POSTING_CAP); ++} ++ + void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) + { + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + +- if (!kvm_arch_has_assigned_device(vcpu->kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(vcpu)) ++ if (!vmx_can_use_vtd_pi(vcpu->kvm)) + return; + + /* Set SN when the vCPU is preempted */ +@@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu) + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + +- if (!kvm_arch_has_assigned_device(vcpu->kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(vcpu)) ++ if (!vmx_can_use_vtd_pi(vcpu->kvm)) + return 0; + + WARN_ON(irqs_disabled()); +@@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsi + struct vcpu_data vcpu_info; + int idx, ret = 0; + +- if (!kvm_arch_has_assigned_device(kvm) || +- !irq_remapping_cap(IRQ_POSTING_CAP) || +- !kvm_vcpu_apicv_active(kvm->vcpus[0])) ++ if (!vmx_can_use_vtd_pi(kvm)) + return 0; + + idx = srcu_read_lock(&kvm->irq_srcu); diff --git a/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch b/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch new file mode 100644 index 00000000000..7f6c6888eb0 --- /dev/null +++ b/queue-5.15/kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch @@ -0,0 +1,36 @@ +From 05b29633c7a956d5675f5fbba70db0d26aa5e73e Mon Sep 17 00:00:00 2001 +From: Lai Jiangshan +Date: Wed, 24 Nov 2021 20:20:46 +0800 +Subject: KVM: X86: Use vcpu->arch.walk_mmu for kvm_mmu_invlpg() + +From: Lai Jiangshan + +commit 05b29633c7a956d5675f5fbba70db0d26aa5e73e upstream. + +INVLPG operates on guest virtual address, which are represented by +vcpu->arch.walk_mmu. In nested virtualization scenarios, +kvm_mmu_invlpg() was using the wrong MMU structure; if L2's invlpg were +emulated by L0 (in practice, it hardly happen) when nested two-dimensional +paging is enabled, the call to ->tlb_flush_gva() would be skipped and +the hardware TLB entry would not be invalidated. 
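+
+As a summary (not from the original message): vcpu->arch.mmu describes the
+translation KVM itself maintains (TDP or shadow pages), while
+vcpu->arch.walk_mmu describes the guest's linear-to-guest-physical walk;
+outside nested setups both point at the same context, which is why using
+arch.mmu only misbehaves when L0 emulates INVLPG on behalf of L2.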
+ +Signed-off-by: Lai Jiangshan +Message-Id: <20211124122055.64424-5-jiangshanlai@gmail.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -5369,7 +5369,7 @@ void kvm_mmu_invalidate_gva(struct kvm_v + + void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) + { +- kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE); ++ kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE); + ++vcpu->stat.invlpg; + } + EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); diff --git a/queue-5.15/series b/queue-5.15/series index acce4ac1dd2..db9f26df691 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -69,3 +69,16 @@ drm-amd-display-allow-dsc-on-supported-mst-branch-devices.patch drm-i915-dp-perform-30ms-delay-after-source-oui-write.patch kvm-fix-avic_set_running-for-preemptable-kernels.patch kvm-disallow-user-memslot-with-size-that-exceeds-unsigned-long.patch +kvm-x86-mmu-fix-tlb-flush-range-when-handling-disconnected-pt.patch +kvm-ensure-local-memslot-copies-operate-on-up-to-date-arch-specific-data.patch +kvm-x86-ignore-apicv-if-lapic-is-not-enabled.patch +kvm-nvmx-emulate-guest-tlb-flush-on-nested-vm-enter-with-new-vpid12.patch +kvm-nvmx-flush-current-vpid-l1-vs.-l2-for-kvm_req_tlb_flush_guest.patch +kvm-nvmx-abide-to-kvm_req_tlb_flush_guest-request-on-nested-vmentry-vmexit.patch +kvm-vmx-prepare-sync_pir_to_irr-for-running-with-apicv-disabled.patch +kvm-x86-use-a-stable-condition-around-all-vt-d-pi-paths.patch +kvm-mmu-shadow-nested-paging-does-not-have-pku.patch +kvm-arm64-avoid-setting-the-upper-32-bits-of-tcr_el2-and-cptr_el2-to-1.patch +kvm-x86-use-vcpu-arch.walk_mmu-for-kvm_mmu_invlpg.patch +kvm-x86-check-pir-even-for-vcpus-with-disabled-apicv.patch +tracing-histograms-string-compares-should-not-care-about-signed-values.patch diff --git a/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch b/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch new file mode 100644 index 00000000000..db0fba5cb34 --- /dev/null +++ b/queue-5.15/tracing-histograms-string-compares-should-not-care-about-signed-values.patch @@ -0,0 +1,41 @@ +From 450fec13d9170127678f991698ac1a5b05c02e2f Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Tue, 30 Nov 2021 12:31:23 -0500 +Subject: tracing/histograms: String compares should not care about signed values + +From: Steven Rostedt (VMware) + +commit 450fec13d9170127678f991698ac1a5b05c02e2f upstream. + +When comparing two strings for the "onmatch" histogram trigger, fields +that are strings use string comparisons, which do not care about being +signed or not. + +Do not fail to match two string fields if one is unsigned char array and +the other is a signed char array. 
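+
+For example (hypothetical field pair, not taken from the report):
+
+	char comm[16];		/* field type on the synthetic event */
+	unsigned char comm[16];	/* same-named field on the source    */
+
+The type strings differ, so check_synth_field() falls back to comparing size
+and signedness; the sizes match but is_signed does not, so the "onmatch"
+match was rejected even though string values compare identically either way.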
+
+Link: https://lore.kernel.org/all/20211129123043.5cfd687a@gandalf.local.home/
+
+Cc: stable@vger.kernel.org
+Cc: Tom Zanussi
+Cc: Yafang Shao
+Fixes: b05e89ae7cf3b ("tracing: Accept different type for synthetic event fields")
+Reviewed-by: Masami Hiramatsu
+Reported-by: Sven Schnelle
+Signed-off-by: Steven Rostedt (VMware)
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/trace/trace_events_hist.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -3419,7 +3419,7 @@ static int check_synth_field(struct synt
+ 
+ 	if (strcmp(field->type, hist_field->type) != 0) {
+ 		if (field->size != hist_field->size ||
+-		    field->is_signed != hist_field->is_signed)
++		    (!field->is_string && field->is_signed != hist_field->is_signed))
+ 			return -EINVAL;
+ 	}