From a24c6965e15f5dd516b73804d8423cc1bda6ddcf Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Sat, 3 Apr 2021 12:14:03 -0400
Subject: [PATCH] Fixes for 5.11

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 ...ompile-out-tdp-mmu-on-32-bit-systems.patch | 351 ++++++++++++
 ...mu-add-comment-on-__tdp_mmu_set_spte.patch |  56 ++
 ...-lockdep-when-setting-a-tdp-mmu-spte.patch |  41 ++
 ...ge-tdp-mmu-yield-function-returns-to.patch | 113 ++++
 ...t-redundantly-clear-tdp-mmu-pt-memor.patch |  41 ++
 ...re-forward-progress-when-yielding-in.patch | 147 +++++
 ...re-tlbs-are-flushed-for-tdp-mmu-duri.patch |  68 +++
 ...re-tlbs-are-flushed-when-yielding-du.patch | 114 ++++
 ...or-out-functions-to-add-remove-tdp-m.patch | 103 ++++
 ...or-out-handling-of-removed-page-tabl.patch | 125 +++++
 ...-fix-braces-in-kvm_recover_nx_lpages.patch |  40 ++
 ...e-flush-and-non-flush-tdp_mmu_iter_c.patch | 125 +++++
 ...ect-tdp-mmu-page-table-memory-with-r.patch | 505 ++++++++++++++++++
 ...name-goal_gfn-to-next_last_level_gfn.patch | 114 ++++
 ...atomic-ops-to-set-sptes-in-tdp-mmu-m.patch | 371 +++++++++++++
 ...d-in-tdu-mmu-iter-even-if-no-sptes-c.patch | 137 +++++
 queue-5.11/series                             |  16 +
 17 files changed, 2467 insertions(+)
 create mode 100644 queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch
 create mode 100644 queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch

diff --git a/queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch b/queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch
new file mode 100644
index 00000000000..c4b7ed198d4
--- /dev/null
+++ b/queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch
@@ -0,0 +1,351 @@
+From 54204429391068a6a503c135e37f7d7d1fe08d63 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 6 Feb 2021 09:53:33 -0500
+Subject: KVM: x86: compile out TDP MMU on 32-bit systems
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 897218ff7cf19290ec2d69652ce673d8ed6fedeb ]
+
+The TDP MMU assumes that it can do atomic accesses to 64-bit PTEs.
+Rather than just disabling it, compile it out completely so that it
+is possible to use for example 64-bit xchg.
+
+To limit the number of stubs, wrap all accesses to tdp_mmu_enabled
+or tdp_mmu_page with a function.  Calls to all other functions in
+tdp_mmu.c are eliminated and do not even reach the linker.
+
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Tested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm_host.h |  2 ++
+ arch/x86/kvm/Makefile           |  3 ++-
+ arch/x86/kvm/mmu/mmu.c          | 36 ++++++++++++++++-----------------
+ arch/x86/kvm/mmu/mmu_internal.h |  2 ++
+ arch/x86/kvm/mmu/tdp_mmu.c      | 29 +-------------------------
+ arch/x86/kvm/mmu/tdp_mmu.h      | 32 +++++++++++++++++++++++++----
+ 6 files changed, 53 insertions(+), 51 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 42fca28d6189..0cbb13b83a16 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1005,6 +1005,7 @@ struct kvm_arch {
+ 	struct kvm_pmu_event_filter *pmu_event_filter;
+ 	struct task_struct *nx_lpage_recovery_thread;
+ 
++#ifdef CONFIG_X86_64
+ 	/*
+ 	 * Whether the TDP MMU is enabled for this VM. This contains a
+ 	 * snapshot of the TDP MMU module parameter from when the VM was
+@@ -1043,6 +1044,7 @@ struct kvm_arch {
+ 	 * the thread holds the MMU lock in write mode.
+ 	 */
+ 	spinlock_t tdp_mmu_pages_lock;
++#endif /* CONFIG_X86_64 */
+ };
+ 
+ struct kvm_vm_stat {
+diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
+index 4bd14ab01323..53c54cdcc923 100644
+--- a/arch/x86/kvm/Makefile
++++ b/arch/x86/kvm/Makefile
+@@ -17,7 +17,8 @@ kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
+ kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
+ 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
+ 			   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
+-			   mmu/spte.o mmu/tdp_iter.o mmu/tdp_mmu.o
++			   mmu/spte.o
++kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
+ 
+ kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
+ 			   vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 5771102a840c..d9901836d7aa 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -1225,7 +1225,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ {
+ 	struct kvm_rmap_head *rmap_head;
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
+ 				slot->base_gfn + gfn_offset, mask, true);
+ 	while (mask) {
+@@ -1254,7 +1254,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+ {
+ 	struct kvm_rmap_head *rmap_head;
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
+ 				slot->base_gfn + gfn_offset, mask, false);
+ 	while (mask) {
+@@ -1309,7 +1309,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
+ 		write_protected |= __rmap_write_protect(kvm, rmap_head, true);
+ 	}
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		write_protected |=
+ 			kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn);
+ 
+@@ -1521,7 +1521,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ 
+ 	r = kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		r |= kvm_tdp_mmu_zap_hva_range(kvm, start, end);
+ 
+ 	return r;
+@@ -1533,7 +1533,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+ 
+ 	r = kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		r |= kvm_tdp_mmu_set_spte_hva(kvm, hva, &pte);
+ 
+ 	return r;
+@@ -1588,7 +1588,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+ 	int young = false;
+ 
+ 	young = kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		young |= kvm_tdp_mmu_age_hva_range(kvm, start, end);
+ 
+ 	return young;
+@@ -1599,7 +1599,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+ 	int young = false;
+ 
+ 	young = kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		young |= kvm_tdp_mmu_test_age_hva(kvm, hva);
+ 
+ 	return young;
+@@ -3161,7 +3161,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
+ 	sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK);
+ 
+ 	if (kvm_mmu_put_root(kvm, sp)) {
+-		if (sp->tdp_mmu_page)
++		if (is_tdp_mmu_page(sp))
+ 			kvm_tdp_mmu_free_root(kvm, sp);
+ 		else if (sp->role.invalid)
+ 			kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+@@ -3255,7 +3255,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
+ 	hpa_t root;
+ 	unsigned i;
+ 
+-	if (vcpu->kvm->arch.tdp_mmu_enabled) {
++	if (is_tdp_mmu_enabled(vcpu->kvm)) {
+ 		root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
+ 
+ 		if (!VALID_PAGE(root))
+@@ -5447,7 +5447,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
+ 
+ 	kvm_zap_obsolete_pages(kvm);
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		kvm_tdp_mmu_zap_all(kvm);
+ 
+ 	spin_unlock(&kvm->mmu_lock);
+@@ -5510,7 +5510,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+ 		}
+ 	}
+ 
+-	if (kvm->arch.tdp_mmu_enabled) {
++	if (is_tdp_mmu_enabled(kvm)) {
+ 		flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end);
+ 		if (flush)
+ 			kvm_flush_remote_tlbs(kvm);
+@@ -5534,7 +5534,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+ 	spin_lock(&kvm->mmu_lock);
+ 	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+ 				start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K);
+ 	spin_unlock(&kvm->mmu_lock);
+ 
+@@ -5600,7 +5600,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ 	slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
+ 			 kvm_mmu_zap_collapsible_spte, true);
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
+ 	spin_unlock(&kvm->mmu_lock);
+ }
+@@ -5627,7 +5627,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
+ 
+ 	spin_lock(&kvm->mmu_lock);
+ 	flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+ 	spin_unlock(&kvm->mmu_lock);
+ 
+@@ -5650,7 +5650,7 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
+ 	spin_lock(&kvm->mmu_lock);
+ 	flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
+ 					false);
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
+ 	spin_unlock(&kvm->mmu_lock);
+ 
+@@ -5666,7 +5666,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
+ 
+ 	spin_lock(&kvm->mmu_lock);
+ 	flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
+ 	spin_unlock(&kvm->mmu_lock);
+ 
+@@ -5694,7 +5694,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
+ 
+ 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ 
+-	if (kvm->arch.tdp_mmu_enabled)
++	if (is_tdp_mmu_enabled(kvm))
+ 		kvm_tdp_mmu_zap_all(kvm);
+ 
+ 	spin_unlock(&kvm->mmu_lock);
+@@ -6005,7 +6005,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
+ 				      struct kvm_mmu_page,
+ 				      lpage_disallowed_link);
+ 		WARN_ON_ONCE(!sp->lpage_disallowed);
+-		if (sp->tdp_mmu_page) {
++		if (is_tdp_mmu_page(sp)) {
+ 			kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
+ 				sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+ 		} else {
+diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
+index 9e600dc30f08..cbac13a2bd45 100644
+--- a/arch/x86/kvm/mmu/mmu_internal.h
++++ b/arch/x86/kvm/mmu/mmu_internal.h
+@@ -56,10 +56,12 @@ struct kvm_mmu_page {
+ 	/* Number of writes since the last time traversal visited this page.  */
+ 	atomic_t write_flooding_count;
+ 
++#ifdef CONFIG_X86_64
+ 	bool tdp_mmu_page;
+ 
+ 	/* Used for freeing the page asyncronously if it is a TDP MMU page. */
+ 	struct rcu_head rcu_head;
++#endif
+ };
+ 
+ extern struct kmem_cache *mmu_page_header_cache;
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index bb6faa9193b4..e2157d0a5712 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -10,24 +10,13 @@
+ #include <asm/cmpxchg.h>
+ #include <trace/events/kvm.h>
+ 
+-#ifdef CONFIG_X86_64
+ static bool __read_mostly tdp_mmu_enabled = false;
+ module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
+-#endif
+-
+-static bool is_tdp_mmu_enabled(void)
+-{
+-#ifdef CONFIG_X86_64
+-	return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
+-#else
+-	return false;
+-#endif /* CONFIG_X86_64 */
+-}
+ 
+ /* Initializes the TDP MMU for the VM, if enabled. */
+ void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+ {
+-	if (!is_tdp_mmu_enabled())
++	if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
+ 		return;
+ 
+ 	/* This should not be changed for the lifetime of the VM. */
+@@ -96,22 +85,6 @@ static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+ #define for_each_tdp_mmu_root(_kvm, _root)				\
+ 	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
+ 
+-bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
+-{
+-	struct kvm_mmu_page *sp;
+-
+-	if (!kvm->arch.tdp_mmu_enabled)
+-		return false;
+-	if (WARN_ON(!VALID_PAGE(hpa)))
+-		return false;
+-
+-	sp = to_shadow_page(hpa);
+-	if (WARN_ON(!sp))
+-		return false;
+-
+-	return sp->tdp_mmu_page && sp->root_count;
+-}
+-
+ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 			  gfn_t start, gfn_t end, bool can_yield, bool flush);
+ 
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
+index cbbdbadd1526..b4b65e3699b3 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.h
++++ b/arch/x86/kvm/mmu/tdp_mmu.h
+@@ -5,10 +5,6 @@
+ 
+ #include <linux/kvm_host.h>
+ 
+-void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+-void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
+-
+-bool is_tdp_mmu_root(struct kvm *kvm, hpa_t root);
+ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
+ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
+ 
+@@ -47,4 +43,32 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
+ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ 			 int *root_level);
+ 
++#ifdef CONFIG_X86_64
++void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
++void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
++static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
++static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
++#else
++static inline void kvm_mmu_init_tdp_mmu(struct kvm *kvm) {}
++static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
++static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
++static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
++#endif
++
++static inline bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
++{
++	struct kvm_mmu_page *sp;
++
++	if (!is_tdp_mmu_enabled(kvm))
++		return false;
++	if (WARN_ON(!VALID_PAGE(hpa)))
++		return false;
++
++	sp = to_shadow_page(hpa);
++	if (WARN_ON(!sp))
++		return false;
++
++	return is_tdp_mmu_page(sp) && sp->root_count;
++}
++
+ #endif /* __KVM_X86_MMU_TDP_MMU_H */
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch b/queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch
new file mode 100644
index 00000000000..229a751c007
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch
@@ -0,0 +1,56 @@
+From 1775852f3dfdcaea00d6f010f16e3823216312f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:08 -0800
+Subject: KVM: x86/mmu: Add comment on __tdp_mmu_set_spte
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit fe43fa2f407b9d513f7bcf18142e14e1bf1508d6 ]
+
+__tdp_mmu_set_spte is a very important function in the TDP MMU which
+already accepts several arguments and will take more in future commits.
+To offset this complexity, add a comment to the function describing each
+of the arguments.
+
+No functional change intended.
+
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-3-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 50c088a41dee..6bd86bb4c089 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -397,6 +397,22 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ 				      new_spte, level);
+ }
+ 
++/*
++ * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
++ * @kvm: kvm instance
++ * @iter: a tdp_iter instance currently on the SPTE that should be set
++ * @new_spte: The value the SPTE should be set to
++ * @record_acc_track: Notify the MM subsystem of changes to the accessed state
++ *		      of the page. Should be set unless handling an MMU
++ *		      notifier for access tracking. Leaving record_acc_track
++ *		      unset in that case prevents page accesses from being
++ *		      double counted.
++ * @record_dirty_log: Record the page as dirty in the dirty bitmap if
++ *		      appropriate for the change being made. Should be set
++ *		      unless performing certain dirty logging operations.
++ *		      Leaving record_dirty_log unset in that case prevents page
++ *		      writes from being double counted.
++ */
+ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ 				      u64 new_spte, bool record_acc_track,
+ 				      bool record_dirty_log)
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch b/queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch
new file mode 100644
index 00000000000..b4bf4f4dba9
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch
@@ -0,0 +1,41 @@
+From 401d23e1f29deccc722db7be92da1f413bb43a9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:09 -0800
+Subject: KVM: x86/mmu: Add lockdep when setting a TDP MMU SPTE
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 3a9a4aa5657471a02ffb7f9b7f3b7a468b3f257b ]
+
+Add lockdep to __tdp_mmu_set_spte to ensure that SPTEs are only modified
+under the MMU lock.
+
+No functional change intended.
+
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-4-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 0567286fba39..3a8bbc812a28 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -365,6 +365,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ 	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
+ 	int as_id = kvm_mmu_page_as_id(root);
+ 
++	lockdep_assert_held(&kvm->mmu_lock);
++
+ 	WRITE_ONCE(*iter->sptep, new_spte);
+ 
+ 	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch b/queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch
new file mode 100644
index 00000000000..94f9b15c909
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch
@@ -0,0 +1,113 @@
+From b51cdb837368ace9ab7f04c6bd99246161333918 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:07 -0800
+Subject: KVM: x86/mmu: change TDP MMU yield function returns to match
+ cond_resched
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit e28a436ca4f65384cceaf3f4da0e00aa74244e6a ]
+
+Currently the TDP MMU yield / cond_resched functions either return
+nothing or return true if the TLBs were not flushed. These are confusing
+semantics, especially when making control flow decisions in calling
+functions.
+
+To clean things up, change both functions to have the same
+return value semantics as cond_resched: true if the thread yielded,
+false if it did not. If the function yielded in the _flush_ version,
+then the TLBs will have been flushed.
+
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-2-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 39 ++++++++++++++++++++++++++++----------
+ 1 file changed, 29 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 17976998bffb..abdd89771b9b 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -413,8 +413,15 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
+ 			 _mmu->shadow_root_level, _start, _end)
+ 
+ /*
+- * Flush the TLB if the process should drop kvm->mmu_lock.
+- * Return whether the caller still needs to flush the tlb.
++ * Flush the TLB and yield if the MMU lock is contended or this thread needs to
++ * return control to the scheduler.
++ *
++ * If this function yields, it will also reset the tdp_iter's walk over the
++ * paging structure and the calling function should allow the iterator to
++ * continue its traversal from the paging structure root.
++ *
++ * Return true if this function yielded, the TLBs were flushed, and the
++ * iterator's traversal was reset. Return false if a yield was not needed.
+  */
+ static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
+ {
+@@ -422,18 +429,32 @@ static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *it
+ 		kvm_flush_remote_tlbs(kvm);
+ 		cond_resched_lock(&kvm->mmu_lock);
+ 		tdp_iter_refresh_walk(iter);
+-		return false;
+-	} else {
+ 		return true;
+ 	}
++
++	return false;
+ }
+ 
+-static void tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
++/*
++ * Yield if the MMU lock is contended or this thread needs to return control
++ * to the scheduler.
++ *
++ * If this function yields, it will also reset the tdp_iter's walk over the
++ * paging structure and the calling function should allow the iterator to
++ * continue its traversal from the paging structure root.
++ *
++ * Return true if this function yielded and the iterator's traversal was reset.
++ * Return false if a yield was not needed.
++ */
++static bool tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
+ {
+ 	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ 		cond_resched_lock(&kvm->mmu_lock);
+ 		tdp_iter_refresh_walk(iter);
++		return true;
+ 	}
++
++	return false;
+ }
+ 
+ /*
+@@ -469,10 +490,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+ 
+-		if (can_yield)
+-			flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
+-		else
+-			flush_needed = true;
++		flush_needed = !can_yield ||
++			       !tdp_mmu_iter_flush_cond_resched(kvm, &iter);
+ 	}
+ 	return flush_needed;
+ }
+@@ -1073,7 +1092,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+ 
+-		spte_set = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
++		spte_set = !tdp_mmu_iter_flush_cond_resched(kvm, &iter);
+ 	}
+ 
+ 	if (spte_set)
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch b/queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch
new file mode 100644
index 00000000000..9e580385426
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch
@@ -0,0 +1,41 @@
+From 4825236e2c3032f176048b051b7522ff9c12495d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:10 -0800
+Subject: KVM: x86/mmu: Don't redundantly clear TDP MMU pt memory
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 734e45b329d626d2c14e2bcf8be3d069a33c3316 ]
+
+The KVM MMU caches already guarantee that shadow page table memory will
+be zeroed, so there is no reason to re-zero the page in the TDP MMU page
+fault handler.
+
+No functional change intended.
+
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-5-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 6bd86bb4c089..4a2b8844f00f 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -708,7 +708,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ 			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
+ 			list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
+ 			child_pt = sp->spt;
+-			clear_page(child_pt);
+ 			new_spte = make_nonleaf_spte(child_pt,
+ 						     !shadow_accessed_mask);
+ 
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch b/queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch
new file mode 100644
index 00000000000..3ba5ee94a6b
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch
@@ -0,0 +1,147 @@
+From b58347d13f5c0b46dc8ce701f443931eff84d86c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:19 -0800
+Subject: KVM: x86/mmu: Ensure forward progress when yielding in TDP MMU iter
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit ed5e484b79e8a9b8be714bd85b6fc70bd6dc99a7 ]
+
+In some functions the TDP iter risks not making forward progress if two
+threads livelock yielding to one another. This is possible if two threads
+are trying to execute wrprot_gfn_range. Each could write protect an entry
+and then yield. This would reset the tdp_iter's walk over the paging
+structure and the loop would end up repeating the same entry over and
+over, preventing either thread from making forward progress.
+
+Fix this issue by only yielding if the loop has made forward progress
+since the last yield.
+
+Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU")
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+
+Message-Id: <20210202185734.1680553-14-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_iter.c | 18 +-----------------
+ arch/x86/kvm/mmu/tdp_iter.h |  7 ++++++-
+ arch/x86/kvm/mmu/tdp_mmu.c  | 21 ++++++++++++++++-----
+ 3 files changed, 23 insertions(+), 23 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
+index 9917c55b7d24..1a09d212186b 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.c
++++ b/arch/x86/kvm/mmu/tdp_iter.c
+@@ -31,6 +31,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+ 	WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+ 
+ 	iter->next_last_level_gfn = next_last_level_gfn;
++	iter->yielded_gfn = iter->next_last_level_gfn;
+ 	iter->root_level = root_level;
+ 	iter->min_level = min_level;
+ 	iter->level = root_level;
+@@ -158,23 +159,6 @@ void tdp_iter_next(struct tdp_iter *iter)
+ 	iter->valid = false;
+ }
+ 
+-/*
+- * Restart the walk over the paging structure from the root, starting from the
+- * highest gfn the iterator had previously reached. Assumes that the entire
+- * paging structure, except the root page, may have been completely torn down
+- * and rebuilt.
+- */
+-void tdp_iter_refresh_walk(struct tdp_iter *iter)
+-{
+-	gfn_t next_last_level_gfn = iter->next_last_level_gfn;
+-
+-	if (iter->gfn > next_last_level_gfn)
+-		next_last_level_gfn = iter->gfn;
+-
+-	tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
+-		       iter->root_level, iter->min_level, next_last_level_gfn);
+-}
+-
+ u64 *tdp_iter_root_pt(struct tdp_iter *iter)
+ {
+ 	return iter->pt_path[iter->root_level - 1];
+diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
+index b2dd269c631f..d480c540ee27 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.h
++++ b/arch/x86/kvm/mmu/tdp_iter.h
+@@ -16,6 +16,12 @@ struct tdp_iter {
+ 	 * for this GFN.
+ 	 */
+ 	gfn_t next_last_level_gfn;
++	/*
++	 * The next_last_level_gfn at the time when the thread last
++	 * yielded. Only yielding when the next_last_level_gfn !=
++	 * yielded_gfn helps ensure forward progress.
++	 */
++	gfn_t yielded_gfn;
+ 	/* Pointers to the page tables traversed to reach the current SPTE */
+ 	u64 *pt_path[PT64_ROOT_MAX_LEVEL];
+ 	/* A pointer to the current SPTE */
+@@ -54,7 +60,6 @@ u64 *spte_to_child_pt(u64 pte, int level);
+ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+ 		    int min_level, gfn_t next_last_level_gfn);
+ void tdp_iter_next(struct tdp_iter *iter);
+-void tdp_iter_refresh_walk(struct tdp_iter *iter);
+ u64 *tdp_iter_root_pt(struct tdp_iter *iter);
+ 
+ #endif /* __KVM_X86_MMU_TDP_ITER_H */
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 0dd27767c770..a07d37abb63f 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -420,8 +420,9 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
+  * TLB flush before yielding.
+  *
+  * If this function yields, it will also reset the tdp_iter's walk over the
+- * paging structure and the calling function should allow the iterator to
+- * continue its traversal from the paging structure root.
++ * paging structure and the calling function should skip to the next
++ * iteration to allow the iterator to continue its traversal from the
++ * paging structure root.
+  *
+  * Return true if this function yielded and the iterator's traversal was reset.
+  * Return false if a yield was not needed.
+@@ -429,12 +430,22 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
+ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
+ 					     struct tdp_iter *iter, bool flush)
+ {
++	/* Ensure forward progress has been made before yielding. */
++	if (iter->next_last_level_gfn == iter->yielded_gfn)
++		return false;
++
+ 	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ 		if (flush)
+ 			kvm_flush_remote_tlbs(kvm);
+ 
+ 		cond_resched_lock(&kvm->mmu_lock);
+-		tdp_iter_refresh_walk(iter);
++
++		WARN_ON(iter->gfn > iter->next_last_level_gfn);
++
++		tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
++			       iter->root_level, iter->min_level,
++			       iter->next_last_level_gfn);
++
+ 		return true;
+ 	}
+ 
+@@ -474,8 +485,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+ 
+-		flush_needed = !can_yield ||
+-			       !tdp_mmu_iter_cond_resched(kvm, &iter, true);
++		flush_needed = !(can_yield &&
++				 tdp_mmu_iter_cond_resched(kvm, &iter, true));
+ 	}
+ 	return flush_needed;
+ }
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch b/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch
new file mode 100644
index 00000000000..c5f3ac604b1
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch
@@ -0,0 +1,68 @@
+From 59cf6e724c855ccfa7d36d6bcd2b8aaa2a0dcc39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Mar 2021 13:01:18 -0700
+Subject: KVM: x86/mmu: Ensure TLBs are flushed for TDP MMU during NX zapping
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 048f49809c526348775425420fb5b8e84fd9a133 ]
+
+Honor the "flush needed" return from kvm_tdp_mmu_zap_gfn_range(), which
+does the flush itself if and only if it yields (which it will never do in
+this particular scenario), and otherwise expects the caller to do the
+flush.  If pages are zapped from the TDP MMU but not the legacy MMU, then
+no flush will occur.
+
+Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU")
+Cc: stable@vger.kernel.org
+Cc: Ben Gardon <bgardon@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210325200119.1359384-3-seanjc@google.com>
+Reviewed-by: Ben Gardon <bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index d9901836d7aa..8643c766415a 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5985,6 +5985,8 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
+ 	struct kvm_mmu_page *sp;
+ 	unsigned int ratio;
+ 	LIST_HEAD(invalid_list);
++	bool flush = false;
++	gfn_t gfn_end;
+ 	ulong to_zap;
+ 
+ 	rcu_idx = srcu_read_lock(&kvm->srcu);
+@@ -6006,19 +6008,20 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
+ 				      lpage_disallowed_link);
+ 		WARN_ON_ONCE(!sp->lpage_disallowed);
+ 		if (is_tdp_mmu_page(sp)) {
+-			kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
+-				sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
++			gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
++			flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end);
+ 		} else {
+ 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ 			WARN_ON_ONCE(sp->lpage_disallowed);
+ 		}
+ 
+ 		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+-			kvm_mmu_commit_zap_page(kvm, &invalid_list);
++			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+ 			cond_resched_lock(&kvm->mmu_lock);
++			flush = false;
+ 		}
+ 	}
+-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
++	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+ 
+ 	spin_unlock(&kvm->mmu_lock);
+ 	srcu_read_unlock(&kvm->srcu, rcu_idx);
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch b/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch
new file mode 100644
index 00000000000..c147bce232f
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch
@@ -0,0 +1,114 @@
+From 4cb3d4d92123732bc824cbe156b648c049bfe676 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Mar 2021 13:01:17 -0700
+Subject: KVM: x86/mmu: Ensure TLBs are flushed when yielding during GFN range
+ zap
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit a835429cda91621fca915d80672a157b47738afb ]
+
+When flushing a range of GFNs across multiple roots, ensure any pending
+flush from a previous root is honored before yielding while walking the
+tables of the current root.
+
+Note, kvm_tdp_mmu_zap_gfn_range() now intentionally overwrites its local
+"flush" with the result to avoid redundant flushes.  zap_gfn_range()
+preserves and returns the incoming "flush", unless of course the flush was
+performed prior to yielding and no new flush was triggered.
+
+Fixes: 1af4a96025b3 ("KVM: x86/mmu: Yield in TDU MMU iter even if no SPTES changed")
+Cc: stable@vger.kernel.org
+Reviewed-by: Ben Gardon <bgardon@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210325200119.1359384-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 65c9172dcdf9..50c088a41dee 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -111,7 +111,7 @@ bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
+ }
+ 
+ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+-			  gfn_t start, gfn_t end, bool can_yield);
++			  gfn_t start, gfn_t end, bool can_yield, bool flush);
+ 
+ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
+ {
+@@ -124,7 +124,7 @@ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
+ 
+ 	list_del(&root->link);
+ 
+-	zap_gfn_range(kvm, root, 0, max_gfn, false);
++	zap_gfn_range(kvm, root, 0, max_gfn, false, false);
+ 
+ 	free_page((unsigned long)root->spt);
+ 	kmem_cache_free(mmu_page_header_cache, root);
+@@ -506,20 +506,21 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
+  * scheduler needs the CPU or there is contention on the MMU lock. If this
+  * function cannot yield, it will not release the MMU lock or reschedule and
+  * the caller must ensure it does not supply too large a GFN range, or the
+- * operation can cause a soft lockup.
++ * operation can cause a soft lockup.  Note, in some use cases a flush may be
++ * required by prior actions.  Ensure the pending flush is performed prior to
++ * yielding.
+  */
+ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+-			  gfn_t start, gfn_t end, bool can_yield)
++			  gfn_t start, gfn_t end, bool can_yield, bool flush)
+ {
+ 	struct tdp_iter iter;
+-	bool flush_needed = false;
+ 
+ 	rcu_read_lock();
+ 
+ 	tdp_root_for_each_pte(iter, root, start, end) {
+ 		if (can_yield &&
+-		    tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) {
+-			flush_needed = false;
++		    tdp_mmu_iter_cond_resched(kvm, &iter, flush)) {
++			flush = false;
+ 			continue;
+ 		}
+ 
+@@ -537,11 +538,11 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 			continue;
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+-		flush_needed = true;
++		flush = true;
+ 	}
+ 
+ 	rcu_read_unlock();
+-	return flush_needed;
++	return flush;
+ }
+ 
+ /*
+@@ -556,7 +557,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+ 	bool flush = false;
+ 
+ 	for_each_tdp_mmu_root_yield_safe(kvm, root)
+-		flush |= zap_gfn_range(kvm, root, start, end, true);
++		flush = zap_gfn_range(kvm, root, start, end, true, flush);
+ 
+ 	return flush;
+ }
+@@ -759,7 +760,7 @@ static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
+ 				     struct kvm_mmu_page *root, gfn_t start,
+ 				     gfn_t end, unsigned long unused)
+ {
+-	return zap_gfn_range(kvm, root, start, end, false);
++	return zap_gfn_range(kvm, root, start, end, false, false);
+ }
+ 
+ int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch b/queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch
new file mode 100644
index 00000000000..44d990c5ab5
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch
@@ -0,0 +1,103 @@
+From 984759b7ac6f1b165b54e36b1ef607742868c136 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:25 -0800
+Subject: KVM: x86/mmu: Factor out functions to add/remove TDP MMU pages
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit a9442f594147f95307f691cfba0c31e25dc79b9d ]
+
+Move the work of adding and removing TDP MMU pages to/from  "secondary"
+data structures to helper functions. These functions will be built on in
+future commits to enable MMU operations to proceed (mostly) in parallel.
+
+No functional change expected.
+
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-20-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 47 +++++++++++++++++++++++++++++++-------
+ 1 file changed, 39 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 4a2b8844f00f..bc49a5b90086 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -262,6 +262,39 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
+ 	}
+ }
+ 
++/**
++ * tdp_mmu_link_page - Add a new page to the list of pages used by the TDP MMU
++ *
++ * @kvm: kvm instance
++ * @sp: the new page
++ * @account_nx: This page replaces a NX large page and should be marked for
++ *		eventual reclaim.
++ */
++static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp,
++			      bool account_nx)
++{
++	lockdep_assert_held_write(&kvm->mmu_lock);
++
++	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
++	if (account_nx)
++		account_huge_nx_page(kvm, sp);
++}
++
++/**
++ * tdp_mmu_unlink_page - Remove page from the list of pages used by the TDP MMU
++ *
++ * @kvm: kvm instance
++ * @sp: the page to be removed
++ */
++static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp)
++{
++	lockdep_assert_held_write(&kvm->mmu_lock);
++
++	list_del(&sp->link);
++	if (sp->lpage_disallowed)
++		unaccount_huge_nx_page(kvm, sp);
++}
++
+ /**
+  * handle_removed_tdp_mmu_page - handle a pt removed from the TDP structure
+  *
+@@ -281,10 +314,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt)
+ 
+ 	trace_kvm_mmu_prepare_zap_page(sp);
+ 
+-	list_del(&sp->link);
+-
+-	if (sp->lpage_disallowed)
+-		unaccount_huge_nx_page(kvm, sp);
++	tdp_mmu_unlink_page(kvm, sp);
+ 
+ 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+ 		old_child_spte = READ_ONCE(*(pt + i));
+@@ -706,15 +736,16 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ 
+ 		if (!is_shadow_present_pte(iter.old_spte)) {
+ 			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
+-			list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
+ 			child_pt = sp->spt;
++
++			tdp_mmu_link_page(vcpu->kvm, sp,
++					  huge_page_disallowed &&
++					  req_level >= iter.level);
++
+ 			new_spte = make_nonleaf_spte(child_pt,
+ 						     !shadow_accessed_mask);
+ 
+ 			trace_kvm_mmu_get_page(sp, true);
+-			if (huge_page_disallowed && req_level >= iter.level)
+-				account_huge_nx_page(vcpu->kvm, sp);
+-
+ 			tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
+ 		}
+ 	}
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch b/queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch
new file mode 100644
index 00000000000..f71a68c38ea
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch
@@ -0,0 +1,125 @@
+From 91e5cc29852d0bf893a88de84a9316fea94a71ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:11 -0800
+Subject: KVM: x86/mmu: Factor out handling of removed page tables
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit a066e61f13cf4b17d043ad8bea0cdde2b1e5ee49 ]
+
+Factor out the code to handle a disconnected subtree of the TDP paging
+structure from the code to handle the change to an individual SPTE.
+Future commits will build on this to allow asynchronous page freeing.
+
+No functional change intended.
+
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+
+Message-Id: <20210202185734.1680553-6-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 71 ++++++++++++++++++++++----------------
+ 1 file changed, 42 insertions(+), 29 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 3a8bbc812a28..3efaa8b44e45 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -234,6 +234,45 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
+ 	}
+ }
+ 
++/**
++ * handle_removed_tdp_mmu_page - handle a pt removed from the TDP structure
++ *
++ * @kvm: kvm instance
++ * @pt: the page removed from the paging structure
++ *
++ * Given a page table that has been removed from the TDP paging structure,
++ * iterates through the page table to clear SPTEs and free child page tables.
++ */
++static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt)
++{
++	struct kvm_mmu_page *sp = sptep_to_sp(pt);
++	int level = sp->role.level;
++	gfn_t gfn = sp->gfn;
++	u64 old_child_spte;
++	int i;
++
++	trace_kvm_mmu_prepare_zap_page(sp);
++
++	list_del(&sp->link);
++
++	if (sp->lpage_disallowed)
++		unaccount_huge_nx_page(kvm, sp);
++
++	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
++		old_child_spte = READ_ONCE(*(pt + i));
++		WRITE_ONCE(*(pt + i), 0);
++		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
++			gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
++			old_child_spte, 0, level - 1);
++	}
++
++	kvm_flush_remote_tlbs_with_address(kvm, gfn,
++					   KVM_PAGES_PER_HPAGE(level));
++
++	free_page((unsigned long)pt);
++	kmem_cache_free(mmu_page_header_cache, sp);
++}
++
+ /**
+  * handle_changed_spte - handle bookkeeping associated with an SPTE change
+  * @kvm: kvm instance
+@@ -254,10 +293,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ 	bool was_leaf = was_present && is_last_spte(old_spte, level);
+ 	bool is_leaf = is_present && is_last_spte(new_spte, level);
+ 	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
+-	u64 *pt;
+-	struct kvm_mmu_page *sp;
+-	u64 old_child_spte;
+-	int i;
+ 
+ 	WARN_ON(level > PT64_ROOT_MAX_LEVEL);
+ 	WARN_ON(level < PG_LEVEL_4K);
+@@ -321,31 +356,9 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ 	 * Recursively handle child PTs if the change removed a subtree from
+ 	 * the paging structure.
+ 	 */
+-	if (was_present && !was_leaf && (pfn_changed || !is_present)) {
+-		pt = spte_to_child_pt(old_spte, level);
+-		sp = sptep_to_sp(pt);
+-
+-		trace_kvm_mmu_prepare_zap_page(sp);
+-
+-		list_del(&sp->link);
+-
+-		if (sp->lpage_disallowed)
+-			unaccount_huge_nx_page(kvm, sp);
+-
+-		for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+-			old_child_spte = READ_ONCE(*(pt + i));
+-			WRITE_ONCE(*(pt + i), 0);
+-			handle_changed_spte(kvm, as_id,
+-				gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
+-				old_child_spte, 0, level - 1);
+-		}
+-
+-		kvm_flush_remote_tlbs_with_address(kvm, gfn,
+-						   KVM_PAGES_PER_HPAGE(level));
+-
+-		free_page((unsigned long)pt);
+-		kmem_cache_free(mmu_page_header_cache, sp);
+-	}
++	if (was_present && !was_leaf && (pfn_changed || !is_present))
++		handle_removed_tdp_mmu_page(kvm,
++				spte_to_child_pt(old_spte, level));
+ }
+ 
+ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch b/queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch
new file mode 100644
index 00000000000..642652ea93e
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch
@@ -0,0 +1,40 @@
+From ecbb84cf762594358ddecba3c2543a145ac143f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:15 -0800
+Subject: KVM: x86/mmu: Fix braces in kvm_recover_nx_lpages
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 8d1a182ea791f0111b0258c8f3eb8d77af0a8386 ]
+
+No functional change intended.
+
+Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU")
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-10-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index ed861245ecf0..5771102a840c 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -6005,10 +6005,10 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
+ 				      struct kvm_mmu_page,
+ 				      lpage_disallowed_link);
+ 		WARN_ON_ONCE(!sp->lpage_disallowed);
+-		if (sp->tdp_mmu_page)
++		if (sp->tdp_mmu_page) {
+ 			kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
+ 				sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+-		else {
++		} else {
+ 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ 			WARN_ON_ONCE(sp->lpage_disallowed);
+ 		}
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch b/queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch
new file mode 100644
index 00000000000..f76efa3b402
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch
@@ -0,0 +1,125 @@
+From cd6c5a3966bb70785043f4a733c53bb74d8f57b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:17 -0800
+Subject: KVM: x86/mmu: Merge flush and non-flush tdp_mmu_iter_cond_resched
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit e139a34ef9d5627a41e1c02210229082140d1f92 ]
+
+The flushing and non-flushing variants of tdp_mmu_iter_cond_resched have
+almost identical implementations. Merge the two functions and add a
+flush parameter.
+
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-12-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 42 ++++++++++++--------------------------
+ 1 file changed, 13 insertions(+), 29 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index abdd89771b9b..0dd27767c770 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -412,33 +412,13 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
+ 	for_each_tdp_pte(_iter, __va(_mmu->root_hpa),		\
+ 			 _mmu->shadow_root_level, _start, _end)
+ 
+-/*
+- * Flush the TLB and yield if the MMU lock is contended or this thread needs to
+- * return control to the scheduler.
+- *
+- * If this function yields, it will also reset the tdp_iter's walk over the
+- * paging structure and the calling function should allow the iterator to
+- * continue its traversal from the paging structure root.
+- *
+- * Return true if this function yielded, the TLBs were flushed, and the
+- * iterator's traversal was reset. Return false if a yield was not needed.
+- */
+-static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
+-{
+-	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+-		kvm_flush_remote_tlbs(kvm);
+-		cond_resched_lock(&kvm->mmu_lock);
+-		tdp_iter_refresh_walk(iter);
+-		return true;
+-	}
+-
+-	return false;
+-}
+-
+ /*
+  * Yield if the MMU lock is contended or this thread needs to return control
+  * to the scheduler.
+  *
++ * If this function should yield and flush is set, it will perform a remote
++ * TLB flush before yielding.
++ *
+  * If this function yields, it will also reset the tdp_iter's walk over the
+  * paging structure and the calling function should allow the iterator to
+  * continue its traversal from the paging structure root.
+@@ -446,9 +426,13 @@ static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *it
+  * Return true if this function yielded and the iterator's traversal was reset.
+  * Return false if a yield was not needed.
+  */
+-static bool tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
++static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
++					     struct tdp_iter *iter, bool flush)
+ {
+ 	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
++		if (flush)
++			kvm_flush_remote_tlbs(kvm);
++
+ 		cond_resched_lock(&kvm->mmu_lock);
+ 		tdp_iter_refresh_walk(iter);
+ 		return true;
+@@ -491,7 +475,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+ 
+ 		flush_needed = !can_yield ||
+-			       !tdp_mmu_iter_flush_cond_resched(kvm, &iter);
++			       !tdp_mmu_iter_cond_resched(kvm, &iter, true);
+ 	}
+ 	return flush_needed;
+ }
+@@ -864,7 +848,7 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ 		spte_set = true;
+ 
+-		tdp_mmu_iter_cond_resched(kvm, &iter);
++		tdp_mmu_iter_cond_resched(kvm, &iter, false);
+ 	}
+ 	return spte_set;
+ }
+@@ -923,7 +907,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ 		spte_set = true;
+ 
+-		tdp_mmu_iter_cond_resched(kvm, &iter);
++		tdp_mmu_iter_cond_resched(kvm, &iter, false);
+ 	}
+ 	return spte_set;
+ }
+@@ -1039,7 +1023,7 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		tdp_mmu_set_spte(kvm, &iter, new_spte);
+ 		spte_set = true;
+ 
+-		tdp_mmu_iter_cond_resched(kvm, &iter);
++		tdp_mmu_iter_cond_resched(kvm, &iter, false);
+ 	}
+ 
+ 	return spte_set;
+@@ -1092,7 +1076,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+ 
+-		spte_set = !tdp_mmu_iter_flush_cond_resched(kvm, &iter);
++		spte_set = !tdp_mmu_iter_cond_resched(kvm, &iter, true);
+ 	}
+ 
+ 	if (spte_set)
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch b/queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch
new file mode 100644
index 00000000000..13caba35607
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch
@@ -0,0 +1,505 @@
+From 975b3ceb8562de4246e1060c50355b2722fcd67d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:23 -0800
+Subject: KVM: x86/mmu: Protect TDP MMU page table memory with RCU
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 7cca2d0b7e7d9f3cd740d41afdc00051c9b508a0 ]
+
+In order to enable concurrent modifications to the paging structures in
+the TDP MMU, threads must be able to safely remove pages of page table
+memory while other threads are traversing the same memory. To ensure
+threads do not access PT memory after it is freed, protect PT memory
+with RCU.
+
+Protecting concurrent accesses to page table memory from use-after-free
+bugs could also have been accomplished using
+walk_shadow_page_lockless_begin/end() and READING_SHADOW_PAGE_TABLES,
+coupling with the barriers in a TLB flush. The use of RCU for this case
+has several distinct advantages over that approach.
+1. Disabling interrupts for long running operations is not desirable.
+   Future commits will allow operations besides page faults to operate
+   without the exclusive protection of the MMU lock and those operations
+   are too long to disable interrupts for their duration.
+2. The use of RCU here avoids long blocking / spinning operations in
+   performance critical paths. By freeing memory with an asynchronous
+   RCU API we avoid the longer wait times TLB flushes experience when
+   overlapping with a thread in walk_shadow_page_lockless_begin/end().
+3. RCU provides a separation of concerns when removing memory from the
+   paging structure. Because the RCU callback to free memory can be
+   scheduled immediately after a TLB flush, there's no need for the
+   thread to manually free a queue of pages later, as commit_zap_pages
+   does.
+
+Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+
+Message-Id: <20210202185734.1680553-18-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu_internal.h |  3 ++
+ arch/x86/kvm/mmu/tdp_iter.c     | 16 +++---
+ arch/x86/kvm/mmu/tdp_iter.h     | 10 ++--
+ arch/x86/kvm/mmu/tdp_mmu.c      | 95 +++++++++++++++++++++++++++++----
+ 4 files changed, 103 insertions(+), 21 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
+index cf101b73a360..9e600dc30f08 100644
+--- a/arch/x86/kvm/mmu/mmu_internal.h
++++ b/arch/x86/kvm/mmu/mmu_internal.h
+@@ -57,6 +57,9 @@ struct kvm_mmu_page {
+ 	atomic_t write_flooding_count;
+ 
+ 	bool tdp_mmu_page;
++
++	/* Used for freeing the page asyncronously if it is a TDP MMU page. */
++	struct rcu_head rcu_head;
+ };
+ 
+ extern struct kmem_cache *mmu_page_header_cache;
+diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
+index 1a09d212186b..e5f148106e20 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.c
++++ b/arch/x86/kvm/mmu/tdp_iter.c
+@@ -12,7 +12,7 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
+ {
+ 	iter->sptep = iter->pt_path[iter->level - 1] +
+ 		SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
+-	iter->old_spte = READ_ONCE(*iter->sptep);
++	iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
+ }
+ 
+ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
+@@ -35,7 +35,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+ 	iter->root_level = root_level;
+ 	iter->min_level = min_level;
+ 	iter->level = root_level;
+-	iter->pt_path[iter->level - 1] = root_pt;
++	iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
+ 
+ 	iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ 	tdp_iter_refresh_sptep(iter);
+@@ -48,7 +48,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+  * address of the child page table referenced by the SPTE. Returns null if
+  * there is no such entry.
+  */
+-u64 *spte_to_child_pt(u64 spte, int level)
++tdp_ptep_t spte_to_child_pt(u64 spte, int level)
+ {
+ 	/*
+ 	 * There's no child entry if this entry isn't present or is a
+@@ -57,7 +57,7 @@ u64 *spte_to_child_pt(u64 spte, int level)
+ 	if (!is_shadow_present_pte(spte) || is_last_spte(spte, level))
+ 		return NULL;
+ 
+-	return __va(spte_to_pfn(spte) << PAGE_SHIFT);
++	return (tdp_ptep_t)__va(spte_to_pfn(spte) << PAGE_SHIFT);
+ }
+ 
+ /*
+@@ -66,7 +66,7 @@ u64 *spte_to_child_pt(u64 spte, int level)
+  */
+ static bool try_step_down(struct tdp_iter *iter)
+ {
+-	u64 *child_pt;
++	tdp_ptep_t child_pt;
+ 
+ 	if (iter->level == iter->min_level)
+ 		return false;
+@@ -75,7 +75,7 @@ static bool try_step_down(struct tdp_iter *iter)
+ 	 * Reread the SPTE before stepping down to avoid traversing into page
+ 	 * tables that are no longer linked from this entry.
+ 	 */
+-	iter->old_spte = READ_ONCE(*iter->sptep);
++	iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
+ 
+ 	child_pt = spte_to_child_pt(iter->old_spte, iter->level);
+ 	if (!child_pt)
+@@ -109,7 +109,7 @@ static bool try_step_side(struct tdp_iter *iter)
+ 	iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
+ 	iter->next_last_level_gfn = iter->gfn;
+ 	iter->sptep++;
+-	iter->old_spte = READ_ONCE(*iter->sptep);
++	iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
+ 
+ 	return true;
+ }
+@@ -159,7 +159,7 @@ void tdp_iter_next(struct tdp_iter *iter)
+ 	iter->valid = false;
+ }
+ 
+-u64 *tdp_iter_root_pt(struct tdp_iter *iter)
++tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
+ {
+ 	return iter->pt_path[iter->root_level - 1];
+ }
+diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
+index d480c540ee27..4cc177d75c4a 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.h
++++ b/arch/x86/kvm/mmu/tdp_iter.h
+@@ -7,6 +7,8 @@
+ 
+ #include "mmu.h"
+ 
++typedef u64 __rcu *tdp_ptep_t;
++
+ /*
+  * A TDP iterator performs a pre-order walk over a TDP paging structure.
+  */
+@@ -23,9 +25,9 @@ struct tdp_iter {
+ 	 */
+ 	gfn_t yielded_gfn;
+ 	/* Pointers to the page tables traversed to reach the current SPTE */
+-	u64 *pt_path[PT64_ROOT_MAX_LEVEL];
++	tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
+ 	/* A pointer to the current SPTE */
+-	u64 *sptep;
++	tdp_ptep_t sptep;
+ 	/* The lowest GFN mapped by the current SPTE */
+ 	gfn_t gfn;
+ 	/* The level of the root page given to the iterator */
+@@ -55,11 +57,11 @@ struct tdp_iter {
+ #define for_each_tdp_pte(iter, root, root_level, start, end) \
+ 	for_each_tdp_pte_min_level(iter, root, root_level, PG_LEVEL_4K, start, end)
+ 
+-u64 *spte_to_child_pt(u64 pte, int level);
++tdp_ptep_t spte_to_child_pt(u64 pte, int level);
+ 
+ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+ 		    int min_level, gfn_t next_last_level_gfn);
+ void tdp_iter_next(struct tdp_iter *iter);
+-u64 *tdp_iter_root_pt(struct tdp_iter *iter);
++tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);
+ 
+ #endif /* __KVM_X86_MMU_TDP_ITER_H */
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 3efaa8b44e45..65c9172dcdf9 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -42,6 +42,12 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
+ 		return;
+ 
+ 	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
++
++	/*
++	 * Ensure that all the outstanding RCU callbacks to free shadow pages
++	 * can run before the VM is torn down.
++	 */
++	rcu_barrier();
+ }
+ 
+ static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
+@@ -196,6 +202,28 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
+ 	return __pa(root->spt);
+ }
+ 
++static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
++{
++	free_page((unsigned long)sp->spt);
++	kmem_cache_free(mmu_page_header_cache, sp);
++}
++
++/*
++ * This is called through call_rcu in order to free TDP page table memory
++ * safely with respect to other kernel threads that may be operating on
++ * the memory.
++ * By only accessing TDP MMU page table memory in an RCU read critical
++ * section, and freeing it after a grace period, lockless access to that
++ * memory won't use it after it is freed.
++ */
++static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
++{
++	struct kvm_mmu_page *sp = container_of(head, struct kvm_mmu_page,
++					       rcu_head);
++
++	tdp_mmu_free_sp(sp);
++}
++
+ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ 				u64 old_spte, u64 new_spte, int level);
+ 
+@@ -269,8 +297,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt)
+ 	kvm_flush_remote_tlbs_with_address(kvm, gfn,
+ 					   KVM_PAGES_PER_HPAGE(level));
+ 
+-	free_page((unsigned long)pt);
+-	kmem_cache_free(mmu_page_header_cache, sp);
++	call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
+ }
+ 
+ /**
+@@ -374,13 +401,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ 				      u64 new_spte, bool record_acc_track,
+ 				      bool record_dirty_log)
+ {
+-	u64 *root_pt = tdp_iter_root_pt(iter);
++	tdp_ptep_t root_pt = tdp_iter_root_pt(iter);
+ 	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
+ 	int as_id = kvm_mmu_page_as_id(root);
+ 
+ 	lockdep_assert_held(&kvm->mmu_lock);
+ 
+-	WRITE_ONCE(*iter->sptep, new_spte);
++	WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
+ 
+ 	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
+ 			      iter->level);
+@@ -450,10 +477,13 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
+ 		return false;
+ 
+ 	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
++		rcu_read_unlock();
++
+ 		if (flush)
+ 			kvm_flush_remote_tlbs(kvm);
+ 
+ 		cond_resched_lock(&kvm->mmu_lock);
++		rcu_read_lock();
+ 
+ 		WARN_ON(iter->gfn > iter->next_last_level_gfn);
+ 
+@@ -484,6 +514,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	struct tdp_iter iter;
+ 	bool flush_needed = false;
+ 
++	rcu_read_lock();
++
+ 	tdp_root_for_each_pte(iter, root, start, end) {
+ 		if (can_yield &&
+ 		    tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) {
+@@ -507,6 +539,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+ 		flush_needed = true;
+ 	}
++
++	rcu_read_unlock();
+ 	return flush_needed;
+ }
+ 
+@@ -552,13 +586,15 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
+ 
+ 	if (unlikely(is_noslot_pfn(pfn))) {
+ 		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
+-		trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
++		trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
++				     new_spte);
+ 	} else {
+ 		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
+ 					 pfn, iter->old_spte, prefault, true,
+ 					 map_writable, !shadow_accessed_mask,
+ 					 &new_spte);
+-		trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
++		trace_kvm_mmu_set_spte(iter->level, iter->gfn,
++				       rcu_dereference(iter->sptep));
+ 	}
+ 
+ 	if (new_spte == iter->old_spte)
+@@ -581,7 +617,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
+ 	if (unlikely(is_mmio_spte(new_spte)))
+ 		ret = RET_PF_EMULATE;
+ 
+-	trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
++	trace_kvm_mmu_set_spte(iter->level, iter->gfn,
++			       rcu_dereference(iter->sptep));
+ 	if (!prefault)
+ 		vcpu->stat.pf_fixed++;
+ 
+@@ -619,6 +656,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ 					huge_page_disallowed, &req_level);
+ 
+ 	trace_kvm_mmu_spte_requested(gpa, level, pfn);
++
++	rcu_read_lock();
++
+ 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+ 		if (nx_huge_page_workaround_enabled)
+ 			disallowed_hugepage_adjust(iter.old_spte, gfn,
+@@ -644,7 +684,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ 			 * because the new value informs the !present
+ 			 * path below.
+ 			 */
+-			iter.old_spte = READ_ONCE(*iter.sptep);
++			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+ 		}
+ 
+ 		if (!is_shadow_present_pte(iter.old_spte)) {
+@@ -663,11 +703,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ 		}
+ 	}
+ 
+-	if (WARN_ON(iter.level != level))
++	if (WARN_ON(iter.level != level)) {
++		rcu_read_unlock();
+ 		return RET_PF_RETRY;
++	}
+ 
+ 	ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
+ 					      pfn, prefault);
++	rcu_read_unlock();
+ 
+ 	return ret;
+ }
+@@ -738,6 +781,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
+ 	int young = 0;
+ 	u64 new_spte = 0;
+ 
++	rcu_read_lock();
++
+ 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
+ 		/*
+ 		 * If we have a non-accessed entry we don't need to change the
+@@ -769,6 +814,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
+ 		trace_kvm_age_page(iter.gfn, iter.level, slot, young);
+ 	}
+ 
++	rcu_read_unlock();
++
+ 	return young;
+ }
+ 
+@@ -814,6 +861,8 @@ static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
+ 	u64 new_spte;
+ 	int need_flush = 0;
+ 
++	rcu_read_lock();
++
+ 	WARN_ON(pte_huge(*ptep));
+ 
+ 	new_pfn = pte_pfn(*ptep);
+@@ -842,6 +891,8 @@ static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
+ 	if (need_flush)
+ 		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
+ 
++	rcu_read_unlock();
++
+ 	return 0;
+ }
+ 
+@@ -865,6 +916,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	u64 new_spte;
+ 	bool spte_set = false;
+ 
++	rcu_read_lock();
++
+ 	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
+ 
+ 	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+@@ -881,6 +934,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ 		spte_set = true;
+ 	}
++
++	rcu_read_unlock();
+ 	return spte_set;
+ }
+ 
+@@ -922,6 +977,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	u64 new_spte;
+ 	bool spte_set = false;
+ 
++	rcu_read_lock();
++
+ 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
+ 		if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
+ 			continue;
+@@ -941,6 +998,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ 		spte_set = true;
+ 	}
++
++	rcu_read_unlock();
+ 	return spte_set;
+ }
+ 
+@@ -982,6 +1041,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	struct tdp_iter iter;
+ 	u64 new_spte;
+ 
++	rcu_read_lock();
++
+ 	tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
+ 				    gfn + BITS_PER_LONG) {
+ 		if (!mask)
+@@ -1007,6 +1068,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
+ 
+ 		mask &= ~(1UL << (iter.gfn - gfn));
+ 	}
++
++	rcu_read_unlock();
+ }
+ 
+ /*
+@@ -1046,6 +1109,8 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	u64 new_spte;
+ 	bool spte_set = false;
+ 
++	rcu_read_lock();
++
+ 	tdp_root_for_each_pte(iter, root, start, end) {
+ 		if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
+ 			continue;
+@@ -1059,6 +1124,7 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		spte_set = true;
+ 	}
+ 
++	rcu_read_unlock();
+ 	return spte_set;
+ }
+ 
+@@ -1096,6 +1162,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ 	kvm_pfn_t pfn;
+ 	bool spte_set = false;
+ 
++	rcu_read_lock();
++
+ 	tdp_root_for_each_pte(iter, root, start, end) {
+ 		if (tdp_mmu_iter_cond_resched(kvm, &iter, spte_set)) {
+ 			spte_set = false;
+@@ -1117,6 +1185,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ 		spte_set = true;
+ 	}
+ 
++	rcu_read_unlock();
+ 	if (spte_set)
+ 		kvm_flush_remote_tlbs(kvm);
+ }
+@@ -1153,6 +1222,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	u64 new_spte;
+ 	bool spte_set = false;
+ 
++	rcu_read_lock();
++
+ 	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
+ 		if (!is_writable_pte(iter.old_spte))
+ 			break;
+@@ -1164,6 +1235,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
+ 		spte_set = true;
+ 	}
+ 
++	rcu_read_unlock();
++
+ 	return spte_set;
+ }
+ 
+@@ -1204,10 +1277,14 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ 
+ 	*root_level = vcpu->arch.mmu->shadow_root_level;
+ 
++	rcu_read_lock();
++
+ 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+ 		leaf = iter.level;
+ 		sptes[leaf] = iter.old_spte;
+ 	}
+ 
++	rcu_read_unlock();
++
+ 	return leaf;
+ }
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch b/queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch
new file mode 100644
index 00000000000..014983a9236
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch
@@ -0,0 +1,114 @@
+From f8decc13cabd456eae14a487ca45f41540199093 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:18 -0800
+Subject: KVM: x86/mmu: Rename goal_gfn to next_last_level_gfn
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 74953d3530280dc53256054e1906f58d07bfba44 ]
+
+The goal_gfn field in tdp_iter can be misleading as it implies that it
+is the iterator's final goal. It is really a target for the lowest gfn
+mapped by the leaf level SPTE the iterator will traverse towards. Change
+the field's name to be more precise.
+
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-13-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_iter.c | 20 ++++++++++----------
+ arch/x86/kvm/mmu/tdp_iter.h |  4 ++--
+ 2 files changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
+index 87b7e16911db..9917c55b7d24 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.c
++++ b/arch/x86/kvm/mmu/tdp_iter.c
+@@ -22,21 +22,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
+ 
+ /*
+  * Sets a TDP iterator to walk a pre-order traversal of the paging structure
+- * rooted at root_pt, starting with the walk to translate goal_gfn.
++ * rooted at root_pt, starting with the walk to translate next_last_level_gfn.
+  */
+ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+-		    int min_level, gfn_t goal_gfn)
++		    int min_level, gfn_t next_last_level_gfn)
+ {
+ 	WARN_ON(root_level < 1);
+ 	WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+ 
+-	iter->goal_gfn = goal_gfn;
++	iter->next_last_level_gfn = next_last_level_gfn;
+ 	iter->root_level = root_level;
+ 	iter->min_level = min_level;
+ 	iter->level = root_level;
+ 	iter->pt_path[iter->level - 1] = root_pt;
+ 
+-	iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level);
++	iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ 	tdp_iter_refresh_sptep(iter);
+ 
+ 	iter->valid = true;
+@@ -82,7 +82,7 @@ static bool try_step_down(struct tdp_iter *iter)
+ 
+ 	iter->level--;
+ 	iter->pt_path[iter->level - 1] = child_pt;
+-	iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level);
++	iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ 	tdp_iter_refresh_sptep(iter);
+ 
+ 	return true;
+@@ -106,7 +106,7 @@ static bool try_step_side(struct tdp_iter *iter)
+ 		return false;
+ 
+ 	iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
+-	iter->goal_gfn = iter->gfn;
++	iter->next_last_level_gfn = iter->gfn;
+ 	iter->sptep++;
+ 	iter->old_spte = READ_ONCE(*iter->sptep);
+ 
+@@ -166,13 +166,13 @@ void tdp_iter_next(struct tdp_iter *iter)
+  */
+ void tdp_iter_refresh_walk(struct tdp_iter *iter)
+ {
+-	gfn_t goal_gfn = iter->goal_gfn;
++	gfn_t next_last_level_gfn = iter->next_last_level_gfn;
+ 
+-	if (iter->gfn > goal_gfn)
+-		goal_gfn = iter->gfn;
++	if (iter->gfn > next_last_level_gfn)
++		next_last_level_gfn = iter->gfn;
+ 
+ 	tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
+-		       iter->root_level, iter->min_level, goal_gfn);
++		       iter->root_level, iter->min_level, next_last_level_gfn);
+ }
+ 
+ u64 *tdp_iter_root_pt(struct tdp_iter *iter)
+diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
+index 47170d0dc98e..b2dd269c631f 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.h
++++ b/arch/x86/kvm/mmu/tdp_iter.h
+@@ -15,7 +15,7 @@ struct tdp_iter {
+ 	 * The iterator will traverse the paging structure towards the mapping
+ 	 * for this GFN.
+ 	 */
+-	gfn_t goal_gfn;
++	gfn_t next_last_level_gfn;
+ 	/* Pointers to the page tables traversed to reach the current SPTE */
+ 	u64 *pt_path[PT64_ROOT_MAX_LEVEL];
+ 	/* A pointer to the current SPTE */
+@@ -52,7 +52,7 @@ struct tdp_iter {
+ u64 *spte_to_child_pt(u64 pte, int level);
+ 
+ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+-		    int min_level, gfn_t goal_gfn);
++		    int min_level, gfn_t next_last_level_gfn);
+ void tdp_iter_next(struct tdp_iter *iter);
+ void tdp_iter_refresh_walk(struct tdp_iter *iter);
+ u64 *tdp_iter_root_pt(struct tdp_iter *iter);
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch b/queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch
new file mode 100644
index 00000000000..c38ee5d17ab
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch
@@ -0,0 +1,371 @@
+From d4aa26febd7e7a1198a88af587864968a3238f5b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:26 -0800
+Subject: KVM: x86/mmu: Use atomic ops to set SPTEs in TDP MMU map
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 9a77daacc87dee9fd63e31243f21894132ed8407 ]
+
+To prepare for handling page faults in parallel, change the TDP MMU
+page fault handler to use atomic operations to set SPTEs so that changes
+are not lost if multiple threads attempt to modify the same SPTE.
+
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+
+Message-Id: <20210202185734.1680553-21-bgardon@google.com>
+[Document new locking rules. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/virt/kvm/locking.rst |   9 +-
+ arch/x86/include/asm/kvm_host.h    |  13 +++
+ arch/x86/kvm/mmu/tdp_mmu.c         | 142 ++++++++++++++++++++++-------
+ 3 files changed, 130 insertions(+), 34 deletions(-)
+
+diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
+index b21a34c34a21..0aa4817b466d 100644
+--- a/Documentation/virt/kvm/locking.rst
++++ b/Documentation/virt/kvm/locking.rst
+@@ -16,7 +16,14 @@ The acquisition orders for mutexes are as follows:
+ - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+   them together is quite rare.
+ 
+-On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
++On x86:
++
++- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
++
++- kvm->arch.mmu_lock is an rwlock.  kvm->arch.tdp_mmu_pages_lock is
++  taken inside kvm->arch.mmu_lock, and cannot be taken without already
++  holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
++  there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
+ 
+ Everything else is a leaf: no other lock is taken inside the critical
+ sections.
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index e0cfd620b293..42fca28d6189 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1030,6 +1030,19 @@ struct kvm_arch {
+ 	 * tdp_mmu_page set and a root_count of 0.
+ 	 */
+ 	struct list_head tdp_mmu_pages;
++
++	/*
++	 * Protects accesses to the following fields when the MMU lock
++	 * is held in read mode:
++	 *  - tdp_mmu_pages (above)
++	 *  - the link field of struct kvm_mmu_pages used by the TDP MMU
++	 *  - lpage_disallowed_mmu_pages
++	 *  - the lpage_disallowed_link field of struct kvm_mmu_pages used
++	 *    by the TDP MMU
++	 * It is acceptable, but not necessary, to acquire this lock when
++	 * the thread holds the MMU lock in write mode.
++	 */
++	spinlock_t tdp_mmu_pages_lock;
+ };
+ 
+ struct kvm_vm_stat {
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index bc49a5b90086..bb6faa9193b4 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -7,6 +7,7 @@
+ #include "tdp_mmu.h"
+ #include "spte.h"
+ 
++#include <asm/cmpxchg.h>
+ #include <trace/events/kvm.h>
+ 
+ #ifdef CONFIG_X86_64
+@@ -33,6 +34,7 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+ 	kvm->arch.tdp_mmu_enabled = true;
+ 
+ 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
++	spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
+ 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
+ }
+ 
+@@ -225,7 +227,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
+ }
+ 
+ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+-				u64 old_spte, u64 new_spte, int level);
++				u64 old_spte, u64 new_spte, int level,
++				bool shared);
+ 
+ static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
+ {
+@@ -267,17 +270,26 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
+  *
+  * @kvm: kvm instance
+  * @sp: the new page
++ * @shared: This operation may not be running under the exclusive use of
++ *	    the MMU lock and the operation must synchronize with other
++ *	    threads that might be adding or removing pages.
+  * @account_nx: This page replaces a NX large page and should be marked for
+  *		eventual reclaim.
+  */
+ static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+-			      bool account_nx)
++			      bool shared, bool account_nx)
+ {
+-	lockdep_assert_held_write(&kvm->mmu_lock);
++	if (shared)
++		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
++	else
++		lockdep_assert_held_write(&kvm->mmu_lock);
+ 
+ 	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
+ 	if (account_nx)
+ 		account_huge_nx_page(kvm, sp);
++
++	if (shared)
++		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+ }
+ 
+ /**
+@@ -285,14 +297,24 @@ static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+  *
+  * @kvm: kvm instance
+  * @sp: the page to be removed
++ * @shared: This operation may not be running under the exclusive use of
++ *	    the MMU lock and the operation must synchronize with other
++ *	    threads that might be adding or removing pages.
+  */
+-static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp)
++static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
++				bool shared)
+ {
+-	lockdep_assert_held_write(&kvm->mmu_lock);
++	if (shared)
++		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
++	else
++		lockdep_assert_held_write(&kvm->mmu_lock);
+ 
+ 	list_del(&sp->link);
+ 	if (sp->lpage_disallowed)
+ 		unaccount_huge_nx_page(kvm, sp);
++
++	if (shared)
++		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+ }
+ 
+ /**
+@@ -300,28 +322,39 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+  *
+  * @kvm: kvm instance
+  * @pt: the page removed from the paging structure
++ * @shared: This operation may not be running under the exclusive use
++ *	    of the MMU lock and the operation must synchronize with other
++ *	    threads that might be modifying SPTEs.
+  *
+  * Given a page table that has been removed from the TDP paging structure,
+  * iterates through the page table to clear SPTEs and free child page tables.
+  */
+-static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt)
++static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
++					bool shared)
+ {
+ 	struct kvm_mmu_page *sp = sptep_to_sp(pt);
+ 	int level = sp->role.level;
+ 	gfn_t gfn = sp->gfn;
+ 	u64 old_child_spte;
++	u64 *sptep;
+ 	int i;
+ 
+ 	trace_kvm_mmu_prepare_zap_page(sp);
+ 
+-	tdp_mmu_unlink_page(kvm, sp);
++	tdp_mmu_unlink_page(kvm, sp, shared);
+ 
+ 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+-		old_child_spte = READ_ONCE(*(pt + i));
+-		WRITE_ONCE(*(pt + i), 0);
++		sptep = pt + i;
++
++		if (shared) {
++			old_child_spte = xchg(sptep, 0);
++		} else {
++			old_child_spte = READ_ONCE(*sptep);
++			WRITE_ONCE(*sptep, 0);
++		}
+ 		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
+ 			gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
+-			old_child_spte, 0, level - 1);
++			old_child_spte, 0, level - 1, shared);
+ 	}
+ 
+ 	kvm_flush_remote_tlbs_with_address(kvm, gfn,
+@@ -338,12 +371,16 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt)
+  * @old_spte: The value of the SPTE before the change
+  * @new_spte: The value of the SPTE after the change
+  * @level: the level of the PT the SPTE is part of in the paging structure
++ * @shared: This operation may not be running under the exclusive use of
++ *	    the MMU lock and the operation must synchronize with other
++ *	    threads that might be modifying SPTEs.
+  *
+  * Handle bookkeeping that might result from the modification of a SPTE.
+  * This function must be called for all TDP SPTE modifications.
+  */
+ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+-				u64 old_spte, u64 new_spte, int level)
++				  u64 old_spte, u64 new_spte, int level,
++				  bool shared)
+ {
+ 	bool was_present = is_shadow_present_pte(old_spte);
+ 	bool is_present = is_shadow_present_pte(new_spte);
+@@ -415,18 +452,51 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ 	 */
+ 	if (was_present && !was_leaf && (pfn_changed || !is_present))
+ 		handle_removed_tdp_mmu_page(kvm,
+-				spte_to_child_pt(old_spte, level));
++				spte_to_child_pt(old_spte, level), shared);
+ }
+ 
+ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+-				u64 old_spte, u64 new_spte, int level)
++				u64 old_spte, u64 new_spte, int level,
++				bool shared)
+ {
+-	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
++	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level,
++			      shared);
+ 	handle_changed_spte_acc_track(old_spte, new_spte, level);
+ 	handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
+ 				      new_spte, level);
+ }
+ 
++/*
++ * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
++ * associated bookkeeping
++ *
++ * @kvm: kvm instance
++ * @iter: a tdp_iter instance currently on the SPTE that should be set
++ * @new_spte: The value the SPTE should be set to
++ * Returns: true if the SPTE was set, false if it was not. If false is returned,
++ *	    this function will have no side-effects.
++ */
++static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
++					   struct tdp_iter *iter,
++					   u64 new_spte)
++{
++	u64 *root_pt = tdp_iter_root_pt(iter);
++	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
++	int as_id = kvm_mmu_page_as_id(root);
++
++	lockdep_assert_held_read(&kvm->mmu_lock);
++
++	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
++		      new_spte) != iter->old_spte)
++		return false;
++
++	handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
++			    iter->level, true);
++
++	return true;
++}
++
++
+ /*
+  * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
+  * @kvm: kvm instance
+@@ -456,7 +526,7 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ 	WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
+ 
+ 	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
+-			      iter->level);
++			      iter->level, false);
+ 	if (record_acc_track)
+ 		handle_changed_spte_acc_track(iter->old_spte, new_spte,
+ 					      iter->level);
+@@ -631,23 +701,18 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
+ 	int ret = 0;
+ 	int make_spte_ret = 0;
+ 
+-	if (unlikely(is_noslot_pfn(pfn))) {
++	if (unlikely(is_noslot_pfn(pfn)))
+ 		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
+-		trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
+-				     new_spte);
+-	} else {
++	else
+ 		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
+ 					 pfn, iter->old_spte, prefault, true,
+ 					 map_writable, !shadow_accessed_mask,
+ 					 &new_spte);
+-		trace_kvm_mmu_set_spte(iter->level, iter->gfn,
+-				       rcu_dereference(iter->sptep));
+-	}
+ 
+ 	if (new_spte == iter->old_spte)
+ 		ret = RET_PF_SPURIOUS;
+-	else
+-		tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);
++	else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
++		return RET_PF_RETRY;
+ 
+ 	/*
+ 	 * If the page fault was caused by a write but the page is write
+@@ -661,8 +726,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
+ 	}
+ 
+ 	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
+-	if (unlikely(is_mmio_spte(new_spte)))
++	if (unlikely(is_mmio_spte(new_spte))) {
++		trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
++				     new_spte);
+ 		ret = RET_PF_EMULATE;
++	} else
++		trace_kvm_mmu_set_spte(iter->level, iter->gfn,
++				       rcu_dereference(iter->sptep));
+ 
+ 	trace_kvm_mmu_set_spte(iter->level, iter->gfn,
+ 			       rcu_dereference(iter->sptep));
+@@ -721,7 +791,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ 		 */
+ 		if (is_shadow_present_pte(iter.old_spte) &&
+ 		    is_large_pte(iter.old_spte)) {
+-			tdp_mmu_set_spte(vcpu->kvm, &iter, 0);
++			if (!tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, 0))
++				break;
+ 
+ 			kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
+ 					KVM_PAGES_PER_HPAGE(iter.level));
+@@ -738,19 +809,24 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ 			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
+ 			child_pt = sp->spt;
+ 
+-			tdp_mmu_link_page(vcpu->kvm, sp,
+-					  huge_page_disallowed &&
+-					  req_level >= iter.level);
+-
+ 			new_spte = make_nonleaf_spte(child_pt,
+ 						     !shadow_accessed_mask);
+ 
+-			trace_kvm_mmu_get_page(sp, true);
+-			tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
++			if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter,
++						    new_spte)) {
++				tdp_mmu_link_page(vcpu->kvm, sp, true,
++						  huge_page_disallowed &&
++						  req_level >= iter.level);
++
++				trace_kvm_mmu_get_page(sp, true);
++			} else {
++				tdp_mmu_free_sp(sp);
++				break;
++			}
+ 		}
+ 	}
+ 
+-	if (WARN_ON(iter.level != level)) {
++	if (iter.level != level) {
+ 		rcu_read_unlock();
+ 		return RET_PF_RETRY;
+ 	}
+-- 
+2.30.1
+
diff --git a/queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch b/queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch
new file mode 100644
index 00000000000..721ccc757f5
--- /dev/null
+++ b/queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch
@@ -0,0 +1,137 @@
+From 45d6572cbf46381c2814c197536eff72d2768c54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Feb 2021 10:57:20 -0800
+Subject: KVM: x86/mmu: Yield in TDP MMU iter even if no SPTEs changed
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 1af4a96025b33587ca953c7ef12a1b20c6e70412 ]
+
+Given certain conditions, some TDP MMU functions may not yield
+reliably / frequently enough. For example, if a paging structure was
+very large but had few, if any, writable entries, wrprot_gfn_range
+could traverse many entries before finding a writable entry and yielding
+because the check for yielding only happens after an SPTE is modified.
+
+Fix this issue by moving the yield to the beginning of the loop.
+
+Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU")
+Reviewed-by: Peter Feiner <pfeiner@google.com>
+Signed-off-by: Ben Gardon <bgardon@google.com>
+
+Message-Id: <20210202185734.1680553-15-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c | 32 ++++++++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index a07d37abb63f..0567286fba39 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -470,6 +470,12 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	bool flush_needed = false;
+ 
+ 	tdp_root_for_each_pte(iter, root, start, end) {
++		if (can_yield &&
++		    tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) {
++			flush_needed = false;
++			continue;
++		}
++
+ 		if (!is_shadow_present_pte(iter.old_spte))
+ 			continue;
+ 
+@@ -484,9 +490,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 			continue;
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+-
+-		flush_needed = !(can_yield &&
+-				 tdp_mmu_iter_cond_resched(kvm, &iter, true));
++		flush_needed = true;
+ 	}
+ 	return flush_needed;
+ }
+@@ -850,6 +854,9 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 
+ 	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+ 				   min_level, start, end) {
++		if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
++			continue;
++
+ 		if (!is_shadow_present_pte(iter.old_spte) ||
+ 		    !is_last_spte(iter.old_spte, iter.level))
+ 			continue;
+@@ -858,8 +865,6 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 
+ 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ 		spte_set = true;
+-
+-		tdp_mmu_iter_cond_resched(kvm, &iter, false);
+ 	}
+ 	return spte_set;
+ }
+@@ -903,6 +908,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	bool spte_set = false;
+ 
+ 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
++		if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
++			continue;
++
+ 		if (spte_ad_need_write_protect(iter.old_spte)) {
+ 			if (is_writable_pte(iter.old_spte))
+ 				new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
+@@ -917,8 +925,6 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 
+ 		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ 		spte_set = true;
+-
+-		tdp_mmu_iter_cond_resched(kvm, &iter, false);
+ 	}
+ 	return spte_set;
+ }
+@@ -1026,6 +1032,9 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 	bool spte_set = false;
+ 
+ 	tdp_root_for_each_pte(iter, root, start, end) {
++		if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
++			continue;
++
+ 		if (!is_shadow_present_pte(iter.old_spte))
+ 			continue;
+ 
+@@ -1033,8 +1042,6 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, new_spte);
+ 		spte_set = true;
+-
+-		tdp_mmu_iter_cond_resched(kvm, &iter, false);
+ 	}
+ 
+ 	return spte_set;
+@@ -1075,6 +1082,11 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ 	bool spte_set = false;
+ 
+ 	tdp_root_for_each_pte(iter, root, start, end) {
++		if (tdp_mmu_iter_cond_resched(kvm, &iter, spte_set)) {
++			spte_set = false;
++			continue;
++		}
++
+ 		if (!is_shadow_present_pte(iter.old_spte) ||
+ 		    !is_last_spte(iter.old_spte, iter.level))
+ 			continue;
+@@ -1087,7 +1099,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ 
+ 		tdp_mmu_set_spte(kvm, &iter, 0);
+ 
+-		spte_set = !tdp_mmu_iter_cond_resched(kvm, &iter, true);
++		spte_set = true;
+ 	}
+ 
+ 	if (spte_set)
+-- 
+2.30.1
+
diff --git a/queue-5.11/series b/queue-5.11/series
index f8445eed7d7..b552246ad0c 100644
--- a/queue-5.11/series
+++ b/queue-5.11/series
@@ -102,3 +102,19 @@ pinctrl-qcom-sc7280-fix-sdc_qdsd_pingroup-and-ufs_reset-offsets.patch
 pinctrl-qcom-sc7280-fix-sdc1_rclk-configurations.patch
 pinctrl-qcom-lpass-lpi-use-default-pullup-strength-values.patch
 pinctrl-qcom-fix-unintentional-string-concatenation.patch
+kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch
+kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch
+kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch
+kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch
+kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch
+kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch
+kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch
+kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch
+kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch
+kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch
+kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch
+kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch
+kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch
+kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch
+kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch
+kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch
-- 
2.47.3