From 257dade45ce4a5223cbbae12820b1624561a9736 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 6 Apr 2021 14:45:46 -0400 Subject: [PATCH] Revert "Fixes for 5.11" This reverts commit a24c6965e15f5dd516b73804d8423cc1bda6ddcf. Signed-off-by: Sasha Levin --- ...ompile-out-tdp-mmu-on-32-bit-systems.patch | 351 ------------ ...mu-add-comment-on-__tdp_mmu_set_spte.patch | 56 -- ...-lockdep-when-setting-a-tdp-mmu-spte.patch | 41 -- ...ge-tdp-mmu-yield-function-returns-to.patch | 113 ---- ...t-redundantly-clear-tdp-mmu-pt-memor.patch | 41 -- ...re-forward-progress-when-yielding-in.patch | 147 ----- ...re-tlbs-are-flushed-for-tdp-mmu-duri.patch | 68 --- ...re-tlbs-are-flushed-when-yielding-du.patch | 114 ---- ...or-out-functions-to-add-remove-tdp-m.patch | 103 ---- ...or-out-handling-of-removed-page-tabl.patch | 125 ----- ...-fix-braces-in-kvm_recover_nx_lpages.patch | 40 -- ...e-flush-and-non-flush-tdp_mmu_iter_c.patch | 125 ----- ...ect-tdp-mmu-page-table-memory-with-r.patch | 505 ------------------ ...name-goal_gfn-to-next_last_level_gfn.patch | 114 ---- ...atomic-ops-to-set-sptes-in-tdp-mmu-m.patch | 371 ------------- ...d-in-tdu-mmu-iter-even-if-no-sptes-c.patch | 137 ----- queue-5.11/series | 16 - 17 files changed, 2467 deletions(-) delete mode 100644 queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch delete mode 100644 queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch delete mode 100644 queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch delete mode 100644 queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch delete mode 100644 queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch delete mode 100644 queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch delete mode 100644 queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch delete mode 100644 queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch delete mode 100644 queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch delete mode 100644 queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch delete mode 100644 queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch delete mode 100644 queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch delete mode 100644 queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch delete mode 100644 queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch delete mode 100644 queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch delete mode 100644 queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch diff --git a/queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch b/queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch deleted file mode 100644 index c4b7ed198d4..00000000000 --- a/queue-5.11/kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch +++ /dev/null @@ -1,351 +0,0 @@ -From 54204429391068a6a503c135e37f7d7d1fe08d63 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Sat, 6 Feb 2021 09:53:33 -0500 -Subject: KVM: x86: compile out TDP MMU on 32-bit systems - -From: Paolo Bonzini - -[ Upstream commit 897218ff7cf19290ec2d69652ce673d8ed6fedeb ] - -The TDP MMU assumes that it can do atomic accesses to 64-bit PTEs. -Rather than just disabling it, compile it out completely so that it -is possible to use for example 64-bit xchg. - -To limit the number of stubs, wrap all accesses to tdp_mmu_enabled -or tdp_mmu_page with a function. 
Calls to all other functions in -tdp_mmu.c are eliminated and do not even reach the linker. - -Reviewed-by: Sean Christopherson -Tested-by: Sean Christopherson -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/include/asm/kvm_host.h | 2 ++ - arch/x86/kvm/Makefile | 3 ++- - arch/x86/kvm/mmu/mmu.c | 36 ++++++++++++++++----------------- - arch/x86/kvm/mmu/mmu_internal.h | 2 ++ - arch/x86/kvm/mmu/tdp_mmu.c | 29 +------------------------- - arch/x86/kvm/mmu/tdp_mmu.h | 32 +++++++++++++++++++++++++---- - 6 files changed, 53 insertions(+), 51 deletions(-) - -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index 42fca28d6189..0cbb13b83a16 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -1005,6 +1005,7 @@ struct kvm_arch { - struct kvm_pmu_event_filter *pmu_event_filter; - struct task_struct *nx_lpage_recovery_thread; - -+#ifdef CONFIG_X86_64 - /* - * Whether the TDP MMU is enabled for this VM. This contains a - * snapshot of the TDP MMU module parameter from when the VM was -@@ -1043,6 +1044,7 @@ struct kvm_arch { - * the thread holds the MMU lock in write mode. - */ - spinlock_t tdp_mmu_pages_lock; -+#endif /* CONFIG_X86_64 */ - }; - - struct kvm_vm_stat { -diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile -index 4bd14ab01323..53c54cdcc923 100644 ---- a/arch/x86/kvm/Makefile -+++ b/arch/x86/kvm/Makefile -@@ -17,7 +17,8 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o - kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ - i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ - hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \ -- mmu/spte.o mmu/tdp_iter.o mmu/tdp_mmu.o -+ mmu/spte.o -+kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o - - kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ - vmx/evmcs.o vmx/nested.o vmx/posted_intr.o -diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c -index 5771102a840c..d9901836d7aa 100644 ---- a/arch/x86/kvm/mmu/mmu.c -+++ b/arch/x86/kvm/mmu/mmu.c -@@ -1225,7 +1225,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, - { - struct kvm_rmap_head *rmap_head; - -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, - slot->base_gfn + gfn_offset, mask, true); - while (mask) { -@@ -1254,7 +1254,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, - { - struct kvm_rmap_head *rmap_head; - -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, - slot->base_gfn + gfn_offset, mask, false); - while (mask) { -@@ -1309,7 +1309,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, - write_protected |= __rmap_write_protect(kvm, rmap_head, true); - } - -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - write_protected |= - kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn); - -@@ -1521,7 +1521,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, - - r = kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); - -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - r |= kvm_tdp_mmu_zap_hva_range(kvm, start, end); - - return r; -@@ -1533,7 +1533,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) - - r = kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); - -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - r |= kvm_tdp_mmu_set_spte_hva(kvm, hva, &pte); - - return r; -@@ -1588,7 +1588,7 @@ int 
kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) - int young = false; - - young = kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - young |= kvm_tdp_mmu_age_hva_range(kvm, start, end); - - return young; -@@ -1599,7 +1599,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) - int young = false; - - young = kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp); -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - young |= kvm_tdp_mmu_test_age_hva(kvm, hva); - - return young; -@@ -3161,7 +3161,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, - sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK); - - if (kvm_mmu_put_root(kvm, sp)) { -- if (sp->tdp_mmu_page) -+ if (is_tdp_mmu_page(sp)) - kvm_tdp_mmu_free_root(kvm, sp); - else if (sp->role.invalid) - kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); -@@ -3255,7 +3255,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) - hpa_t root; - unsigned i; - -- if (vcpu->kvm->arch.tdp_mmu_enabled) { -+ if (is_tdp_mmu_enabled(vcpu->kvm)) { - root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu); - - if (!VALID_PAGE(root)) -@@ -5447,7 +5447,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) - - kvm_zap_obsolete_pages(kvm); - -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - kvm_tdp_mmu_zap_all(kvm); - - spin_unlock(&kvm->mmu_lock); -@@ -5510,7 +5510,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) - } - } - -- if (kvm->arch.tdp_mmu_enabled) { -+ if (is_tdp_mmu_enabled(kvm)) { - flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end); - if (flush) - kvm_flush_remote_tlbs(kvm); -@@ -5534,7 +5534,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - spin_lock(&kvm->mmu_lock); - flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, - start_level, KVM_MAX_HUGEPAGE_LEVEL, false); -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K); - spin_unlock(&kvm->mmu_lock); - -@@ -5600,7 +5600,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot, - kvm_mmu_zap_collapsible_spte, true); - -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot); - spin_unlock(&kvm->mmu_lock); - } -@@ -5627,7 +5627,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - - spin_lock(&kvm->mmu_lock); - flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false); -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); - -@@ -5650,7 +5650,7 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, - spin_lock(&kvm->mmu_lock); - flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect, - false); -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M); - spin_unlock(&kvm->mmu_lock); - -@@ -5666,7 +5666,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm, - - spin_lock(&kvm->mmu_lock); - flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false); -- if (kvm->arch.tdp_mmu_enabled) -+ if (is_tdp_mmu_enabled(kvm)) - flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot); - spin_unlock(&kvm->mmu_lock); - -@@ -5694,7 +5694,7 @@ void kvm_mmu_zap_all(struct kvm *kvm) - - kvm_mmu_commit_zap_page(kvm, &invalid_list); - 
-- if (kvm->arch.tdp_mmu_enabled)
-+ if (is_tdp_mmu_enabled(kvm))
- kvm_tdp_mmu_zap_all(kvm);
- 
- spin_unlock(&kvm->mmu_lock);
-@@ -6005,7 +6005,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
- struct kvm_mmu_page,
- lpage_disallowed_link);
- WARN_ON_ONCE(!sp->lpage_disallowed);
-- if (sp->tdp_mmu_page) {
-+ if (is_tdp_mmu_page(sp)) {
- kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
- sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
- } else {
-diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
-index 9e600dc30f08..cbac13a2bd45 100644
---- a/arch/x86/kvm/mmu/mmu_internal.h
-+++ b/arch/x86/kvm/mmu/mmu_internal.h
-@@ -56,10 +56,12 @@ struct kvm_mmu_page {
- /* Number of writes since the last time traversal visited this page. */
- atomic_t write_flooding_count;
- 
-+#ifdef CONFIG_X86_64
- bool tdp_mmu_page;
- 
- /* Used for freeing the page asynchronously if it is a TDP MMU page. */
- struct rcu_head rcu_head;
-+#endif
- };
- 
- extern struct kmem_cache *mmu_page_header_cache;
-diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
-index bb6faa9193b4..e2157d0a5712 100644
---- a/arch/x86/kvm/mmu/tdp_mmu.c
-+++ b/arch/x86/kvm/mmu/tdp_mmu.c
-@@ -10,24 +10,13 @@
- #include 
- #include 
- 
--#ifdef CONFIG_X86_64
- static bool __read_mostly tdp_mmu_enabled = false;
- module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
--#endif
--
--static bool is_tdp_mmu_enabled(void)
--{
--#ifdef CONFIG_X86_64
-- return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
--#else
-- return false;
--#endif /* CONFIG_X86_64 */
--}
- 
- /* Initializes the TDP MMU for the VM, if enabled. */
- void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
- {
-- if (!is_tdp_mmu_enabled())
-+ if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
- return;
- 
- /* This should not be changed for the lifetime of the VM. */
-@@ -96,22 +85,6 @@ static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
- #define for_each_tdp_mmu_root(_kvm, _root) \
- list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
- 
--bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
--{
-- struct kvm_mmu_page *sp;
--
-- if (!kvm->arch.tdp_mmu_enabled)
-- return false;
-- if (WARN_ON(!VALID_PAGE(hpa)))
-- return false;
--
-- sp = to_shadow_page(hpa);
-- if (WARN_ON(!sp))
-- return false;
--
-- return sp->tdp_mmu_page && sp->root_count;
--}
--
- static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
- gfn_t start, gfn_t end, bool can_yield, bool flush);
- 
-diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
-index cbbdbadd1526..b4b65e3699b3 100644
---- a/arch/x86/kvm/mmu/tdp_mmu.h
-+++ b/arch/x86/kvm/mmu/tdp_mmu.h
-@@ -5,10 +5,6 @@
- 
- #include 
- 
--void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
--void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
--
--bool is_tdp_mmu_root(struct kvm *kvm, hpa_t root);
- hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
- void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
- 
-@@ -47,4 +43,32 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
- int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
- int *root_level);
- 
-+#ifdef CONFIG_X86_64
-+void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
-+void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
-+static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
-+static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
-+#else
-+static inline void kvm_mmu_init_tdp_mmu(struct kvm *kvm) {}
-+static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
-+static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
-+static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
-+#endif
-+
-+static inline bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
-+{
-+ struct kvm_mmu_page *sp;
-+
-+ if (!is_tdp_mmu_enabled(kvm))
-+ return false;
-+ if (WARN_ON(!VALID_PAGE(hpa)))
-+ return false;
-+
-+ sp = to_shadow_page(hpa);
-+ if (WARN_ON(!sp))
-+ return false;
-+
-+ return is_tdp_mmu_page(sp) && sp->root_count;
-+}
-+
- #endif /* __KVM_X86_MMU_TDP_MMU_H */
---
-2.30.1
-
diff --git a/queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch b/queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch
deleted file mode 100644
index 229a751c007..00000000000
--- a/queue-5.11/kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 1775852f3dfdcaea00d6f010f16e3823216312f2 Mon Sep 17 00:00:00 2001
-From: Sasha Levin
-Date: Tue, 2 Feb 2021 10:57:08 -0800
-Subject: KVM: x86/mmu: Add comment on __tdp_mmu_set_spte
-
-From: Ben Gardon
-
-[ Upstream commit fe43fa2f407b9d513f7bcf18142e14e1bf1508d6 ]
-
-__tdp_mmu_set_spte is a very important function in the TDP MMU which
-already accepts several arguments and will take more in future commits.
-To offset this complexity, add a comment to the function describing each
-of the arguments.
-
-No functional change intended.
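-
-For reference, callers generally do not pass these flags directly; thin
-wrappers in tdp_mmu.c hard-code them. A sketch of that existing pattern
-(reconstructed here for illustration, not part of this diff):
-
-	static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
-					    u64 new_spte)
-	{
-		/* Common case: record both accessed state and dirty logging. */
-		__tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
-	}
-
-	static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
-							 struct tdp_iter *iter,
-							 u64 new_spte)
-	{
-		/* Dirty-logging operations: skip the dirty bitmap update. */
-		__tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
-	}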
- -Reviewed-by: Peter Feiner -Acked-by: Paolo Bonzini -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-3-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 50c088a41dee..6bd86bb4c089 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -397,6 +397,22 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - new_spte, level); - } - -+/* -+ * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping -+ * @kvm: kvm instance -+ * @iter: a tdp_iter instance currently on the SPTE that should be set -+ * @new_spte: The value the SPTE should be set to -+ * @record_acc_track: Notify the MM subsystem of changes to the accessed state -+ * of the page. Should be set unless handling an MMU -+ * notifier for access tracking. Leaving record_acc_track -+ * unset in that case prevents page accesses from being -+ * double counted. -+ * @record_dirty_log: Record the page as dirty in the dirty bitmap if -+ * appropriate for the change being made. Should be set -+ * unless performing certain dirty logging operations. -+ * Leaving record_dirty_log unset in that case prevents page -+ * writes from being double counted. -+ */ - static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, - u64 new_spte, bool record_acc_track, - bool record_dirty_log) --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch b/queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch deleted file mode 100644 index b4bf4f4dba9..00000000000 --- a/queue-5.11/kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 401d23e1f29deccc722db7be92da1f413bb43a9c Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:09 -0800 -Subject: KVM: x86/mmu: Add lockdep when setting a TDP MMU SPTE - -From: Ben Gardon - -[ Upstream commit 3a9a4aa5657471a02ffb7f9b7f3b7a468b3f257b ] - -Add lockdep to __tdp_mmu_set_spte to ensure that SPTEs are only modified -under the MMU lock. - -No functional change intended. 
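-
-The assertion is the standard lockdep pattern. As a minimal sketch (the
-function name below is illustrative only, not taken from this patch):
-
-	static void tdp_mmu_write_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
-	{
-		/* WARNs under CONFIG_PROVE_LOCKING if mmu_lock is not held. */
-		lockdep_assert_held(&kvm->mmu_lock);
-		WRITE_ONCE(*sptep, new_spte);
-	}
-
-On kernels built without lockdep the assertion compiles to a no-op, so
-the check costs nothing in production configurations.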
- -Reviewed-by: Peter Feiner -Reviewed-by: Sean Christopherson -Acked-by: Paolo Bonzini -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-4-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 0567286fba39..3a8bbc812a28 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -365,6 +365,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, - struct kvm_mmu_page *root = sptep_to_sp(root_pt); - int as_id = kvm_mmu_page_as_id(root); - -+ lockdep_assert_held(&kvm->mmu_lock); -+ - WRITE_ONCE(*iter->sptep, new_spte); - - __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch b/queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch deleted file mode 100644 index 94f9b15c909..00000000000 --- a/queue-5.11/kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch +++ /dev/null @@ -1,113 +0,0 @@ -From b51cdb837368ace9ab7f04c6bd99246161333918 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:07 -0800 -Subject: KVM: x86/mmu: change TDP MMU yield function returns to match - cond_resched - -From: Ben Gardon - -[ Upstream commit e28a436ca4f65384cceaf3f4da0e00aa74244e6a ] - -Currently the TDP MMU yield / cond_resched functions either return -nothing or return true if the TLBs were not flushed. These are confusing -semantics, especially when making control flow decisions in calling -functions. - -To clean things up, change both functions to have the same -return value semantics as cond_resched: true if the thread yielded, -false if it did not. If the function yielded in the _flush_ version, -then the TLBs will have been flushed. - -Reviewed-by: Peter Feiner -Acked-by: Paolo Bonzini -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-2-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 39 ++++++++++++++++++++++++++++---------- - 1 file changed, 29 insertions(+), 10 deletions(-) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 17976998bffb..abdd89771b9b 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -413,8 +413,15 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm, - _mmu->shadow_root_level, _start, _end) - - /* -- * Flush the TLB if the process should drop kvm->mmu_lock. -- * Return whether the caller still needs to flush the tlb. -+ * Flush the TLB and yield if the MMU lock is contended or this thread needs to -+ * return control to the scheduler. -+ * -+ * If this function yields, it will also reset the tdp_iter's walk over the -+ * paging structure and the calling function should allow the iterator to -+ * continue its traversal from the paging structure root. -+ * -+ * Return true if this function yielded, the TLBs were flushed, and the -+ * iterator's traversal was reset. Return false if a yield was not needed. 
- */ - static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter) - { -@@ -422,18 +429,32 @@ static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *it - kvm_flush_remote_tlbs(kvm); - cond_resched_lock(&kvm->mmu_lock); - tdp_iter_refresh_walk(iter); -- return false; -- } else { - return true; - } -+ -+ return false; - } - --static void tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter) -+/* -+ * Yield if the MMU lock is contended or this thread needs to return control -+ * to the scheduler. -+ * -+ * If this function yields, it will also reset the tdp_iter's walk over the -+ * paging structure and the calling function should allow the iterator to -+ * continue its traversal from the paging structure root. -+ * -+ * Return true if this function yielded and the iterator's traversal was reset. -+ * Return false if a yield was not needed. -+ */ -+static bool tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter) - { - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { - cond_resched_lock(&kvm->mmu_lock); - tdp_iter_refresh_walk(iter); -+ return true; - } -+ -+ return false; - } - - /* -@@ -469,10 +490,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - - tdp_mmu_set_spte(kvm, &iter, 0); - -- if (can_yield) -- flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter); -- else -- flush_needed = true; -+ flush_needed = !can_yield || -+ !tdp_mmu_iter_flush_cond_resched(kvm, &iter); - } - return flush_needed; - } -@@ -1073,7 +1092,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm, - - tdp_mmu_set_spte(kvm, &iter, 0); - -- spte_set = tdp_mmu_iter_flush_cond_resched(kvm, &iter); -+ spte_set = !tdp_mmu_iter_flush_cond_resched(kvm, &iter); - } - - if (spte_set) --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch b/queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch deleted file mode 100644 index 9e580385426..00000000000 --- a/queue-5.11/kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 4825236e2c3032f176048b051b7522ff9c12495d Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:10 -0800 -Subject: KVM: x86/mmu: Don't redundantly clear TDP MMU pt memory - -From: Ben Gardon - -[ Upstream commit 734e45b329d626d2c14e2bcf8be3d069a33c3316 ] - -The KVM MMU caches already guarantee that shadow page table memory will -be zeroed, so there is no reason to re-zero the page in the TDP MMU page -fault handler. - -No functional change intended. 
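-
-The zeroing guarantee comes from the vCPU's MMU memory caches;
-kvm_mmu_create() configures the shadow page cache with (existing code in
-mmu.c, quoted here for context):
-
-	vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
-
-so every page returned by kvm_mmu_memory_cache_alloc() for a shadow page
-is already zero-filled and the clear_page() was pure overhead.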
- -Reviewed-by: Peter Feiner -Reviewed-by: Sean Christopherson -Acked-by: Paolo Bonzini -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-5-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 6bd86bb4c089..4a2b8844f00f 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -708,7 +708,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); - list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages); - child_pt = sp->spt; -- clear_page(child_pt); - new_spte = make_nonleaf_spte(child_pt, - !shadow_accessed_mask); - --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch b/queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch deleted file mode 100644 index 3ba5ee94a6b..00000000000 --- a/queue-5.11/kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch +++ /dev/null @@ -1,147 +0,0 @@ -From b58347d13f5c0b46dc8ce701f443931eff84d86c Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:19 -0800 -Subject: KVM: x86/mmu: Ensure forward progress when yielding in TDP MMU iter - -From: Ben Gardon - -[ Upstream commit ed5e484b79e8a9b8be714bd85b6fc70bd6dc99a7 ] - -In some functions the TDP iter risks not making forward progress if two -threads livelock yielding to one another. This is possible if two threads -are trying to execute wrprot_gfn_range. Each could write protect an entry -and then yield. This would reset the tdp_iter's walk over the paging -structure and the loop would end up repeating the same entry over and -over, preventing either thread from making forward progress. - -Fix this issue by only yielding if the loop has made forward progress -since the last yield. - -Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU") -Reviewed-by: Peter Feiner -Signed-off-by: Ben Gardon - -Message-Id: <20210202185734.1680553-14-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_iter.c | 18 +----------------- - arch/x86/kvm/mmu/tdp_iter.h | 7 ++++++- - arch/x86/kvm/mmu/tdp_mmu.c | 21 ++++++++++++++++----- - 3 files changed, 23 insertions(+), 23 deletions(-) - -diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c -index 9917c55b7d24..1a09d212186b 100644 ---- a/arch/x86/kvm/mmu/tdp_iter.c -+++ b/arch/x86/kvm/mmu/tdp_iter.c -@@ -31,6 +31,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, - WARN_ON(root_level > PT64_ROOT_MAX_LEVEL); - - iter->next_last_level_gfn = next_last_level_gfn; -+ iter->yielded_gfn = iter->next_last_level_gfn; - iter->root_level = root_level; - iter->min_level = min_level; - iter->level = root_level; -@@ -158,23 +159,6 @@ void tdp_iter_next(struct tdp_iter *iter) - iter->valid = false; - } - --/* -- * Restart the walk over the paging structure from the root, starting from the -- * highest gfn the iterator had previously reached. Assumes that the entire -- * paging structure, except the root page, may have been completely torn down -- * and rebuilt. 
-- */ --void tdp_iter_refresh_walk(struct tdp_iter *iter) --{ -- gfn_t next_last_level_gfn = iter->next_last_level_gfn; -- -- if (iter->gfn > next_last_level_gfn) -- next_last_level_gfn = iter->gfn; -- -- tdp_iter_start(iter, iter->pt_path[iter->root_level - 1], -- iter->root_level, iter->min_level, next_last_level_gfn); --} -- - u64 *tdp_iter_root_pt(struct tdp_iter *iter) - { - return iter->pt_path[iter->root_level - 1]; -diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h -index b2dd269c631f..d480c540ee27 100644 ---- a/arch/x86/kvm/mmu/tdp_iter.h -+++ b/arch/x86/kvm/mmu/tdp_iter.h -@@ -16,6 +16,12 @@ struct tdp_iter { - * for this GFN. - */ - gfn_t next_last_level_gfn; -+ /* -+ * The next_last_level_gfn at the time when the thread last -+ * yielded. Only yielding when the next_last_level_gfn != -+ * yielded_gfn helps ensure forward progress. -+ */ -+ gfn_t yielded_gfn; - /* Pointers to the page tables traversed to reach the current SPTE */ - u64 *pt_path[PT64_ROOT_MAX_LEVEL]; - /* A pointer to the current SPTE */ -@@ -54,7 +60,6 @@ u64 *spte_to_child_pt(u64 pte, int level); - void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, - int min_level, gfn_t next_last_level_gfn); - void tdp_iter_next(struct tdp_iter *iter); --void tdp_iter_refresh_walk(struct tdp_iter *iter); - u64 *tdp_iter_root_pt(struct tdp_iter *iter); - - #endif /* __KVM_X86_MMU_TDP_ITER_H */ -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 0dd27767c770..a07d37abb63f 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -420,8 +420,9 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm, - * TLB flush before yielding. - * - * If this function yields, it will also reset the tdp_iter's walk over the -- * paging structure and the calling function should allow the iterator to -- * continue its traversal from the paging structure root. -+ * paging structure and the calling function should skip to the next -+ * iteration to allow the iterator to continue its traversal from the -+ * paging structure root. - * - * Return true if this function yielded and the iterator's traversal was reset. - * Return false if a yield was not needed. -@@ -429,12 +430,22 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm, - static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, - struct tdp_iter *iter, bool flush) - { -+ /* Ensure forward progress has been made before yielding. 
*/ -+ if (iter->next_last_level_gfn == iter->yielded_gfn) -+ return false; -+ - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { - if (flush) - kvm_flush_remote_tlbs(kvm); - - cond_resched_lock(&kvm->mmu_lock); -- tdp_iter_refresh_walk(iter); -+ -+ WARN_ON(iter->gfn > iter->next_last_level_gfn); -+ -+ tdp_iter_start(iter, iter->pt_path[iter->root_level - 1], -+ iter->root_level, iter->min_level, -+ iter->next_last_level_gfn); -+ - return true; - } - -@@ -474,8 +485,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - - tdp_mmu_set_spte(kvm, &iter, 0); - -- flush_needed = !can_yield || -- !tdp_mmu_iter_cond_resched(kvm, &iter, true); -+ flush_needed = !(can_yield && -+ tdp_mmu_iter_cond_resched(kvm, &iter, true)); - } - return flush_needed; - } --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch b/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch deleted file mode 100644 index c5f3ac604b1..00000000000 --- a/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 59cf6e724c855ccfa7d36d6bcd2b8aaa2a0dcc39 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 25 Mar 2021 13:01:18 -0700 -Subject: KVM: x86/mmu: Ensure TLBs are flushed for TDP MMU during NX zapping - -From: Sean Christopherson - -[ Upstream commit 048f49809c526348775425420fb5b8e84fd9a133 ] - -Honor the "flush needed" return from kvm_tdp_mmu_zap_gfn_range(), which -does the flush itself if and only if it yields (which it will never do in -this particular scenario), and otherwise expects the caller to do the -flush. If pages are zapped from the TDP MMU but not the legacy MMU, then -no flush will occur. - -Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU") -Cc: stable@vger.kernel.org -Cc: Ben Gardon -Signed-off-by: Sean Christopherson -Message-Id: <20210325200119.1359384-3-seanjc@google.com> -Reviewed-by: Ben Gardon -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/mmu.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c -index d9901836d7aa..8643c766415a 100644 ---- a/arch/x86/kvm/mmu/mmu.c -+++ b/arch/x86/kvm/mmu/mmu.c -@@ -5985,6 +5985,8 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) - struct kvm_mmu_page *sp; - unsigned int ratio; - LIST_HEAD(invalid_list); -+ bool flush = false; -+ gfn_t gfn_end; - ulong to_zap; - - rcu_idx = srcu_read_lock(&kvm->srcu); -@@ -6006,19 +6008,20 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) - lpage_disallowed_link); - WARN_ON_ONCE(!sp->lpage_disallowed); - if (is_tdp_mmu_page(sp)) { -- kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, -- sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level)); -+ gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level); -+ flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end); - } else { - kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - WARN_ON_ONCE(sp->lpage_disallowed); - } - - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { -- kvm_mmu_commit_zap_page(kvm, &invalid_list); -+ kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); - cond_resched_lock(&kvm->mmu_lock); -+ flush = false; - } - } -- kvm_mmu_commit_zap_page(kvm, &invalid_list); -+ kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); - - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, rcu_idx); --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch 
b/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch
deleted file mode 100644
index c147bce232f..00000000000
--- a/queue-5.11/kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch
+++ /dev/null
@@ -1,114 +0,0 @@
-From 4cb3d4d92123732bc824cbe156b648c049bfe676 Mon Sep 17 00:00:00 2001
-From: Sasha Levin
-Date: Thu, 25 Mar 2021 13:01:17 -0700
-Subject: KVM: x86/mmu: Ensure TLBs are flushed when yielding during GFN range
- zap
-
-From: Sean Christopherson
-
-[ Upstream commit a835429cda91621fca915d80672a157b47738afb ]
-
-When flushing a range of GFNs across multiple roots, ensure any pending
-flush from a previous root is honored before yielding while walking the
-tables of the current root.
-
-Note, kvm_tdp_mmu_zap_gfn_range() now intentionally overwrites its local
-"flush" with the result to avoid redundant flushes. zap_gfn_range()
-preserves and returns the incoming "flush", unless of course the flush was
-performed prior to yielding and no new flush was triggered.
-
-Fixes: 1af4a96025b3 ("KVM: x86/mmu: Yield in TDU MMU iter even if no SPTES changed")
-Cc: stable@vger.kernel.org
-Reviewed-by: Ben Gardon
-Signed-off-by: Sean Christopherson
-Message-Id: <20210325200119.1359384-2-seanjc@google.com>
-Signed-off-by: Paolo Bonzini
-Signed-off-by: Sasha Levin
---- 
- arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++++++++++-----------
- 1 file changed, 12 insertions(+), 11 deletions(-)
-
-diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
-index 65c9172dcdf9..50c088a41dee 100644
---- a/arch/x86/kvm/mmu/tdp_mmu.c
-+++ b/arch/x86/kvm/mmu/tdp_mmu.c
-@@ -111,7 +111,7 @@ bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
- }
- 
- static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-- gfn_t start, gfn_t end, bool can_yield);
-+ gfn_t start, gfn_t end, bool can_yield, bool flush);
- 
- void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
- {
-@@ -124,7 +124,7 @@ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
- 
- list_del(&root->link);
- 
-- zap_gfn_range(kvm, root, 0, max_gfn, false);
-+ zap_gfn_range(kvm, root, 0, max_gfn, false, false);
- 
- free_page((unsigned long)root->spt);
- kmem_cache_free(mmu_page_header_cache, root);
-@@ -506,20 +506,21 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
- * scheduler needs the CPU or there is contention on the MMU lock. If this
- * function cannot yield, it will not release the MMU lock or reschedule and
- * the caller must ensure it does not supply too large a GFN range, or the
-- * operation can cause a soft lockup.
-+ * operation can cause a soft lockup. Note, in some use cases a flush may be
-+ * required by prior actions. Ensure the pending flush is performed prior to
-+ * yielding.
- 
- */ - static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, -- gfn_t start, gfn_t end, bool can_yield) -+ gfn_t start, gfn_t end, bool can_yield, bool flush) - { - struct tdp_iter iter; -- bool flush_needed = false; - - rcu_read_lock(); - - tdp_root_for_each_pte(iter, root, start, end) { - if (can_yield && -- tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) { -- flush_needed = false; -+ tdp_mmu_iter_cond_resched(kvm, &iter, flush)) { -+ flush = false; - continue; - } - -@@ -537,11 +538,11 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - continue; - - tdp_mmu_set_spte(kvm, &iter, 0); -- flush_needed = true; -+ flush = true; - } - - rcu_read_unlock(); -- return flush_needed; -+ return flush; - } - - /* -@@ -556,7 +557,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) - bool flush = false; - - for_each_tdp_mmu_root_yield_safe(kvm, root) -- flush |= zap_gfn_range(kvm, root, start, end, true); -+ flush = zap_gfn_range(kvm, root, start, end, true, flush); - - return flush; - } -@@ -759,7 +760,7 @@ static int zap_gfn_range_hva_wrapper(struct kvm *kvm, - struct kvm_mmu_page *root, gfn_t start, - gfn_t end, unsigned long unused) - { -- return zap_gfn_range(kvm, root, start, end, false); -+ return zap_gfn_range(kvm, root, start, end, false, false); - } - - int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start, --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch b/queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch deleted file mode 100644 index 44d990c5ab5..00000000000 --- a/queue-5.11/kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 984759b7ac6f1b165b54e36b1ef607742868c136 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:25 -0800 -Subject: KVM: x86/mmu: Factor out functions to add/remove TDP MMU pages - -From: Ben Gardon - -[ Upstream commit a9442f594147f95307f691cfba0c31e25dc79b9d ] - -Move the work of adding and removing TDP MMU pages to/from "secondary" -data structures to helper functions. These functions will be built on in -future commits to enable MMU operations to proceed (mostly) in parallel. - -No functional change expected. - -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-20-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 47 +++++++++++++++++++++++++++++++------- - 1 file changed, 39 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 4a2b8844f00f..bc49a5b90086 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -262,6 +262,39 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, - } - } - -+/** -+ * tdp_mmu_link_page - Add a new page to the list of pages used by the TDP MMU -+ * -+ * @kvm: kvm instance -+ * @sp: the new page -+ * @account_nx: This page replaces a NX large page and should be marked for -+ * eventual reclaim. 
-+ */ -+static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp, -+ bool account_nx) -+{ -+ lockdep_assert_held_write(&kvm->mmu_lock); -+ -+ list_add(&sp->link, &kvm->arch.tdp_mmu_pages); -+ if (account_nx) -+ account_huge_nx_page(kvm, sp); -+} -+ -+/** -+ * tdp_mmu_unlink_page - Remove page from the list of pages used by the TDP MMU -+ * -+ * @kvm: kvm instance -+ * @sp: the page to be removed -+ */ -+static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp) -+{ -+ lockdep_assert_held_write(&kvm->mmu_lock); -+ -+ list_del(&sp->link); -+ if (sp->lpage_disallowed) -+ unaccount_huge_nx_page(kvm, sp); -+} -+ - /** - * handle_removed_tdp_mmu_page - handle a pt removed from the TDP structure - * -@@ -281,10 +314,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt) - - trace_kvm_mmu_prepare_zap_page(sp); - -- list_del(&sp->link); -- -- if (sp->lpage_disallowed) -- unaccount_huge_nx_page(kvm, sp); -+ tdp_mmu_unlink_page(kvm, sp); - - for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - old_child_spte = READ_ONCE(*(pt + i)); -@@ -706,15 +736,16 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - - if (!is_shadow_present_pte(iter.old_spte)) { - sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); -- list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages); - child_pt = sp->spt; -+ -+ tdp_mmu_link_page(vcpu->kvm, sp, -+ huge_page_disallowed && -+ req_level >= iter.level); -+ - new_spte = make_nonleaf_spte(child_pt, - !shadow_accessed_mask); - - trace_kvm_mmu_get_page(sp, true); -- if (huge_page_disallowed && req_level >= iter.level) -- account_huge_nx_page(vcpu->kvm, sp); -- - tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte); - } - } --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch b/queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch deleted file mode 100644 index f71a68c38ea..00000000000 --- a/queue-5.11/kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 91e5cc29852d0bf893a88de84a9316fea94a71ac Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:11 -0800 -Subject: KVM: x86/mmu: Factor out handling of removed page tables - -From: Ben Gardon - -[ Upstream commit a066e61f13cf4b17d043ad8bea0cdde2b1e5ee49 ] - -Factor out the code to handle a disconnected subtree of the TDP paging -structure from the code to handle the change to an individual SPTE. -Future commits will build on this to allow asynchronous page freeing. - -No functional change intended. - -Reviewed-by: Peter Feiner -Acked-by: Paolo Bonzini -Signed-off-by: Ben Gardon - -Message-Id: <20210202185734.1680553-6-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 71 ++++++++++++++++++++++---------------- - 1 file changed, 42 insertions(+), 29 deletions(-) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 3a8bbc812a28..3efaa8b44e45 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -234,6 +234,45 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, - } - } - -+/** -+ * handle_removed_tdp_mmu_page - handle a pt removed from the TDP structure -+ * -+ * @kvm: kvm instance -+ * @pt: the page removed from the paging structure -+ * -+ * Given a page table that has been removed from the TDP paging structure, -+ * iterates through the page table to clear SPTEs and free child page tables. 
-+ */ -+static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt) -+{ -+ struct kvm_mmu_page *sp = sptep_to_sp(pt); -+ int level = sp->role.level; -+ gfn_t gfn = sp->gfn; -+ u64 old_child_spte; -+ int i; -+ -+ trace_kvm_mmu_prepare_zap_page(sp); -+ -+ list_del(&sp->link); -+ -+ if (sp->lpage_disallowed) -+ unaccount_huge_nx_page(kvm, sp); -+ -+ for (i = 0; i < PT64_ENT_PER_PAGE; i++) { -+ old_child_spte = READ_ONCE(*(pt + i)); -+ WRITE_ONCE(*(pt + i), 0); -+ handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), -+ gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)), -+ old_child_spte, 0, level - 1); -+ } -+ -+ kvm_flush_remote_tlbs_with_address(kvm, gfn, -+ KVM_PAGES_PER_HPAGE(level)); -+ -+ free_page((unsigned long)pt); -+ kmem_cache_free(mmu_page_header_cache, sp); -+} -+ - /** - * handle_changed_spte - handle bookkeeping associated with an SPTE change - * @kvm: kvm instance -@@ -254,10 +293,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - bool was_leaf = was_present && is_last_spte(old_spte, level); - bool is_leaf = is_present && is_last_spte(new_spte, level); - bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); -- u64 *pt; -- struct kvm_mmu_page *sp; -- u64 old_child_spte; -- int i; - - WARN_ON(level > PT64_ROOT_MAX_LEVEL); - WARN_ON(level < PG_LEVEL_4K); -@@ -321,31 +356,9 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - * Recursively handle child PTs if the change removed a subtree from - * the paging structure. - */ -- if (was_present && !was_leaf && (pfn_changed || !is_present)) { -- pt = spte_to_child_pt(old_spte, level); -- sp = sptep_to_sp(pt); -- -- trace_kvm_mmu_prepare_zap_page(sp); -- -- list_del(&sp->link); -- -- if (sp->lpage_disallowed) -- unaccount_huge_nx_page(kvm, sp); -- -- for (i = 0; i < PT64_ENT_PER_PAGE; i++) { -- old_child_spte = READ_ONCE(*(pt + i)); -- WRITE_ONCE(*(pt + i), 0); -- handle_changed_spte(kvm, as_id, -- gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)), -- old_child_spte, 0, level - 1); -- } -- -- kvm_flush_remote_tlbs_with_address(kvm, gfn, -- KVM_PAGES_PER_HPAGE(level)); -- -- free_page((unsigned long)pt); -- kmem_cache_free(mmu_page_header_cache, sp); -- } -+ if (was_present && !was_leaf && (pfn_changed || !is_present)) -+ handle_removed_tdp_mmu_page(kvm, -+ spte_to_child_pt(old_spte, level)); - } - - static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch b/queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch deleted file mode 100644 index 642652ea93e..00000000000 --- a/queue-5.11/kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch +++ /dev/null @@ -1,40 +0,0 @@ -From ecbb84cf762594358ddecba3c2543a145ac143f5 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:15 -0800 -Subject: KVM: x86/mmu: Fix braces in kvm_recover_nx_lpages - -From: Ben Gardon - -[ Upstream commit 8d1a182ea791f0111b0258c8f3eb8d77af0a8386 ] - -No functional change intended. 
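-
-This applies the rule from Documentation/process/coding-style.rst: when
-only one branch of a conditional statement is a single statement, use
-braces in both branches, e.g.:
-
-	if (condition) {
-		do_this();
-		do_that();
-	} else {
-		otherwise();
-	}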
- -Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU") -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-10-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/mmu.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c -index ed861245ecf0..5771102a840c 100644 ---- a/arch/x86/kvm/mmu/mmu.c -+++ b/arch/x86/kvm/mmu/mmu.c -@@ -6005,10 +6005,10 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) - struct kvm_mmu_page, - lpage_disallowed_link); - WARN_ON_ONCE(!sp->lpage_disallowed); -- if (sp->tdp_mmu_page) -+ if (sp->tdp_mmu_page) { - kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, - sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level)); -- else { -+ } else { - kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - WARN_ON_ONCE(sp->lpage_disallowed); - } --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch b/queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch deleted file mode 100644 index f76efa3b402..00000000000 --- a/queue-5.11/kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch +++ /dev/null @@ -1,125 +0,0 @@ -From cd6c5a3966bb70785043f4a733c53bb74d8f57b6 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:17 -0800 -Subject: KVM: x86/mmu: Merge flush and non-flush tdp_mmu_iter_cond_resched - -From: Ben Gardon - -[ Upstream commit e139a34ef9d5627a41e1c02210229082140d1f92 ] - -The flushing and non-flushing variants of tdp_mmu_iter_cond_resched have -almost identical implementations. Merge the two functions and add a -flush parameter. - -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-12-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 42 ++++++++++++-------------------------- - 1 file changed, 13 insertions(+), 29 deletions(-) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index abdd89771b9b..0dd27767c770 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -412,33 +412,13 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm, - for_each_tdp_pte(_iter, __va(_mmu->root_hpa), \ - _mmu->shadow_root_level, _start, _end) - --/* -- * Flush the TLB and yield if the MMU lock is contended or this thread needs to -- * return control to the scheduler. -- * -- * If this function yields, it will also reset the tdp_iter's walk over the -- * paging structure and the calling function should allow the iterator to -- * continue its traversal from the paging structure root. -- * -- * Return true if this function yielded, the TLBs were flushed, and the -- * iterator's traversal was reset. Return false if a yield was not needed. -- */ --static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter) --{ -- if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { -- kvm_flush_remote_tlbs(kvm); -- cond_resched_lock(&kvm->mmu_lock); -- tdp_iter_refresh_walk(iter); -- return true; -- } -- -- return false; --} -- - /* - * Yield if the MMU lock is contended or this thread needs to return control - * to the scheduler. - * -+ * If this function should yield and flush is set, it will perform a remote -+ * TLB flush before yielding. 
-+ *
- * If this function yields, it will also reset the tdp_iter's walk over the
- * paging structure and the calling function should allow the iterator to
- * continue its traversal from the paging structure root.
-@@ -446,9 +426,13 @@ static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *it
- * Return true if this function yielded and the iterator's traversal was reset.
- * Return false if a yield was not needed.
- */
--static bool tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
-+static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
-+ struct tdp_iter *iter, bool flush)
- {
- if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-+ if (flush)
-+ kvm_flush_remote_tlbs(kvm);
-+
- cond_resched_lock(&kvm->mmu_lock);
- tdp_iter_refresh_walk(iter);
- return true;
-@@ -491,7 +475,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
- tdp_mmu_set_spte(kvm, &iter, 0);
- 
- flush_needed = !can_yield ||
-- !tdp_mmu_iter_flush_cond_resched(kvm, &iter);
-+ !tdp_mmu_iter_cond_resched(kvm, &iter, true);
- }
- return flush_needed;
- }
-@@ -864,7 +848,7 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
- tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
- spte_set = true;
- 
-- tdp_mmu_iter_cond_resched(kvm, &iter);
-+ tdp_mmu_iter_cond_resched(kvm, &iter, false);
- }
- return spte_set;
- }
-@@ -923,7 +907,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
- tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
- spte_set = true;
- 
-- tdp_mmu_iter_cond_resched(kvm, &iter);
-+ tdp_mmu_iter_cond_resched(kvm, &iter, false);
- }
- return spte_set;
- }
-@@ -1039,7 +1023,7 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
- tdp_mmu_set_spte(kvm, &iter, new_spte);
- spte_set = true;
- 
-- tdp_mmu_iter_cond_resched(kvm, &iter);
-+ tdp_mmu_iter_cond_resched(kvm, &iter, false);
- }
- 
- return spte_set;
-@@ -1092,7 +1076,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
- 
- tdp_mmu_set_spte(kvm, &iter, 0);
- 
-- spte_set = !tdp_mmu_iter_flush_cond_resched(kvm, &iter);
-+ spte_set = !tdp_mmu_iter_cond_resched(kvm, &iter, true);
- }
- 
- if (spte_set)
---
-2.30.1
-
diff --git a/queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch b/queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch
deleted file mode 100644
index 13caba35607..00000000000
--- a/queue-5.11/kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch
+++ /dev/null
@@ -1,505 +0,0 @@
-From 975b3ceb8562de4246e1060c50355b2722fcd67d Mon Sep 17 00:00:00 2001
-From: Sasha Levin
-Date: Tue, 2 Feb 2021 10:57:23 -0800
-Subject: KVM: x86/mmu: Protect TDP MMU page table memory with RCU
-
-From: Ben Gardon
-
-[ Upstream commit 7cca2d0b7e7d9f3cd740d41afdc00051c9b508a0 ]
-
-In order to enable concurrent modifications to the paging structures in
-the TDP MMU, threads must be able to safely remove pages of page table
-memory while other threads are traversing the same memory. To ensure
-threads do not access PT memory after it is freed, protect PT memory
-with RCU.
-
-Protecting concurrent accesses to page table memory from use-after-free
-bugs could also have been accomplished using
-walk_shadow_page_lockless_begin/end() and READING_SHADOW_PAGE_TABLES,
-coupled with the barriers in a TLB flush. The use of RCU for this case
-has several distinct advantages over that approach.
-1. Disabling interrupts for long running operations is not desirable.
- Future commits will allow operations besides page faults to operate
- without the exclusive protection of the MMU lock and those operations
- are too long to disable interrupts for their duration.
-2. The use of RCU here avoids long blocking / spinning operations in
- performance critical paths. By freeing memory with an asynchronous
- RCU API we avoid the longer wait times TLB flushes experience when
- overlapping with a thread in walk_shadow_page_lockless_begin/end().
-3. RCU provides a separation of concerns when removing memory from the
- paging structure. Because the RCU callback to free memory can be
- scheduled immediately after a TLB flush, there's no need for the
- thread to manually free a queue of pages later, as commit_zap_pages
- does.
-
-Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
-Reviewed-by: Peter Feiner
-Suggested-by: Sean Christopherson
-Signed-off-by: Ben Gardon
-
-Message-Id: <20210202185734.1680553-18-bgardon@google.com>
-Signed-off-by: Paolo Bonzini
-Signed-off-by: Sasha Levin
--- 
- arch/x86/kvm/mmu/mmu_internal.h | 3 ++
- arch/x86/kvm/mmu/tdp_iter.c | 16 +++---
- arch/x86/kvm/mmu/tdp_iter.h | 10 ++--
- arch/x86/kvm/mmu/tdp_mmu.c | 95 +++++++++++++++++++++++++++++----
- 4 files changed, 103 insertions(+), 21 deletions(-)
-
-diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
-index cf101b73a360..9e600dc30f08 100644
---- a/arch/x86/kvm/mmu/mmu_internal.h
-+++ b/arch/x86/kvm/mmu/mmu_internal.h
-@@ -57,6 +57,9 @@ struct kvm_mmu_page {
- atomic_t write_flooding_count;
- 
- bool tdp_mmu_page;
-+
-+ /* Used for freeing the page asynchronously if it is a TDP MMU page. */
-+ struct rcu_head rcu_head;
- };
- 
- extern struct kmem_cache *mmu_page_header_cache;
-diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
-index 1a09d212186b..e5f148106e20 100644
---- a/arch/x86/kvm/mmu/tdp_iter.c
-+++ b/arch/x86/kvm/mmu/tdp_iter.c
-@@ -12,7 +12,7 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
- {
- iter->sptep = iter->pt_path[iter->level - 1] +
- SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
-- iter->old_spte = READ_ONCE(*iter->sptep);
-+ iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
- }
- 
- static gfn_t round_gfn_for_level(gfn_t gfn, int level)
-@@ -35,7 +35,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
- iter->root_level = root_level;
- iter->min_level = min_level;
- iter->level = root_level;
-- iter->pt_path[iter->level - 1] = root_pt;
-+ iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
- 
- iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
- tdp_iter_refresh_sptep(iter);
-@@ -48,7 +48,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
- * address of the child page table referenced by the SPTE. Returns null if
- * there is no such entry.
-
- */ --u64 *spte_to_child_pt(u64 spte, int level) -+tdp_ptep_t spte_to_child_pt(u64 spte, int level) - { - /* - * There's no child entry if this entry isn't present or is a -@@ -57,7 +57,7 @@ u64 *spte_to_child_pt(u64 spte, int level) - if (!is_shadow_present_pte(spte) || is_last_spte(spte, level)) - return NULL; - -- return __va(spte_to_pfn(spte) << PAGE_SHIFT); -+ return (tdp_ptep_t)__va(spte_to_pfn(spte) << PAGE_SHIFT); - } - - /* -@@ -66,7 +66,7 @@ u64 *spte_to_child_pt(u64 spte, int level) - */ - static bool try_step_down(struct tdp_iter *iter) - { -- u64 *child_pt; -+ tdp_ptep_t child_pt; - - if (iter->level == iter->min_level) - return false; -@@ -75,7 +75,7 @@ static bool try_step_down(struct tdp_iter *iter) - * Reread the SPTE before stepping down to avoid traversing into page - * tables that are no longer linked from this entry. - */ -- iter->old_spte = READ_ONCE(*iter->sptep); -+ iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep)); - - child_pt = spte_to_child_pt(iter->old_spte, iter->level); - if (!child_pt) -@@ -109,7 +109,7 @@ static bool try_step_side(struct tdp_iter *iter) - iter->gfn += KVM_PAGES_PER_HPAGE(iter->level); - iter->next_last_level_gfn = iter->gfn; - iter->sptep++; -- iter->old_spte = READ_ONCE(*iter->sptep); -+ iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep)); - - return true; - } -@@ -159,7 +159,7 @@ void tdp_iter_next(struct tdp_iter *iter) - iter->valid = false; - } - --u64 *tdp_iter_root_pt(struct tdp_iter *iter) -+tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter) - { - return iter->pt_path[iter->root_level - 1]; - } -diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h -index d480c540ee27..4cc177d75c4a 100644 ---- a/arch/x86/kvm/mmu/tdp_iter.h -+++ b/arch/x86/kvm/mmu/tdp_iter.h -@@ -7,6 +7,8 @@ - - #include "mmu.h" - -+typedef u64 __rcu *tdp_ptep_t; -+ - /* - * A TDP iterator performs a pre-order walk over a TDP paging structure. - */ -@@ -23,9 +25,9 @@ struct tdp_iter { - */ - gfn_t yielded_gfn; - /* Pointers to the page tables traversed to reach the current SPTE */ -- u64 *pt_path[PT64_ROOT_MAX_LEVEL]; -+ tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL]; - /* A pointer to the current SPTE */ -- u64 *sptep; -+ tdp_ptep_t sptep; - /* The lowest GFN mapped by the current SPTE */ - gfn_t gfn; - /* The level of the root page given to the iterator */ -@@ -55,11 +57,11 @@ struct tdp_iter { - #define for_each_tdp_pte(iter, root, root_level, start, end) \ - for_each_tdp_pte_min_level(iter, root, root_level, PG_LEVEL_4K, start, end) - --u64 *spte_to_child_pt(u64 pte, int level); -+tdp_ptep_t spte_to_child_pt(u64 pte, int level); - - void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, - int min_level, gfn_t next_last_level_gfn); - void tdp_iter_next(struct tdp_iter *iter); --u64 *tdp_iter_root_pt(struct tdp_iter *iter); -+tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter); - - #endif /* __KVM_X86_MMU_TDP_ITER_H */ -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index 3efaa8b44e45..65c9172dcdf9 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -42,6 +42,12 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) - return; - - WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); -+ -+ /* -+ * Ensure that all the outstanding RCU callbacks to free shadow pages -+ * can run before the VM is torn down. 
-+ */ -+ rcu_barrier(); - } - - static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) -@@ -196,6 +202,28 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) - return __pa(root->spt); - } - -+static void tdp_mmu_free_sp(struct kvm_mmu_page *sp) -+{ -+ free_page((unsigned long)sp->spt); -+ kmem_cache_free(mmu_page_header_cache, sp); -+} -+ -+/* -+ * This is called through call_rcu in order to free TDP page table memory -+ * safely with respect to other kernel threads that may be operating on -+ * the memory. -+ * By only accessing TDP MMU page table memory in an RCU read critical -+ * section, and freeing it after a grace period, lockless access to that -+ * memory won't use it after it is freed. -+ */ -+static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) -+{ -+ struct kvm_mmu_page *sp = container_of(head, struct kvm_mmu_page, -+ rcu_head); -+ -+ tdp_mmu_free_sp(sp); -+} -+ - static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - u64 old_spte, u64 new_spte, int level); - -@@ -269,8 +297,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt) - kvm_flush_remote_tlbs_with_address(kvm, gfn, - KVM_PAGES_PER_HPAGE(level)); - -- free_page((unsigned long)pt); -- kmem_cache_free(mmu_page_header_cache, sp); -+ call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); - } - - /** -@@ -374,13 +401,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, - u64 new_spte, bool record_acc_track, - bool record_dirty_log) - { -- u64 *root_pt = tdp_iter_root_pt(iter); -+ tdp_ptep_t root_pt = tdp_iter_root_pt(iter); - struct kvm_mmu_page *root = sptep_to_sp(root_pt); - int as_id = kvm_mmu_page_as_id(root); - - lockdep_assert_held(&kvm->mmu_lock); - -- WRITE_ONCE(*iter->sptep, new_spte); -+ WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte); - - __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, - iter->level); -@@ -450,10 +477,13 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, - return false; - - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { -+ rcu_read_unlock(); -+ - if (flush) - kvm_flush_remote_tlbs(kvm); - - cond_resched_lock(&kvm->mmu_lock); -+ rcu_read_lock(); - - WARN_ON(iter->gfn > iter->next_last_level_gfn); - -@@ -484,6 +514,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - struct tdp_iter iter; - bool flush_needed = false; - -+ rcu_read_lock(); -+ - tdp_root_for_each_pte(iter, root, start, end) { - if (can_yield && - tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) { -@@ -507,6 +539,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - tdp_mmu_set_spte(kvm, &iter, 0); - flush_needed = true; - } -+ -+ rcu_read_unlock(); - return flush_needed; - } - -@@ -552,13 +586,15 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, - - if (unlikely(is_noslot_pfn(pfn))) { - new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); -- trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte); -+ trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, -+ new_spte); - } else { - make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn, - pfn, iter->old_spte, prefault, true, - map_writable, !shadow_accessed_mask, - &new_spte); -- trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep); -+ trace_kvm_mmu_set_spte(iter->level, iter->gfn, -+ rcu_dereference(iter->sptep)); - } - - if (new_spte == iter->old_spte) -@@ -581,7 +617,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu 
*vcpu, int write, - if (unlikely(is_mmio_spte(new_spte))) - ret = RET_PF_EMULATE; - -- trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep); -+ trace_kvm_mmu_set_spte(iter->level, iter->gfn, -+ rcu_dereference(iter->sptep)); - if (!prefault) - vcpu->stat.pf_fixed++; - -@@ -619,6 +656,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - huge_page_disallowed, &req_level); - - trace_kvm_mmu_spte_requested(gpa, level, pfn); -+ -+ rcu_read_lock(); -+ - tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { - if (nx_huge_page_workaround_enabled) - disallowed_hugepage_adjust(iter.old_spte, gfn, -@@ -644,7 +684,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - * because the new value informs the !present - * path below. - */ -- iter.old_spte = READ_ONCE(*iter.sptep); -+ iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep)); - } - - if (!is_shadow_present_pte(iter.old_spte)) { -@@ -663,11 +703,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - } - } - -- if (WARN_ON(iter.level != level)) -+ if (WARN_ON(iter.level != level)) { -+ rcu_read_unlock(); - return RET_PF_RETRY; -+ } - - ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter, - pfn, prefault); -+ rcu_read_unlock(); - - return ret; - } -@@ -738,6 +781,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot, - int young = 0; - u64 new_spte = 0; - -+ rcu_read_lock(); -+ - tdp_root_for_each_leaf_pte(iter, root, start, end) { - /* - * If we have a non-accessed entry we don't need to change the -@@ -769,6 +814,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot, - trace_kvm_age_page(iter.gfn, iter.level, slot, young); - } - -+ rcu_read_unlock(); -+ - return young; - } - -@@ -814,6 +861,8 @@ static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot, - u64 new_spte; - int need_flush = 0; - -+ rcu_read_lock(); -+ - WARN_ON(pte_huge(*ptep)); - - new_pfn = pte_pfn(*ptep); -@@ -842,6 +891,8 @@ static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot, - if (need_flush) - kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); - -+ rcu_read_unlock(); -+ - return 0; - } - -@@ -865,6 +916,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - u64 new_spte; - bool spte_set = false; - -+ rcu_read_lock(); -+ - BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL); - - for_each_tdp_pte_min_level(iter, root->spt, root->role.level, -@@ -881,6 +934,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); - spte_set = true; - } -+ -+ rcu_read_unlock(); - return spte_set; - } - -@@ -922,6 +977,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - u64 new_spte; - bool spte_set = false; - -+ rcu_read_lock(); -+ - tdp_root_for_each_leaf_pte(iter, root, start, end) { - if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) - continue; -@@ -941,6 +998,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); - spte_set = true; - } -+ -+ rcu_read_unlock(); - return spte_set; - } - -@@ -982,6 +1041,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, - struct tdp_iter iter; - u64 new_spte; - -+ rcu_read_lock(); -+ - tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask), - gfn + BITS_PER_LONG) { - if (!mask) -@@ -1007,6 +1068,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct 
kvm_mmu_page *root, - - mask &= ~(1UL << (iter.gfn - gfn)); - } -+ -+ rcu_read_unlock(); - } - - /* -@@ -1046,6 +1109,8 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - u64 new_spte; - bool spte_set = false; - -+ rcu_read_lock(); -+ - tdp_root_for_each_pte(iter, root, start, end) { - if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) - continue; -@@ -1059,6 +1124,7 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - spte_set = true; - } - -+ rcu_read_unlock(); - return spte_set; - } - -@@ -1096,6 +1162,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm, - kvm_pfn_t pfn; - bool spte_set = false; - -+ rcu_read_lock(); -+ - tdp_root_for_each_pte(iter, root, start, end) { - if (tdp_mmu_iter_cond_resched(kvm, &iter, spte_set)) { - spte_set = false; -@@ -1117,6 +1185,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm, - spte_set = true; - } - -+ rcu_read_unlock(); - if (spte_set) - kvm_flush_remote_tlbs(kvm); - } -@@ -1153,6 +1222,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, - u64 new_spte; - bool spte_set = false; - -+ rcu_read_lock(); -+ - tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) { - if (!is_writable_pte(iter.old_spte)) - break; -@@ -1164,6 +1235,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, - spte_set = true; - } - -+ rcu_read_unlock(); -+ - return spte_set; - } - -@@ -1204,10 +1277,14 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, - - *root_level = vcpu->arch.mmu->shadow_root_level; - -+ rcu_read_lock(); -+ - tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { - leaf = iter.level; - sptes[leaf] = iter.old_spte; - } - -+ rcu_read_unlock(); -+ - return leaf; - } --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch b/queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch deleted file mode 100644 index 014983a9236..00000000000 --- a/queue-5.11/kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch +++ /dev/null @@ -1,114 +0,0 @@ -From f8decc13cabd456eae14a487ca45f41540199093 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:18 -0800 -Subject: KVM: x86/mmu: Rename goal_gfn to next_last_level_gfn - -From: Ben Gardon - -[ Upstream commit 74953d3530280dc53256054e1906f58d07bfba44 ] - -The goal_gfn field in tdp_iter can be misleading as it implies that it -is the iterator's final goal. It is really a target for the lowest gfn -mapped by the leaf level SPTE the iterator will traverse towards. Change -the field's name to be more precise. - -Signed-off-by: Ben Gardon -Message-Id: <20210202185734.1680553-13-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_iter.c | 20 ++++++++++---------- - arch/x86/kvm/mmu/tdp_iter.h | 4 ++-- - 2 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c -index 87b7e16911db..9917c55b7d24 100644 ---- a/arch/x86/kvm/mmu/tdp_iter.c -+++ b/arch/x86/kvm/mmu/tdp_iter.c -@@ -22,21 +22,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level) - - /* - * Sets a TDP iterator to walk a pre-order traversal of the paging structure -- * rooted at root_pt, starting with the walk to translate goal_gfn. -+ * rooted at root_pt, starting with the walk to translate next_last_level_gfn. 
- */ - void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, -- int min_level, gfn_t goal_gfn) -+ int min_level, gfn_t next_last_level_gfn) - { - WARN_ON(root_level < 1); - WARN_ON(root_level > PT64_ROOT_MAX_LEVEL); - -- iter->goal_gfn = goal_gfn; -+ iter->next_last_level_gfn = next_last_level_gfn; - iter->root_level = root_level; - iter->min_level = min_level; - iter->level = root_level; - iter->pt_path[iter->level - 1] = root_pt; - -- iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level); -+ iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); - tdp_iter_refresh_sptep(iter); - - iter->valid = true; -@@ -82,7 +82,7 @@ static bool try_step_down(struct tdp_iter *iter) - - iter->level--; - iter->pt_path[iter->level - 1] = child_pt; -- iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level); -+ iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); - tdp_iter_refresh_sptep(iter); - - return true; -@@ -106,7 +106,7 @@ static bool try_step_side(struct tdp_iter *iter) - return false; - - iter->gfn += KVM_PAGES_PER_HPAGE(iter->level); -- iter->goal_gfn = iter->gfn; -+ iter->next_last_level_gfn = iter->gfn; - iter->sptep++; - iter->old_spte = READ_ONCE(*iter->sptep); - -@@ -166,13 +166,13 @@ void tdp_iter_next(struct tdp_iter *iter) - */ - void tdp_iter_refresh_walk(struct tdp_iter *iter) - { -- gfn_t goal_gfn = iter->goal_gfn; -+ gfn_t next_last_level_gfn = iter->next_last_level_gfn; - -- if (iter->gfn > goal_gfn) -- goal_gfn = iter->gfn; -+ if (iter->gfn > next_last_level_gfn) -+ next_last_level_gfn = iter->gfn; - - tdp_iter_start(iter, iter->pt_path[iter->root_level - 1], -- iter->root_level, iter->min_level, goal_gfn); -+ iter->root_level, iter->min_level, next_last_level_gfn); - } - - u64 *tdp_iter_root_pt(struct tdp_iter *iter) -diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h -index 47170d0dc98e..b2dd269c631f 100644 ---- a/arch/x86/kvm/mmu/tdp_iter.h -+++ b/arch/x86/kvm/mmu/tdp_iter.h -@@ -15,7 +15,7 @@ struct tdp_iter { - * The iterator will traverse the paging structure towards the mapping - * for this GFN. - */ -- gfn_t goal_gfn; -+ gfn_t next_last_level_gfn; - /* Pointers to the page tables traversed to reach the current SPTE */ - u64 *pt_path[PT64_ROOT_MAX_LEVEL]; - /* A pointer to the current SPTE */ -@@ -52,7 +52,7 @@ struct tdp_iter { - u64 *spte_to_child_pt(u64 pte, int level); - - void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, -- int min_level, gfn_t goal_gfn); -+ int min_level, gfn_t next_last_level_gfn); - void tdp_iter_next(struct tdp_iter *iter); - void tdp_iter_refresh_walk(struct tdp_iter *iter); - u64 *tdp_iter_root_pt(struct tdp_iter *iter); --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch b/queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch deleted file mode 100644 index c38ee5d17ab..00000000000 --- a/queue-5.11/kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch +++ /dev/null @@ -1,371 +0,0 @@ -From d4aa26febd7e7a1198a88af587864968a3238f5b Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:26 -0800 -Subject: KVM: x86/mmu: Use atomic ops to set SPTEs in TDP MMU map - -From: Ben Gardon - -[ Upstream commit 9a77daacc87dee9fd63e31243f21894132ed8407 ] - -To prepare for handling page faults in parallel, change the TDP MMU -page fault handler to use atomic operations to set SPTEs so that changes -are not lost if multiple threads attempt to modify the same SPTE. 
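[Aside: the lost-update guard this patch adds around SPTE writes is, at
bottom, a 64-bit compare-and-exchange: install the new value only if the
entry still holds the value the walker read earlier. The minimal userspace
C sketch below shows just that idea; set_spte_atomic() and "seen" are
illustrative stand-ins rather than kernel API, and the real
tdp_mmu_set_spte_atomic() in the hunks below additionally handles
bookkeeping and runs with the MMU lock held for read.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for one 64-bit SPTE shared between faulting threads. */
static _Atomic uint64_t spte;

/*
 * Install new_val only if the entry still holds the value read earlier.
 * Returns nonzero on success, zero if another thread got there first.
 */
static int set_spte_atomic(uint64_t old_val, uint64_t new_val)
{
	return atomic_compare_exchange_strong(&spte, &old_val, new_val);
}

int main(void)
{
	uint64_t seen = atomic_load(&spte);	/* like iter->old_spte */

	if (set_spte_atomic(seen, 0x1234))
		printf("installed spte=0x%llx\n",
		       (unsigned long long)atomic_load(&spte));
	else
		puts("raced with another thread: retry the fault");
	return 0;
}

If the exchange fails, another thread updated the entry first, which is
why the map path below bails out with RET_PF_RETRY rather than blindly
overwriting the entry.]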
- -Reviewed-by: Peter Feiner -Signed-off-by: Ben Gardon - -Message-Id: <20210202185734.1680553-21-bgardon@google.com> -[Document new locking rules. - Paolo] -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - Documentation/virt/kvm/locking.rst | 9 +- - arch/x86/include/asm/kvm_host.h | 13 +++ - arch/x86/kvm/mmu/tdp_mmu.c | 142 ++++++++++++++++++++++------- - 3 files changed, 130 insertions(+), 34 deletions(-) - -diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst -index b21a34c34a21..0aa4817b466d 100644 ---- a/Documentation/virt/kvm/locking.rst -+++ b/Documentation/virt/kvm/locking.rst -@@ -16,7 +16,14 @@ The acquisition orders for mutexes are as follows: - - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring - them together is quite rare. - --On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. -+On x86: -+ -+- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock -+ -+- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is -+ taken inside kvm->arch.mmu_lock, and cannot be taken without already -+ holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise -+ there's no need to take kvm->arch.tdp_mmu_pages_lock at all). - - Everything else is a leaf: no other lock is taken inside the critical - sections. -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index e0cfd620b293..42fca28d6189 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -1030,6 +1030,19 @@ struct kvm_arch { - * tdp_mmu_page set and a root_count of 0. - */ - struct list_head tdp_mmu_pages; -+ -+ /* -+ * Protects accesses to the following fields when the MMU lock -+ * is held in read mode: -+ * - tdp_mmu_pages (above) -+ * - the link field of struct kvm_mmu_pages used by the TDP MMU -+ * - lpage_disallowed_mmu_pages -+ * - the lpage_disallowed_link field of struct kvm_mmu_pages used -+ * by the TDP MMU -+ * It is acceptable, but not necessary, to acquire this lock when -+ * the thread holds the MMU lock in write mode. -+ */ -+ spinlock_t tdp_mmu_pages_lock; - }; - - struct kvm_vm_stat { -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index bc49a5b90086..bb6faa9193b4 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -7,6 +7,7 @@ - #include "tdp_mmu.h" - #include "spte.h" - -+#include - #include - - #ifdef CONFIG_X86_64 -@@ -33,6 +34,7 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm) - kvm->arch.tdp_mmu_enabled = true; - - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); -+ spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); - } - -@@ -225,7 +227,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) - } - - static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, -- u64 old_spte, u64 new_spte, int level); -+ u64 old_spte, u64 new_spte, int level, -+ bool shared); - - static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) - { -@@ -267,17 +270,26 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, - * - * @kvm: kvm instance - * @sp: the new page -+ * @shared: This operation may not be running under the exclusive use of -+ * the MMU lock and the operation must synchronize with other -+ * threads that might be adding or removing pages. - * @account_nx: This page replaces a NX large page and should be marked for - * eventual reclaim. 
- */ - static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp, -- bool account_nx) -+ bool shared, bool account_nx) - { -- lockdep_assert_held_write(&kvm->mmu_lock); -+ if (shared) -+ spin_lock(&kvm->arch.tdp_mmu_pages_lock); -+ else -+ lockdep_assert_held_write(&kvm->mmu_lock); - - list_add(&sp->link, &kvm->arch.tdp_mmu_pages); - if (account_nx) - account_huge_nx_page(kvm, sp); -+ -+ if (shared) -+ spin_unlock(&kvm->arch.tdp_mmu_pages_lock); - } - - /** -@@ -285,14 +297,24 @@ static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp, - * - * @kvm: kvm instance - * @sp: the page to be removed -+ * @shared: This operation may not be running under the exclusive use of -+ * the MMU lock and the operation must synchronize with other -+ * threads that might be adding or removing pages. - */ --static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp) -+static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp, -+ bool shared) - { -- lockdep_assert_held_write(&kvm->mmu_lock); -+ if (shared) -+ spin_lock(&kvm->arch.tdp_mmu_pages_lock); -+ else -+ lockdep_assert_held_write(&kvm->mmu_lock); - - list_del(&sp->link); - if (sp->lpage_disallowed) - unaccount_huge_nx_page(kvm, sp); -+ -+ if (shared) -+ spin_unlock(&kvm->arch.tdp_mmu_pages_lock); - } - - /** -@@ -300,28 +322,39 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp) - * - * @kvm: kvm instance - * @pt: the page removed from the paging structure -+ * @shared: This operation may not be running under the exclusive use -+ * of the MMU lock and the operation must synchronize with other -+ * threads that might be modifying SPTEs. - * - * Given a page table that has been removed from the TDP paging structure, - * iterates through the page table to clear SPTEs and free child page tables. - */ --static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt) -+static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, -+ bool shared) - { - struct kvm_mmu_page *sp = sptep_to_sp(pt); - int level = sp->role.level; - gfn_t gfn = sp->gfn; - u64 old_child_spte; -+ u64 *sptep; - int i; - - trace_kvm_mmu_prepare_zap_page(sp); - -- tdp_mmu_unlink_page(kvm, sp); -+ tdp_mmu_unlink_page(kvm, sp, shared); - - for (i = 0; i < PT64_ENT_PER_PAGE; i++) { -- old_child_spte = READ_ONCE(*(pt + i)); -- WRITE_ONCE(*(pt + i), 0); -+ sptep = pt + i; -+ -+ if (shared) { -+ old_child_spte = xchg(sptep, 0); -+ } else { -+ old_child_spte = READ_ONCE(*sptep); -+ WRITE_ONCE(*sptep, 0); -+ } - handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), - gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)), -- old_child_spte, 0, level - 1); -+ old_child_spte, 0, level - 1, shared); - } - - kvm_flush_remote_tlbs_with_address(kvm, gfn, -@@ -338,12 +371,16 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt) - * @old_spte: The value of the SPTE before the change - * @new_spte: The value of the SPTE after the change - * @level: the level of the PT the SPTE is part of in the paging structure -+ * @shared: This operation may not be running under the exclusive use of -+ * the MMU lock and the operation must synchronize with other -+ * threads that might be modifying SPTEs. - * - * Handle bookkeeping that might result from the modification of a SPTE. - * This function must be called for all TDP SPTE modifications. 
- */ - static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, -- u64 old_spte, u64 new_spte, int level) -+ u64 old_spte, u64 new_spte, int level, -+ bool shared) - { - bool was_present = is_shadow_present_pte(old_spte); - bool is_present = is_shadow_present_pte(new_spte); -@@ -415,18 +452,51 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - */ - if (was_present && !was_leaf && (pfn_changed || !is_present)) - handle_removed_tdp_mmu_page(kvm, -- spte_to_child_pt(old_spte, level)); -+ spte_to_child_pt(old_spte, level), shared); - } - - static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, -- u64 old_spte, u64 new_spte, int level) -+ u64 old_spte, u64 new_spte, int level, -+ bool shared) - { -- __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level); -+ __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, -+ shared); - handle_changed_spte_acc_track(old_spte, new_spte, level); - handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte, - new_spte, level); - } - -+/* -+ * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the -+ * associated bookkeeping -+ * -+ * @kvm: kvm instance -+ * @iter: a tdp_iter instance currently on the SPTE that should be set -+ * @new_spte: The value the SPTE should be set to -+ * Returns: true if the SPTE was set, false if it was not. If false is returned, -+ * this function will have no side-effects. -+ */ -+static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, -+ struct tdp_iter *iter, -+ u64 new_spte) -+{ -+ u64 *root_pt = tdp_iter_root_pt(iter); -+ struct kvm_mmu_page *root = sptep_to_sp(root_pt); -+ int as_id = kvm_mmu_page_as_id(root); -+ -+ lockdep_assert_held_read(&kvm->mmu_lock); -+ -+ if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, -+ new_spte) != iter->old_spte) -+ return false; -+ -+ handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, -+ iter->level, true); -+ -+ return true; -+} -+ -+ - /* - * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping - * @kvm: kvm instance -@@ -456,7 +526,7 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, - WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte); - - __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, -- iter->level); -+ iter->level, false); - if (record_acc_track) - handle_changed_spte_acc_track(iter->old_spte, new_spte, - iter->level); -@@ -631,23 +701,18 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, - int ret = 0; - int make_spte_ret = 0; - -- if (unlikely(is_noslot_pfn(pfn))) { -+ if (unlikely(is_noslot_pfn(pfn))) - new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); -- trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, -- new_spte); -- } else { -+ else - make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn, - pfn, iter->old_spte, prefault, true, - map_writable, !shadow_accessed_mask, - &new_spte); -- trace_kvm_mmu_set_spte(iter->level, iter->gfn, -- rcu_dereference(iter->sptep)); -- } - - if (new_spte == iter->old_spte) - ret = RET_PF_SPURIOUS; -- else -- tdp_mmu_set_spte(vcpu->kvm, iter, new_spte); -+ else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte)) -+ return RET_PF_RETRY; - - /* - * If the page fault was caused by a write but the page is write -@@ -661,8 +726,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, - } - - /* If a MMIO SPTE is installed, the MMIO will need to be emulated. 
*/ -- if (unlikely(is_mmio_spte(new_spte))) -+ if (unlikely(is_mmio_spte(new_spte))) { -+ trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, -+ new_spte); - ret = RET_PF_EMULATE; -+ } else -+ trace_kvm_mmu_set_spte(iter->level, iter->gfn, -+ rcu_dereference(iter->sptep)); - - trace_kvm_mmu_set_spte(iter->level, iter->gfn, - rcu_dereference(iter->sptep)); -@@ -721,7 +791,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - */ - if (is_shadow_present_pte(iter.old_spte) && - is_large_pte(iter.old_spte)) { -- tdp_mmu_set_spte(vcpu->kvm, &iter, 0); -+ if (!tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, 0)) -+ break; - - kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn, - KVM_PAGES_PER_HPAGE(iter.level)); -@@ -738,19 +809,24 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); - child_pt = sp->spt; - -- tdp_mmu_link_page(vcpu->kvm, sp, -- huge_page_disallowed && -- req_level >= iter.level); -- - new_spte = make_nonleaf_spte(child_pt, - !shadow_accessed_mask); - -- trace_kvm_mmu_get_page(sp, true); -- tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte); -+ if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, -+ new_spte)) { -+ tdp_mmu_link_page(vcpu->kvm, sp, true, -+ huge_page_disallowed && -+ req_level >= iter.level); -+ -+ trace_kvm_mmu_get_page(sp, true); -+ } else { -+ tdp_mmu_free_sp(sp); -+ break; -+ } - } - } - -- if (WARN_ON(iter.level != level)) { -+ if (iter.level != level) { - rcu_read_unlock(); - return RET_PF_RETRY; - } --- -2.30.1 - diff --git a/queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch b/queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch deleted file mode 100644 index 721ccc757f5..00000000000 --- a/queue-5.11/kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 45d6572cbf46381c2814c197536eff72d2768c54 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 2 Feb 2021 10:57:20 -0800 -Subject: KVM: x86/mmu: Yield in TDU MMU iter even if no SPTES changed - -From: Ben Gardon - -[ Upstream commit 1af4a96025b33587ca953c7ef12a1b20c6e70412 ] - -Given certain conditions, some TDP MMU functions may not yield -reliably / frequently enough. For example, if a paging structure was -very large but had few, if any writable entries, wrprot_gfn_range -could traverse many entries before finding a writable entry and yielding -because the check for yielding only happens after an SPTE is modified. - -Fix this issue by moving the yield to the beginning of the loop. 
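[Aside: the shape of the fix is easy to see in isolation: hoist the
resched check to the top of the loop body so it runs on every iteration,
not only after an entry is modified. The standalone C sketch below mirrors
that control flow; need_yield() and the entries array are invented for
illustration, with tdp_mmu_iter_cond_resched() in the hunks below being
the kernel analogue.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool need_yield(size_t i)
{
	return i && (i % 4) == 0;	/* pretend the scheduler pokes us */
}

int main(void)
{
	/* Mostly-unmodifiable entries, like a read-mostly paging structure. */
	int entries[] = { 0, 0, 0, 0, 0, 7, 0, 0, 0, 3 };
	bool flush = false;

	for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) {
		/*
		 * Yield check first: even a long run of entries needing no
		 * work now gives the scheduler a chance to run. The real
		 * iterator re-walks to the current position after yielding;
		 * this toy simply skips the entry for brevity.
		 */
		if (need_yield(i)) {
			if (flush)
				puts("flush TLBs before yielding");
			flush = false;
			continue;
		}

		if (entries[i] == 0)	/* nothing to write-protect or zap */
			continue;

		printf("modify entry %zu\n", i);
		flush = true;
	}

	if (flush)
		puts("final TLB flush");
	return 0;
}

Note the flush-before-yield step: pending TLB invalidations must not be
deferred across a reschedule, which is why the kernel helper takes a
flush argument.]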
- -Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU") -Reviewed-by: Peter Feiner -Signed-off-by: Ben Gardon - -Message-Id: <20210202185734.1680553-15-bgardon@google.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Sasha Levin ---- - arch/x86/kvm/mmu/tdp_mmu.c | 32 ++++++++++++++++++++++---------- - 1 file changed, 22 insertions(+), 10 deletions(-) - -diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c -index a07d37abb63f..0567286fba39 100644 ---- a/arch/x86/kvm/mmu/tdp_mmu.c -+++ b/arch/x86/kvm/mmu/tdp_mmu.c -@@ -470,6 +470,12 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - bool flush_needed = false; - - tdp_root_for_each_pte(iter, root, start, end) { -+ if (can_yield && -+ tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) { -+ flush_needed = false; -+ continue; -+ } -+ - if (!is_shadow_present_pte(iter.old_spte)) - continue; - -@@ -484,9 +490,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - continue; - - tdp_mmu_set_spte(kvm, &iter, 0); -- -- flush_needed = !(can_yield && -- tdp_mmu_iter_cond_resched(kvm, &iter, true)); -+ flush_needed = true; - } - return flush_needed; - } -@@ -850,6 +854,9 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - - for_each_tdp_pte_min_level(iter, root->spt, root->role.level, - min_level, start, end) { -+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) -+ continue; -+ - if (!is_shadow_present_pte(iter.old_spte) || - !is_last_spte(iter.old_spte, iter.level)) - continue; -@@ -858,8 +865,6 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - - tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); - spte_set = true; -- -- tdp_mmu_iter_cond_resched(kvm, &iter, false); - } - return spte_set; - } -@@ -903,6 +908,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - bool spte_set = false; - - tdp_root_for_each_leaf_pte(iter, root, start, end) { -+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) -+ continue; -+ - if (spte_ad_need_write_protect(iter.old_spte)) { - if (is_writable_pte(iter.old_spte)) - new_spte = iter.old_spte & ~PT_WRITABLE_MASK; -@@ -917,8 +925,6 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - - tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); - spte_set = true; -- -- tdp_mmu_iter_cond_resched(kvm, &iter, false); - } - return spte_set; - } -@@ -1026,6 +1032,9 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - bool spte_set = false; - - tdp_root_for_each_pte(iter, root, start, end) { -+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) -+ continue; -+ - if (!is_shadow_present_pte(iter.old_spte)) - continue; - -@@ -1033,8 +1042,6 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - - tdp_mmu_set_spte(kvm, &iter, new_spte); - spte_set = true; -- -- tdp_mmu_iter_cond_resched(kvm, &iter, false); - } - - return spte_set; -@@ -1075,6 +1082,11 @@ static void zap_collapsible_spte_range(struct kvm *kvm, - bool spte_set = false; - - tdp_root_for_each_pte(iter, root, start, end) { -+ if (tdp_mmu_iter_cond_resched(kvm, &iter, spte_set)) { -+ spte_set = false; -+ continue; -+ } -+ - if (!is_shadow_present_pte(iter.old_spte) || - !is_last_spte(iter.old_spte, iter.level)) - continue; -@@ -1087,7 +1099,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm, - - tdp_mmu_set_spte(kvm, &iter, 0); - -- spte_set = !tdp_mmu_iter_cond_resched(kvm, &iter, true); -+ spte_set = true; - } - - if 
(spte_set) --- -2.30.1 - diff --git a/queue-5.11/series b/queue-5.11/series index 0359584310c..d9c9b362c7d 100644 --- a/queue-5.11/series +++ b/queue-5.11/series @@ -102,22 +102,6 @@ pinctrl-qcom-sc7280-fix-sdc_qdsd_pingroup-and-ufs_reset-offsets.patch pinctrl-qcom-sc7280-fix-sdc1_rclk-configurations.patch pinctrl-qcom-lpass-lpi-use-default-pullup-strength-values.patch pinctrl-qcom-fix-unintentional-string-concatenation.patch -kvm-x86-mmu-change-tdp-mmu-yield-function-returns-to.patch -kvm-x86-mmu-merge-flush-and-non-flush-tdp_mmu_iter_c.patch -kvm-x86-mmu-rename-goal_gfn-to-next_last_level_gfn.patch -kvm-x86-mmu-ensure-forward-progress-when-yielding-in.patch -kvm-x86-mmu-yield-in-tdu-mmu-iter-even-if-no-sptes-c.patch -kvm-x86-mmu-add-lockdep-when-setting-a-tdp-mmu-spte.patch -kvm-x86-mmu-factor-out-handling-of-removed-page-tabl.patch -kvm-x86-mmu-protect-tdp-mmu-page-table-memory-with-r.patch -kvm-x86-mmu-ensure-tlbs-are-flushed-when-yielding-du.patch -kvm-x86-mmu-add-comment-on-__tdp_mmu_set_spte.patch -kvm-x86-mmu-don-t-redundantly-clear-tdp-mmu-pt-memor.patch -kvm-x86-mmu-fix-braces-in-kvm_recover_nx_lpages.patch -kvm-x86-mmu-factor-out-functions-to-add-remove-tdp-m.patch -kvm-x86-mmu-use-atomic-ops-to-set-sptes-in-tdp-mmu-m.patch -kvm-x86-compile-out-tdp-mmu-on-32-bit-systems.patch -kvm-x86-mmu-ensure-tlbs-are-flushed-for-tdp-mmu-duri.patch extcon-add-stubs-for-extcon_register_notifier_all-fu.patch extcon-fix-error-handling-in-extcon_dev_register.patch firmware-stratix10-svc-reset-command_reconfig_flag_p.patch -- 2.47.3
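A closing note on the rcu_head mechanism used by the "Protect TDP MMU page
table memory with RCU" patch removed above: the page-table page is freed
from a callback that recovers the containing structure via container_of()
on the embedded rcu_head. The self-contained C sketch below isolates that
mechanism under stated assumptions: every toy_* name is invented, and the
stub toy_call_rcu() invokes the callback immediately, whereas the real
call_rcu() defers it until every reader currently inside an RCU read-side
critical section has finished.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-ins for the kernel's rcu_head / container_of machinery. */
struct rcu_head {
	void (*func)(struct rcu_head *head);
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Toy shadow page: the rcu_head is embedded, as the patch adds to
 * struct kvm_mmu_page. */
struct toy_mmu_page {
	void *spt;			/* the page-table page itself */
	struct rcu_head rcu_head;
};

static void toy_free_sp_rcu_callback(struct rcu_head *head)
{
	struct toy_mmu_page *sp =
		container_of(head, struct toy_mmu_page, rcu_head);

	free(sp->spt);
	free(sp);
	puts("page freed after grace period");
}

/* Stand-in for call_rcu(): a real implementation defers func until all
 * current RCU readers have finished; here it runs immediately. */
static void toy_call_rcu(struct rcu_head *head,
			 void (*func)(struct rcu_head *head))
{
	head->func = func;
	func(head);
}

int main(void)
{
	struct toy_mmu_page *sp = malloc(sizeof(*sp));

	if (!sp)
		return 1;
	sp->spt = malloc(4096);
	toy_call_rcu(&sp->rcu_head, toy_free_sp_rcu_callback);
	return 0;
}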