KVM: x86/mmu: Track possible NX huge pages separately for TDP vs. Shadow MMU
author     Vipin Sharma <vipinsh@google.com>
           Mon, 7 Jul 2025 22:47:14 +0000 (22:47 +0000)
committer  Sean Christopherson <seanjc@google.com>
           Tue, 19 Aug 2025 14:39:10 +0000 (07:39 -0700)
Track possible NX huge pages for the TDP MMU separately from Shadow MMUs
in anticipation of doing recovery for the TDP MMU while holding mmu_lock
for read instead of write.

Use a small structure to hold the list of pages along with the number of
pages/entries in the list, as relying on kvm->stat.nx_lpage_splits to
calculate the number of pages to recover would result in over-zapping
when both TDP and Shadow MMUs are active.
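
To make the over-zapping concern concrete, here is a minimal userspace
sketch (the sample counts and the local DIV_ROUND_UP stand-in are
illustrative, not taken from the patch): with recovery now running once
per MMU type, sizing each pass from the combined
kvm->stat.nx_lpage_splits would zap roughly 2/ratio of the pages per
period, whereas sizing each pass from its own nr_pages, as the new
nx_huge_pages_to_zap() helper does, keeps the total at 1/ratio.

  /*
   * Standalone illustration (not KVM code); the numbers are made up and
   * the macro mirrors the kernel's DIV_ROUND_UP.
   */
  #include <stdio.h>

  #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

  int main(void)
  {
          unsigned long tdp_pages = 600;    /* possible_nx_huge_pages[KVM_TDP_MMU].nr_pages */
          unsigned long shadow_pages = 200; /* possible_nx_huge_pages[KVM_SHADOW_MMU].nr_pages */
          unsigned long combined = tdp_pages + shadow_pages; /* kvm->stat.nx_lpage_splits */
          unsigned int ratio = 4;           /* nx_huge_pages_recovery_ratio */

          /* Sizing both per-MMU passes from the combined stat over-zaps: */
          printf("combined-based total: %lu\n", 2 * DIV_ROUND_UP(combined, ratio));

          /* Sizing each pass from its own count zaps the intended 1/ratio: */
          printf("per-MMU total:        %lu\n",
                 DIV_ROUND_UP(tdp_pages, ratio) + DIV_ROUND_UP(shadow_pages, ratio));
          return 0;
  }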

Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Vipin Sharma <vipinsh@google.com>
Co-developed-by: James Houghton <jthoughton@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20250707224720.4016504-2-jthoughton@google.com
[sean: rewrite changelog, use #ifdef instead of dummy KVM_TDP_MMU #define]
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/tdp_mmu.c

index f19a76d3ca0ed265f50d8976d386852bd4392596..c038d7cd187d1b23c19cf188f807811f675eed38 100644 (file)
@@ -1348,6 +1348,30 @@ enum kvm_apicv_inhibit {
        __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED),     \
        __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
 
+struct kvm_possible_nx_huge_pages {
+       /*
+        * A list of kvm_mmu_page structs that, if zapped, could possibly be
+        * replaced by an NX huge page.  A shadow page is on this list if its
+        * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+        * and there are no other conditions that prevent a huge page, e.g.
+        * the backing host page is huge, dirty logging is not enabled for its
+        * memslot, etc...  Note, zapping shadow pages on this list doesn't
+        * guarantee an NX huge page will be created in its stead, e.g. if the
+        * guest attempts to execute from the region then KVM obviously can't
+        * create an NX huge page (without hanging the guest).
+        */
+       struct list_head pages;
+       u64 nr_pages;
+};
+
+enum kvm_mmu_type {
+       KVM_SHADOW_MMU,
+#ifdef CONFIG_X86_64
+       KVM_TDP_MMU,
+#endif
+       KVM_NR_MMU_TYPES,
+};
+
 struct kvm_arch {
        unsigned long n_used_mmu_pages;
        unsigned long n_requested_mmu_pages;
@@ -1360,18 +1384,7 @@ struct kvm_arch {
        bool pre_fault_allowed;
        struct hlist_head *mmu_page_hash;
        struct list_head active_mmu_pages;
-       /*
-        * A list of kvm_mmu_page structs that, if zapped, could possibly be
-        * replaced by an NX huge page.  A shadow page is on this list if its
-        * existence disallows an NX huge page (nx_huge_page_disallowed is set)
-        * and there are no other conditions that prevent a huge page, e.g.
-        * the backing host page is huge, dirtly logging is not enabled for its
-        * memslot, etc...  Note, zapping shadow pages on this list doesn't
-        * guarantee an NX huge page will be created in its stead, e.g. if the
-        * guest attempts to execute from the region then KVM obviously can't
-        * create an NX huge page (without hanging the guest).
-        */
-       struct list_head possible_nx_huge_pages;
+       struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES];
 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
        struct kvm_page_track_notifier_head track_notifier_head;
 #endif
@@ -1526,7 +1539,7 @@ struct kvm_arch {
         * is held in read mode:
         *  - tdp_mmu_roots (above)
         *  - the link field of kvm_mmu_page structs used by the TDP MMU
-        *  - possible_nx_huge_pages;
+        *  - possible_nx_huge_pages[KVM_TDP_MMU];
         *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
         *    by the TDP MMU
         * Because the lock is only taken within the MMU lock, strictly
index 6e838cb6c9e129c052864f487b1406da1a4e74c8..e0d6579db531c30cfbbddd847dcc0570515fe6b1 100644 (file)
@@ -776,7 +776,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
                kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K);
 }
 
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+                                enum kvm_mmu_type mmu_type)
 {
        /*
         * If it's possible to replace the shadow page with an NX huge page,
@@ -790,8 +791,9 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
                return;
 
        ++kvm->stat.nx_lpage_splits;
+       ++kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
        list_add_tail(&sp->possible_nx_huge_page_link,
-                     &kvm->arch.possible_nx_huge_pages);
+                     &kvm->arch.possible_nx_huge_pages[mmu_type].pages);
 }
 
 static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -800,7 +802,7 @@ static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
        sp->nx_huge_page_disallowed = true;
 
        if (nx_huge_page_possible)
-               track_possible_nx_huge_page(kvm, sp);
+               track_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -819,12 +821,14 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
        kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
 
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+                                  enum kvm_mmu_type mmu_type)
 {
        if (list_empty(&sp->possible_nx_huge_page_link))
                return;
 
        --kvm->stat.nx_lpage_splits;
+       --kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
        list_del_init(&sp->possible_nx_huge_page_link);
 }
 
@@ -832,7 +836,7 @@ static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        sp->nx_huge_page_disallowed = false;
 
-       untrack_possible_nx_huge_page(kvm, sp);
+       untrack_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }
 
 static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu,
@@ -6737,11 +6741,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
-       int r;
+       int r, i;
 
        kvm->arch.shadow_mmio_value = shadow_mmio_value;
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-       INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
+       for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+               INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages[i].pages);
        spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
        if (tdp_mmu_enabled) {
@@ -7582,16 +7587,32 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
        return err;
 }
 
-static void kvm_recover_nx_huge_pages(struct kvm *kvm)
+static unsigned long nx_huge_pages_to_zap(struct kvm *kvm,
+                                         enum kvm_mmu_type mmu_type)
 {
-       unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
+       unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages);
+       unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+       return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
+}
+
+static void kvm_recover_nx_huge_pages(struct kvm *kvm,
+                                     enum kvm_mmu_type mmu_type)
+{
+#ifdef CONFIG_X86_64
+       const bool is_tdp_mmu = mmu_type == KVM_TDP_MMU;
+#else
+       const bool is_tdp_mmu = false;
+#endif
+       unsigned long to_zap = nx_huge_pages_to_zap(kvm, mmu_type);
+       struct list_head *nx_huge_pages;
        struct kvm_memory_slot *slot;
-       int rcu_idx;
        struct kvm_mmu_page *sp;
-       unsigned int ratio;
        LIST_HEAD(invalid_list);
        bool flush = false;
-       ulong to_zap;
+       int rcu_idx;
+
+       nx_huge_pages = &kvm->arch.possible_nx_huge_pages[mmu_type].pages;
 
        rcu_idx = srcu_read_lock(&kvm->srcu);
        write_lock(&kvm->mmu_lock);
@@ -7603,10 +7624,8 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
         */
        rcu_read_lock();
 
-       ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-       to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
        for ( ; to_zap; --to_zap) {
-               if (list_empty(&kvm->arch.possible_nx_huge_pages))
+               if (list_empty(nx_huge_pages))
                        break;
 
                /*
@@ -7616,7 +7635,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
                 * the total number of shadow pages.  And because the TDP MMU
                 * doesn't use active_mmu_pages.
                 */
-               sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
+               sp = list_first_entry(nx_huge_pages,
                                      struct kvm_mmu_page,
                                      possible_nx_huge_page_link);
                WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
@@ -7653,7 +7672,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 
                if (slot && kvm_slot_dirty_track_enabled(slot))
                        unaccount_nx_huge_page(kvm, sp);
-               else if (is_tdp_mmu_page(sp))
+               else if (is_tdp_mmu)
                        flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
                else
                        kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
@@ -7684,9 +7703,10 @@ static void kvm_nx_huge_page_recovery_worker_kill(void *data)
 static bool kvm_nx_huge_page_recovery_worker(void *data)
 {
        struct kvm *kvm = data;
+       long remaining_time;
        bool enabled;
        uint period;
-       long remaining_time;
+       int i;
 
        enabled = calc_nx_huge_pages_recovery_period(&period);
        if (!enabled)
@@ -7701,7 +7721,8 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
        }
 
        __set_current_state(TASK_RUNNING);
-       kvm_recover_nx_huge_pages(kvm);
+       for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+               kvm_recover_nx_huge_pages(kvm, i);
        kvm->arch.nx_huge_page_last = get_jiffies_64();
        return true;
 }
index 65f3c89d7c5d24979bb9e0df11c3f96017f9dc47..0b772c758f37a96f210a7786d96057be65a1c7a9 100644 (file)
@@ -416,7 +416,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
 
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+                                enum kvm_mmu_type mmu_type);
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+                                  enum kvm_mmu_type mmu_type);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
index 7f3d7229b2c1ff268ab5b8e38120fccb17f234d2..48b070f9f4e130e410f6f99846315c46f50b73f2 100644 (file)
@@ -355,7 +355,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 
        spin_lock(&kvm->arch.tdp_mmu_pages_lock);
        sp->nx_huge_page_disallowed = false;
-       untrack_possible_nx_huge_page(kvm, sp);
+       untrack_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
        spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }
 
@@ -1303,7 +1303,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
                    fault->req_level >= iter.level) {
                        spin_lock(&kvm->arch.tdp_mmu_pages_lock);
                        if (sp->nx_huge_page_disallowed)
-                               track_possible_nx_huge_page(kvm, sp);
+                               track_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
                        spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
                }
        }