git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
KVM: Allow lockless walk of SPTEs when handling aging mmu_notifier event
authorJames Houghton <jthoughton@google.com>
Tue, 4 Feb 2025 00:40:29 +0000 (00:40 +0000)
committerSean Christopherson <seanjc@google.com>
Fri, 14 Feb 2025 15:16:35 +0000 (07:16 -0800)
It is possible to correctly do aging without taking the KVM MMU lock,
or while taking it for read; add a Kconfig to let architectures do so.
Architectures that select KVM_MMU_LOCKLESS_AGING are responsible for
correctness.

Suggested-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20250204004038.1680123-3-jthoughton@google.com
[sean: massage shortlog+changelog, fix Kconfig goof and shorten name]
Signed-off-by: Sean Christopherson <seanjc@google.com>
include/linux/kvm_host.h
virt/kvm/Kconfig
virt/kvm/kvm_main.c

index f34f4cfaa5134465dc37abab5695800b254ecc77..c28a6aa1f2eda2947a56275e8023293f04bb65e7 100644 (file)
@@ -267,6 +267,7 @@ struct kvm_gfn_range {
        union kvm_mmu_notifier_arg arg;
        enum kvm_gfn_range_filter attr_filter;
        bool may_block;
+       bool lockless;
 };
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
index 54e959e7d68fb4f935a726055ec51a9202f95942..746e1f466aa6478e600a82f9059fb2dca71428ed 100644 (file)
@@ -104,6 +104,10 @@ config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
        depends on KVM_GENERIC_MMU_NOTIFIER
        bool
 
+config KVM_MMU_LOCKLESS_AGING
+       depends on KVM_GENERIC_MMU_NOTIFIER
+       bool
+
 config KVM_GENERIC_MEMORY_ATTRIBUTES
        depends on KVM_GENERIC_MMU_NOTIFIER
        bool
index 0f94349f99e27dac1919f15430251b7bd26f559e..201c14ff476f93031c720de91ecbc718586a5456 100644 (file)
@@ -517,6 +517,7 @@ struct kvm_mmu_notifier_range {
        on_lock_fn_t on_lock;
        bool flush_on_ret;
        bool may_block;
+       bool lockless;
 };
 
 /*
@@ -571,6 +572,10 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
                         IS_KVM_NULL_FN(range->handler)))
                return r;
 
+       /* on_lock will never be called for lockless walks */
+       if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock)))
+               return r;
+
        idx = srcu_read_lock(&kvm->srcu);
 
        for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@@ -607,15 +612,18 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
                        gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
                        gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
                        gfn_range.slot = slot;
+                       gfn_range.lockless = range->lockless;
 
                        if (!r.found_memslot) {
                                r.found_memslot = true;
-                               KVM_MMU_LOCK(kvm);
-                               if (!IS_KVM_NULL_FN(range->on_lock))
-                                       range->on_lock(kvm);
-
-                               if (IS_KVM_NULL_FN(range->handler))
-                                       goto mmu_unlock;
+                               if (!range->lockless) {
+                                       KVM_MMU_LOCK(kvm);
+                                       if (!IS_KVM_NULL_FN(range->on_lock))
+                                               range->on_lock(kvm);
+
+                                       if (IS_KVM_NULL_FN(range->handler))
+                                               goto mmu_unlock;
+                               }
                        }
                        r.ret |= range->handler(kvm, &gfn_range);
                }
@@ -625,7 +633,7 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
                kvm_flush_remote_tlbs(kvm);
 
 mmu_unlock:
-       if (r.found_memslot)
+       if (r.found_memslot && !range->lockless)
                KVM_MMU_UNLOCK(kvm);
 
        srcu_read_unlock(&kvm->srcu, idx);
@@ -647,6 +655,7 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
                .on_lock        = (void *)kvm_null_fn,
                .flush_on_ret   = flush_on_ret,
                .may_block      = false,
+               .lockless       = IS_ENABLED(CONFIG_KVM_MMU_LOCKLESS_AGING),
        };
 
        return kvm_handle_hva_range(kvm, &range).ret;