git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: x86: Allocate/free user_return_msrs at kvm.ko (un)loading time
author: Chao Gao <chao.gao@intel.com>
Sat, 8 Nov 2025 01:36:01 +0000 (17:36 -0800)
committer: Sean Christopherson <seanjc@google.com>
Tue, 18 Nov 2025 23:53:54 +0000 (15:53 -0800)
Move user_return_msrs allocation/free from vendor module (kvm-intel.ko and
kvm-amd.ko) (un)loading time to kvm.ko's (un)loading time to make it less
risky to access user_return_msrs in kvm.ko. Tying the lifetime of
user_return_msrs to vendor modules makes every access to user_return_msrs
prone to use-after-free issues as vendor modules may be unloaded at any time.

Opportunistically turn the per-CPU variable into full structs, as there's
no practical difference between statically allocating the memory and
allocating it unconditionally during module_init().

Zero out kvm_nr_uret_msrs on vendor module exit to further minimize the
chances of consuming stale data, and WARN on vendor module load if KVM
thinks there are existing user-return MSRs.

Note!  The user-return MSRs also need to be "destroyed" if
ops->hardware_setup() fails, as both SVM and VMX expect common KVM to
clean up (because common code, not vendor code, is responsible for
kvm_nr_uret_msrs).

Signed-off-by: Chao Gao <chao.gao@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Link: https://patch.msgid.link/20251108013601.902918-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/x86.c

index 03b2121069c4a481c2a85c541581a6479e4cdd5c..be737d9645b9faecaa32813e9ccb2fb174fb6aba 100644 (file)
@@ -209,7 +209,7 @@ struct kvm_user_return_msrs {
 u32 __read_mostly kvm_nr_uret_msrs;
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_nr_uret_msrs);
 static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
-static struct kvm_user_return_msrs __percpu *user_return_msrs;
+static DEFINE_PER_CPU(struct kvm_user_return_msrs, user_return_msrs);
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
                                | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
@@ -572,25 +572,14 @@ static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
                vcpu->arch.apf.gfns[i] = ~0;
 }
 
-static int kvm_init_user_return_msrs(void)
-{
-       user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
-       if (!user_return_msrs) {
-               pr_err("failed to allocate percpu user_return_msrs\n");
-               return -ENOMEM;
-       }
-       kvm_nr_uret_msrs = 0;
-       return 0;
-}
-
-static void kvm_free_user_return_msrs(void)
+static void kvm_destroy_user_return_msrs(void)
 {
        int cpu;
 
        for_each_possible_cpu(cpu)
-               WARN_ON_ONCE(per_cpu_ptr(user_return_msrs, cpu)->registered);
+               WARN_ON_ONCE(per_cpu(user_return_msrs, cpu).registered);
 
-       free_percpu(user_return_msrs);
+       kvm_nr_uret_msrs = 0;
 }
 
 static void kvm_on_user_return(struct user_return_notifier *urn)
@@ -653,7 +642,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_user_return_msr);
 
 static void kvm_user_return_msr_cpu_online(void)
 {
-       struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
+       struct kvm_user_return_msrs *msrs = this_cpu_ptr(&user_return_msrs);
        u64 value;
        int i;
 
@@ -675,7 +664,7 @@ static void kvm_user_return_register_notifier(struct kvm_user_return_msrs *msrs)
 
 int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
 {
-       struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
+       struct kvm_user_return_msrs *msrs = this_cpu_ptr(&user_return_msrs);
        int err;
 
        value = (value & mask) | (msrs->values[slot].host & ~mask);
@@ -693,13 +682,13 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_user_return_msr);
 
 u64 kvm_get_user_return_msr(unsigned int slot)
 {
-       return this_cpu_ptr(user_return_msrs)->values[slot].curr;
+       return this_cpu_ptr(&user_return_msrs)->values[slot].curr;
 }
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_user_return_msr);
 
 static void drop_user_return_notifiers(void)
 {
-       struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
+       struct kvm_user_return_msrs *msrs = this_cpu_ptr(&user_return_msrs);
 
        if (msrs->registered)
                kvm_on_user_return(&msrs->urn);
@@ -10022,13 +10011,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
                return -ENOMEM;
        }
 
-       r = kvm_init_user_return_msrs();
-       if (r)
-               goto out_free_x86_emulator_cache;
-
        r = kvm_mmu_vendor_module_init();
        if (r)
-               goto out_free_percpu;
+               goto out_free_x86_emulator_cache;
 
        kvm_caps.supported_vm_types = BIT(KVM_X86_DEFAULT_VM);
        kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P;
@@ -10053,6 +10038,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
        if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
                rdmsrq(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
 
+       WARN_ON_ONCE(kvm_nr_uret_msrs);
+
        r = ops->hardware_setup();
        if (r != 0)
                goto out_mmu_exit;
@@ -10125,9 +10112,8 @@ out_unwind_ops:
        kvm_x86_ops.enable_virtualization_cpu = NULL;
        kvm_x86_call(hardware_unsetup)();
 out_mmu_exit:
+       kvm_destroy_user_return_msrs();
        kvm_mmu_vendor_module_exit();
-out_free_percpu:
-       kvm_free_user_return_msrs();
 out_free_x86_emulator_cache:
        kmem_cache_destroy(x86_emulator_cache);
        return r;
@@ -10155,8 +10141,8 @@ void kvm_x86_vendor_exit(void)
        cancel_work_sync(&pvclock_gtod_work);
 #endif
        kvm_x86_call(hardware_unsetup)();
+       kvm_destroy_user_return_msrs();
        kvm_mmu_vendor_module_exit();
-       kvm_free_user_return_msrs();
        kmem_cache_destroy(x86_emulator_cache);
 #ifdef CONFIG_KVM_XEN
        static_key_deferred_flush(&kvm_xen_enabled);