KVM: x86: switch hugepage recovery thread to vhost_task

author Paolo Bonzini <pbonzini@redhat.com>
Fri, 8 Nov 2024 09:56:31 +0000 (04:56 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 5 Dec 2024 12:54:09 +0000 (13:54 +0100)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index b4bcd5108079f030f9ebbd310c929a6225282bbd..ab25d289e895504517197d25d947d657e1ed211b 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -26,6 +26,7 @@
  #include <linux/irqbypass.h>
  #include <linux/hyperv.h>
  #include <linux/kfifo.h>
+#include <linux/sched/vhost_task.h>
  
  #include <asm/apic.h>
  #include <asm/pvclock-abi.h>
@@ -1445,7 +1446,8 @@ struct kvm_arch {
         bool sgx_provisioning_allowed;
  
         struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
-       struct task_struct *nx_huge_page_recovery_thread;
+       struct vhost_task *nx_huge_page_recovery_thread;
+       u64 nx_huge_page_last;
  
  #ifdef CONFIG_X86_64
         /* The number of TDP MMU pages across all roots. */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig

index 730c2f34d3479676224d0268a5969002798d98d1..2a0ce65ca9effceb5d7336c39e0fc0622579de0c 100644 (file)
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
         select HAVE_KVM_IRQ_BYPASS
         select HAVE_KVM_IRQ_ROUTING
         select HAVE_KVM_READONLY_MEM
+       select VHOST_TASK
         select KVM_ASYNC_PF
         select USER_RETURN_NOTIFIER
         select KVM_MMIO
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c

index 7813d28b082f2f014dece5572bbfc0d0a49a6525..2da1ec9508a58e439f900a3458a64574f409378b 100644 (file)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7160,7 +7160,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
                         kvm_mmu_zap_all_fast(kvm);
                         mutex_unlock(&kvm->slots_lock);
  
-                       wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+                       vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
                 }
                 mutex_unlock(&kvm_lock);
         }
@@ -7306,7 +7306,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
                 mutex_lock(&kvm_lock);
  
                 list_for_each_entry(kvm, &vm_list, vm_list)
-                       wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
+                       vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
  
                 mutex_unlock(&kvm_lock);
         }
@@ -7409,62 +7409,56 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
         srcu_read_unlock(&kvm->srcu, rcu_idx);
  }
  
-static long get_nx_huge_page_recovery_timeout(u64 start_time)
+static void kvm_nx_huge_page_recovery_worker_kill(void *data)
  {
-       bool enabled;
-       uint period;
-
-       enabled = calc_nx_huge_pages_recovery_period(&period);
-
-       return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
-                      : MAX_SCHEDULE_TIMEOUT;
  }
  
-static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
+static bool kvm_nx_huge_page_recovery_worker(void *data)
  {
-       u64 start_time;
+       struct kvm *kvm = data;
+       bool enabled;
+       uint period;
         long remaining_time;
  
-       while (true) {
-               start_time = get_jiffies_64();
-               remaining_time = get_nx_huge_page_recovery_timeout(start_time);
-
-               set_current_state(TASK_INTERRUPTIBLE);
-               while (!kthread_should_stop() && remaining_time > 0) {
-                       schedule_timeout(remaining_time);
-                       remaining_time = get_nx_huge_page_recovery_timeout(start_time);
-                       set_current_state(TASK_INTERRUPTIBLE);
-               }
-
-               set_current_state(TASK_RUNNING);
-
-               if (kthread_should_stop())
-                       return 0;
+       enabled = calc_nx_huge_pages_recovery_period(&period);
+       if (!enabled)
+               return false;
  
-               kvm_recover_nx_huge_pages(kvm);
+       remaining_time = kvm->arch.nx_huge_page_last + msecs_to_jiffies(period)
+               - get_jiffies_64();
+       if (remaining_time > 0) {
+               schedule_timeout(remaining_time);
+               /* check for signals and come back */
+               return true;
         }
+
+       __set_current_state(TASK_RUNNING);
+       kvm_recover_nx_huge_pages(kvm);
+       kvm->arch.nx_huge_page_last = get_jiffies_64();
+       return true;
  }
  
  int kvm_mmu_post_init_vm(struct kvm *kvm)
  {
-       int err;
-
         if (nx_hugepage_mitigation_hard_disabled)
                 return 0;
  
-       err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
-                                         "kvm-nx-lpage-recovery",
-                                         &kvm->arch.nx_huge_page_recovery_thread);
-       if (!err)
-               kthread_unpark(kvm->arch.nx_huge_page_recovery_thread);
+       kvm->arch.nx_huge_page_last = get_jiffies_64();
+       kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
+               kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
+               kvm, "kvm-nx-lpage-recovery");
  
-       return err;
+       if (!kvm->arch.nx_huge_page_recovery_thread)
+               return -ENOMEM;
+
+       vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+       return 0;
  }
  
  void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
  {
         if (kvm->arch.nx_huge_page_recovery_thread)
-               kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
+               vhost_task_stop(kvm->arch.nx_huge_page_recovery_thread);
  }
  
  #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index 0d5125a3e31a9db7f3cd74aa885da818ee7963b5..4173a0dc9c661d0f8fe1de771340ca9d378076da 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2370,12 +2370,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
  }
  #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
  
-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
-                               uintptr_t data, const char *name,
-                               struct task_struct **thread_ptr);
-
  #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
  static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
  {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 7164a9ece20874e2c7c94bb5c12129b78fe21440..16f0c3566f16141af8f5cfeb5dc6b15838ff6ecc 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6573,106 +6573,3 @@ void kvm_exit(void)
         kvm_irqfd_exit();
  }
  EXPORT_SYMBOL_GPL(kvm_exit);
-
-struct kvm_vm_worker_thread_context {
-       struct kvm *kvm;
-       struct task_struct *parent;
-       struct completion init_done;
-       kvm_vm_thread_fn_t thread_fn;
-       uintptr_t data;
-       int err;
-};
-
-static int kvm_vm_worker_thread(void *context)
-{
-       /*
-        * The init_context is allocated on the stack of the parent thread, so
-        * we have to locally copy anything that is needed beyond initialization
-        */
-       struct kvm_vm_worker_thread_context *init_context = context;
-       struct task_struct *parent;
-       struct kvm *kvm = init_context->kvm;
-       kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
-       uintptr_t data = init_context->data;
-       int err;
-
-       err = kthread_park(current);
-       /* kthread_park(current) is never supposed to return an error */
-       WARN_ON(err != 0);
-       if (err)
-               goto init_complete;
-
-       err = cgroup_attach_task_all(init_context->parent, current);
-       if (err) {
-               kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
-                       __func__, err);
-               goto init_complete;
-       }
-
-       set_user_nice(current, task_nice(init_context->parent));
-
-init_complete:
-       init_context->err = err;
-       complete(&init_context->init_done);
-       init_context = NULL;
-
-       if (err)
-               goto out;
-
-       /* Wait to be woken up by the spawner before proceeding. */
-       kthread_parkme();
-
-       if (!kthread_should_stop())
-               err = thread_fn(kvm, data);
-
-out:
-       /*
-        * Move kthread back to its original cgroup to prevent it lingering in
-        * the cgroup of the VM process, after the latter finishes its
-        * execution.
-        *
-        * kthread_stop() waits on the 'exited' completion condition which is
-        * set in exit_mm(), via mm_release(), in do_exit(). However, the
-        * kthread is removed from the cgroup in the cgroup_exit() which is
-        * called after the exit_mm(). This causes the kthread_stop() to return
-        * before the kthread actually quits the cgroup.
-        */
-       rcu_read_lock();
-       parent = rcu_dereference(current->real_parent);
-       get_task_struct(parent);
-       rcu_read_unlock();
-       cgroup_attach_task_all(parent, current);
-       put_task_struct(parent);
-
-       return err;
-}
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
-                               uintptr_t data, const char *name,
-                               struct task_struct **thread_ptr)
-{
-       struct kvm_vm_worker_thread_context init_context = {};
-       struct task_struct *thread;
-
-       *thread_ptr = NULL;
-       init_context.kvm = kvm;
-       init_context.parent = current;
-       init_context.thread_fn = thread_fn;
-       init_context.data = data;
-       init_completion(&init_context.init_done);
-
-       thread = kthread_run(kvm_vm_worker_thread, &init_context,
-                            "%s-%d", name, task_pid_nr(current));
-       if (IS_ERR(thread))
-               return PTR_ERR(thread);
-
-       /* kthread_run is never supposed to return NULL */
-       WARN_ON(thread == NULL);
-
-       wait_for_completion(&init_context.init_done);
-
-       if (!init_context.err)
-               *thread_ptr = thread;
-
-       return init_context.err;
-}
author	Paolo Bonzini <pbonzini@redhat.com>
	Fri, 8 Nov 2024 09:56:31 +0000 (04:56 -0500)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 5 Dec 2024 12:54:09 +0000 (13:54 +0100)
arch/x86/include/asm/kvm_host.h		patch | blob | blame | history
arch/x86/kvm/Kconfig		patch | blob | blame | history
arch/x86/kvm/mmu/mmu.c		patch | blob | blame | history
include/linux/kvm_host.h		patch | blob | blame | history
virt/kvm/kvm_main.c		patch | blob | blame | history