From e53e94b1d3c2408ae5ecb8eca228aa90ebb04afd Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Oct 2017 21:36:51 +0800 Subject: [PATCH] kvm/x86: Avoid async PF preempting the kernel incorrectly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit commit a2b7861bb33b2538420bb5d8554153484d3f961f upstream. Currently, in PREEMPT_COUNT=n kernel, kvm_async_pf_task_wait() could call schedule() to reschedule in some cases. This could result in accidentally ending the current RCU read-side critical section early, causing random memory corruption in the guest, or otherwise preempting the currently running task inside between preempt_disable and preempt_enable. The difficulty to handle this well is because we don't know whether an async PF delivered in a preemptible section or RCU read-side critical section for PREEMPT_COUNT=n, since preempt_disable()/enable() and rcu_read_lock/unlock() are both no-ops in that case. To cure this, we treat any async PF interrupting a kernel context as one that cannot be preempted, preventing kvm_async_pf_task_wait() from choosing the schedule() path in that case. To do so, a second parameter for kvm_async_pf_task_wait() is introduced, so that we know whether it's called from a context interrupting the kernel, and the parameter is set properly in all the callsites. Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Wanpeng Li Signed-off-by: Boqun Feng Signed-off-by: Radim Krčmář [bwh: Backported to 3.2: - Use user_mode_vm() as equivalent to upstream user_mode() - Adjust filename, context] Signed-off-by: Ben Hutchings --- arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/kvm.c | 14 ++++++++++---- arch/x86/kvm/svm.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9f0a680a7e122..22d3fb1e5290d 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -195,13 +195,13 @@ static inline unsigned int kvm_arch_para_features(void) #ifdef CONFIG_KVM_GUEST void __init kvm_guest_init(void); -void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); #else #define kvm_guest_init() do { } while (0) -#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline u32 kvm_read_and_reset_pf_reason(void) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e14165d23e19f..9d1121d6b00c2 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -113,7 +113,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, return NULL; } -void kvm_async_pf_task_wait(u32 token) +/* + * @interrupt_kernel: Is this called from a routine which interrupts the kernel + * (other than user space)? + */ +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; @@ -137,8 +141,10 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = idle || preempt_count() > 1 || - rcu_preempt_depth(); + n.halted = idle || + (IS_ENABLED(CONFIG_PREEMPT_COUNT) + ? preempt_count() > 1 || rcu_preempt_depth() + : interrupt_kernel); init_waitqueue_head(&n.wq); hlist_add_head(&n.link, &b->list); spin_unlock(&b->lock); @@ -257,7 +263,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ - kvm_async_pf_task_wait((u32)read_cr2()); + kvm_async_pf_task_wait((u32)read_cr2(), !user_mode_vm(regs)); break; case KVM_PV_REASON_PAGE_READY: kvm_async_pf_task_wake((u32)read_cr2()); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0970c3f0c11de..fba31b73e4bc0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1620,7 +1620,7 @@ static int pf_interception(struct vcpu_svm *svm) case KVM_PV_REASON_PAGE_NOT_PRESENT: svm->apf_reason = 0; local_irq_disable(); - kvm_async_pf_task_wait(fault_address); + kvm_async_pf_task_wait(fault_address, 0); local_irq_enable(); break; case KVM_PV_REASON_PAGE_READY: -- 2.47.2