1 From 8764ed55c9705e426d889ff16c26f398bba70b9b Mon Sep 17 00:00:00 2001
2 From: Sean Christopherson <sean.j.christopherson@intel.com>
3 Date: Mon, 29 Apr 2019 07:04:15 -0700
4 Subject: KVM: x86: Whitelist port 0x7e for pre-incrementing %rip
6 From: Sean Christopherson <sean.j.christopherson@intel.com>
8 commit 8764ed55c9705e426d889ff16c26f398bba70b9b upstream.
10 KVM's recent bug fix to update %rip after emulating I/O broke userspace
11 that relied on the previous behavior of incrementing %rip prior to
12 exiting to userspace. When running a Windows XP guest on AMD hardware,
13 Qemu may patch "OUT 0x7E" instructions in reaction to the OUT itself.
14 Because KVM's old behavior was to increment %rip before exiting to
15 userspace to handle the I/O, Qemu manually adjusted %rip to account for
16 the OUT instruction.
18 Arguably this is a userspace bug as KVM requires userspace to re-enter
19 the kernel to complete instruction emulation before taking any other
20 actions. That being said, this is a bit of a grey area and breaking
21 userspace that has worked for many years is bad.
23 Pre-increment %rip on OUT to port 0x7e before exiting to userspace to
24 hack around the issue.
26 Fixes: 45def77ebf79e ("KVM: x86: update %rip after emulating IO")
27 Reported-by: Simon Becherer <simon@becherer.de>
28 Reported-and-tested-by: Iakov Karpov <srid@rkmail.ru>
29 Reported-by: Gabriele Balducci <balducci@units.it>
30 Reported-by: Antti Antinoja <reader@fennosys.fi>
31 Cc: stable@vger.kernel.org
32 Cc: Takashi Iwai <tiwai@suse.com>
33 Cc: Jiri Slaby <jslaby@suse.com>
34 Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
35 Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
36 Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
37 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
40 arch/x86/include/uapi/asm/kvm.h | 1 +
41 arch/x86/kvm/x86.c | 21 +++++++++++++++++++--
42 2 files changed, 20 insertions(+), 2 deletions(-)
44 --- a/arch/x86/include/uapi/asm/kvm.h
45 +++ b/arch/x86/include/uapi/asm/kvm.h
46 @@ -378,6 +378,7 @@ struct kvm_sync_regs {
47 #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
48 #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
49 #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
50 +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
52 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001
53 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002
54 --- a/arch/x86/kvm/x86.c
55 +++ b/arch/x86/kvm/x86.c
56 @@ -6328,6 +6328,12 @@ int kvm_emulate_instruction_from_buffer(
58 EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
60 +static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
62 + vcpu->arch.pio.count = 0;
66 static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
68 vcpu->arch.pio.count = 0;
69 @@ -6344,12 +6350,23 @@ static int kvm_fast_pio_out(struct kvm_v
70 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
71 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
78 + * Workaround userspace that relies on old KVM behavior of %rip being
79 + * incremented prior to exiting to userspace to handle "OUT 0x7e".
82 + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
83 + vcpu->arch.complete_userspace_io =
84 + complete_fast_pio_out_port_0x7e;
85 + kvm_skip_emulated_instruction(vcpu);
87 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
88 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
94 static int complete_fast_pio_in(struct kvm_vcpu *vcpu)