From: Greg Kroah-Hartman Date: Thu, 3 Sep 2009 21:31:28 +0000 (-0700) Subject: kvm patches for .30 X-Git-Tag: v2.6.27.32~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ebfdf911b406681293256b6d12ef8eaa9f6ca27e;p=thirdparty%2Fkernel%2Fstable-queue.git kvm patches for .30 --- diff --git a/queue-2.6.30/kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch b/queue-2.6.30/kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch new file mode 100644 index 00000000000..7dfd864381e --- /dev/null +++ b/queue-2.6.30/kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch @@ -0,0 +1,66 @@ +From mtosatti@redhat.com Thu Sep 3 14:27:33 2009 +From: Marcelo Tosatti +Date: Tue, 1 Sep 2009 12:15:15 -0300 +Subject: KVM: Avoid redelivery of edge interrupt before next edge +To: stable@kernel.org +Cc: avi@redhat.com, Gleb Natapov +Message-ID: <1251818115-22157-7-git-send-email-mtosatti@redhat.com> + + +From: Gleb Natapov + +(cherry picked from commit b4a2f5e723e4f7df46731106faf9e2405673c073) + +The check for an edge is broken in current ioapic code. ioapic->irr is +cleared on each edge interrupt by ioapic_service() and this makes +old_irr != ioapic->irr condition in kvm_ioapic_set_irq() to be always +true. The patch fixes the code to properly recognise edge. + +Some HW emulation calls set_irq() without level change. If each such +call is propagated to an OS it may confuse a device driver. This is the +case with keyboard device emulation and Windows XP x64 installer on SMP VM. +Each keystroke produce two interrupts (down/up) one interrupt is +submitted to CPU0 and another to CPU1. This confuses Windows somehow +and it ignores keystrokes. + +Signed-off-by: Gleb Natapov +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/ioapic.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/virt/kvm/ioapic.c ++++ b/virt/kvm/ioapic.c +@@ -95,8 +95,6 @@ static int ioapic_service(struct kvm_ioa + if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) + pent->fields.remote_irr = 1; + } +- if (!pent->fields.trig_mode) +- ioapic->irr &= ~(1 << idx); + + return injected; + } +@@ -136,7 +134,8 @@ static void ioapic_write_indirect(struct + mask_after = ioapic->redirtbl[index].fields.mask; + if (mask_before != mask_after) + kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); +- if (ioapic->irr & (1 << index)) ++ if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG ++ && ioapic->irr & (1 << index)) + ioapic_service(ioapic, index); + break; + } +@@ -293,9 +292,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic + if (!level) + ioapic->irr &= ~mask; + else { ++ int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + ioapic->irr |= mask; +- if ((!entry.fields.trig_mode && old_irr != ioapic->irr) +- || !entry.fields.remote_irr) ++ if ((edge && old_irr != ioapic->irr) || ++ (!edge && !entry.fields.remote_irr)) + ret = ioapic_service(ioapic, irq); + } + } diff --git a/queue-2.6.30/kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch b/queue-2.6.30/kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch new file mode 100644 index 00000000000..d50e0fb2f79 --- /dev/null +++ b/queue-2.6.30/kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch @@ -0,0 +1,119 @@ +From mtosatti@redhat.com Thu Sep 3 14:23:35 2009 +From: Glauber Costa +Date: Mon, 3 Aug 2009 14:57:53 -0300 +Subject: KVM: Deal with interrupt shadow state for emulated instructions +To: stable@kernel.org +Cc: Glauber Costa , avi@redhat.com, Gleb 
Natapov , "H. Peter Anvin" +Message-ID: <1249322277-5824-6-git-send-email-mtosatti@redhat.com> + + +From: Glauber Costa + +(cherry picked from commit 310b5d306c1aee7ebe32f702c0e33e7988d50646) + +We currently unblock shadow interrupt state when we skip an instruction, +but failing to do so when we actually emulate one. This blocks interrupts +in key instruction blocks, in particular sti; hlt; sequences + +If the instruction emulated is an sti, we have to block shadow interrupts. +The same goes for mov ss. pop ss also needs it, but we don't currently +emulate it. + +Without this patch, I cannot boot gpxe option roms at vmx machines. +This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469 + +Signed-off-by: Glauber Costa +CC: H. Peter Anvin +CC: Gleb Natapov +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_x86_emulate.h | 3 +++ + arch/x86/kvm/x86.c | 6 +++++- + arch/x86/kvm/x86_emulate.c | 20 ++++++++++++++++++++ + 3 files changed, 28 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/kvm_x86_emulate.h ++++ b/arch/x86/include/asm/kvm_x86_emulate.h +@@ -155,6 +155,9 @@ struct x86_emulate_ctxt { + int mode; + u32 cs_base; + ++ /* interruptibility state, as a result of execution of STI or MOV SS */ ++ int interruptibility; ++ + /* decode cache */ + struct decode_cache decode; + }; +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -2366,7 +2366,7 @@ int emulate_instruction(struct kvm_vcpu + u16 error_code, + int emulation_type) + { +- int r; ++ int r, shadow_mask; + struct decode_cache *c; + + kvm_clear_exception_queue(vcpu); +@@ -2415,6 +2415,10 @@ int emulate_instruction(struct kvm_vcpu + } + + r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); ++ shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; ++ ++ if (r == 0) ++ kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); + + if (vcpu->arch.pio.string) + return EMULATE_DO_MMIO; +--- a/arch/x86/kvm/x86_emulate.c ++++ b/arch/x86/kvm/x86_emulate.c +@@ -1349,6 +1349,20 @@ static inline int writeback(struct x86_e + return 0; + } + ++void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) ++{ ++ u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); ++ /* ++ * an sti; sti; sequence only disable interrupts for the first ++ * instruction. So, if the last instruction, be it emulated or ++ * not, left the system with the INT_STI flag enabled, it ++ * means that the last instruction is an sti. We should not ++ * leave the flag on in this case. The same goes for mov ss ++ */ ++ if (!(int_shadow & mask)) ++ ctxt->interruptibility = mask; ++} ++ + int + x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) + { +@@ -1360,6 +1374,8 @@ x86_emulate_insn(struct x86_emulate_ctxt + int io_dir_in; + int rc = 0; + ++ ctxt->interruptibility = 0; ++ + /* Shadow copy of register state. Committed on successful emulation. + * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't + * modify them. +@@ -1609,6 +1625,9 @@ special_insn: + int err; + + sel = c->src.val; ++ if (c->modrm_reg == VCPU_SREG_SS) ++ toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); ++ + if (c->modrm_reg <= 5) { + type_bits = (c->modrm_reg == 1) ? 9 : 1; + err = kvm_load_segment_descriptor(ctxt->vcpu, sel, +@@ -1865,6 +1884,7 @@ special_insn: + c->dst.type = OP_NONE; /* Disable writeback. 
*/ + break; + case 0xfb: /* sti */ ++ toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); + ctxt->eflags |= X86_EFLAGS_IF; + c->dst.type = OP_NONE; /* Disable writeback. */ + break; diff --git a/queue-2.6.30/kvm-fix-ack-not-being-delivered-when-msi-present.patch b/queue-2.6.30/kvm-fix-ack-not-being-delivered-when-msi-present.patch new file mode 100644 index 00000000000..b7ea044770e --- /dev/null +++ b/queue-2.6.30/kvm-fix-ack-not-being-delivered-when-msi-present.patch @@ -0,0 +1,56 @@ +From mtosatti@redhat.com Thu Sep 3 14:27:17 2009 +From: Marcelo Tosatti +Date: Tue, 1 Sep 2009 12:15:14 -0300 +Subject: KVM: fix ack not being delivered when msi present +To: stable@kernel.org +Cc: avi@redhat.com, "Michael S. Tsirkin" +Message-ID: <1251818115-22157-6-git-send-email-mtosatti@redhat.com> + + +From: Michael S. Tsirkin + +(cherry picked from commit 5116d8f6b977970ebefc1932c0f313163a6ec91f) + +kvm_notify_acked_irq does not check irq type, so that it sometimes +interprets msi vector as irq. As a result, ack notifiers are not +called, which typially hangs the guest. The fix is to track and +check irq type. + +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/kvm_host.h | 1 + + virt/kvm/irq_comm.c | 4 +++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -110,6 +110,7 @@ struct kvm_memory_slot { + + struct kvm_kernel_irq_routing_entry { + u32 gsi; ++ u32 type; + int (*set)(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level); + union { +--- a/virt/kvm/irq_comm.c ++++ b/virt/kvm/irq_comm.c +@@ -141,7 +141,8 @@ void kvm_notify_acked_irq(struct kvm *kv + unsigned gsi = pin; + + list_for_each_entry(e, &kvm->irq_routing, link) +- if (e->irqchip.irqchip == irqchip && ++ if (e->type == KVM_IRQ_ROUTING_IRQCHIP && ++ e->irqchip.irqchip == irqchip && + e->irqchip.pin == pin) { + gsi = e->gsi; + break; +@@ -240,6 +241,7 @@ static int setup_routing_entry(struct kv + int delta; + + e->gsi = ue->gsi; ++ e->type = ue->type; + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + delta = 0; diff --git a/queue-2.6.30/kvm-fix-cpuid-feature-misreporting.patch b/queue-2.6.30/kvm-fix-cpuid-feature-misreporting.patch new file mode 100644 index 00000000000..a33ee174665 --- /dev/null +++ b/queue-2.6.30/kvm-fix-cpuid-feature-misreporting.patch @@ -0,0 +1,40 @@ +From mtosatti@redhat.com Thu Sep 3 14:24:45 2009 +From: Avi Kivity +Date: Mon, 3 Aug 2009 14:57:57 -0300 +Subject: KVM: Fix cpuid feature misreporting +To: stable@kernel.org +Cc: Avi Kivity +Message-ID: <1249322277-5824-10-git-send-email-mtosatti@redhat.com> + + +From: Avi Kivity + +(cherry picked from commit 8d753f369bd28fff1706ffe9fb9fea4fd88cf85b) + +MTRR, PAT, MCE, and MCA are all supported (to some extent) but not reported. +Vista requires these features, so if userspace relies on kernel cpuid +reporting, it loses support for Vista. 
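
For reference, the features in question are plain CPUID leaf 1 EDX bits. The snippet below is an illustration only (it is not part of the patch, and it assumes the architectural bit positions MCE=7, MTRR=12, MCA=14, PAT=16) of how a guest, or a userspace stack that filters the guest cpuid through KVM_GET_SUPPORTED_CPUID, would probe them:

/*
 * Illustration only, not part of the patch: probe the CPUID.01H:EDX
 * bits this change starts reporting.  Bit positions are the
 * architectural ones (MCE=7, MTRR=12, MCA=14, PAT=16).
 */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;
        printf("MCE:%d MTRR:%d MCA:%d PAT:%d\n",
               !!(edx & (1u << 7)), !!(edx & (1u << 12)),
               !!(edx & (1u << 14)), !!(edx & (1u << 16)));
        return 0;
}

With the hunk below applied, these bits show up in the supported-cpuid mask, so userspace that trusts the kernel's reporting can expose them to Vista.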
+ +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1256,9 +1256,12 @@ static void do_cpuid_ent(struct kvm_cpui + bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | + bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | + bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | ++ bit(X86_FEATURE_MCE) | + bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | +- bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) | +- bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | ++ bit(X86_FEATURE_SEP) | bit(X86_FEATURE_MTRR) | ++ bit(X86_FEATURE_PGE) | bit(X86_FEATURE_MCA) | ++ bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PAT) | ++ bit(X86_FEATURE_PSE36) | + bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) | + bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) | + bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP); diff --git a/queue-2.6.30/kvm-fix-kvm_get_msr_index_list.patch b/queue-2.6.30/kvm-fix-kvm_get_msr_index_list.patch new file mode 100644 index 00000000000..2ce61071631 --- /dev/null +++ b/queue-2.6.30/kvm-fix-kvm_get_msr_index_list.patch @@ -0,0 +1,42 @@ +From e125e7b6944898831b56739a5448e705578bf7e2 Mon Sep 17 00:00:00 2001 +From: Jan Kiszka +Date: Thu, 2 Jul 2009 21:45:47 +0200 +Subject: KVM: Fix KVM_GET_MSR_INDEX_LIST + +From: Jan Kiszka + +commit e125e7b6944898831b56739a5448e705578bf7e2 upstream. + +So far, KVM copied the emulated_msrs (only MSR_IA32_MISC_ENABLE) to a +wrong address in user space due to broken pointer arithmetic. This +caused subtle corruption up there (missing MSR_IA32_MISC_ENABLE had +probably no practical relevance). Moreover, the size check for the +user-provided kvm_msr_list forgot about emulated MSRs. + +Signed-off-by: Jan Kiszka +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1117,14 +1117,13 @@ long kvm_arch_dev_ioctl(struct file *fil + if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) + goto out; + r = -E2BIG; +- if (n < num_msrs_to_save) ++ if (n < msr_list.nmsrs) + goto out; + r = -EFAULT; + if (copy_to_user(user_msr_list->indices, &msrs_to_save, + num_msrs_to_save * sizeof(u32))) + goto out; +- if (copy_to_user(user_msr_list->indices +- + num_msrs_to_save * sizeof(u32), ++ if (copy_to_user(user_msr_list->indices + num_msrs_to_save, + &emulated_msrs, + ARRAY_SIZE(emulated_msrs) * sizeof(u32))) + goto out; diff --git a/queue-2.6.30/kvm-ignore-reads-to-k7-evntsel-msrs.patch b/queue-2.6.30/kvm-ignore-reads-to-k7-evntsel-msrs.patch new file mode 100644 index 00000000000..c4453f718b3 --- /dev/null +++ b/queue-2.6.30/kvm-ignore-reads-to-k7-evntsel-msrs.patch @@ -0,0 +1,38 @@ +From mtosatti@redhat.com Thu Sep 3 14:24:25 2009 +From: Amit Shah +Date: Mon, 3 Aug 2009 14:57:56 -0300 +Subject: KVM: Ignore reads to K7 EVNTSEL MSRs +To: stable@kernel.org +Cc: Amit Shah , avi@redhat.com +Message-ID: <1249322277-5824-9-git-send-email-mtosatti@redhat.com> + + +From: Amit Shah + +(cherry picked from commit 9e6996240afcbe61682eab8eeaeb65c34333164d) + +In commit 7fe29e0faacb650d31b9e9f538203a157bec821d we ignored the +reads to the P6 EVNTSEL MSRs. That fixed crashes on Intel machines. + +Ignore the reads to K7 EVNTSEL MSRs as well to fix this on AMD +hosts. + +This fixes Kaspersky antivirus crashing Windows guests on AMD hosts. 
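
The guest-visible effect is easiest to see from inside the guest. The sketch below is illustrative only; it assumes a Linux guest with the msr driver loaded (run as root) and uses the architectural MSR number 0xc0010000 for MSR_K7_EVNTSEL0:

/*
 * Illustration only, not part of the patch: read MSR_K7_EVNTSEL0
 * (0xc0010000) from guest user space via /dev/cpu/0/msr.  With the
 * hunk below the read completes and returns 0; without it KVM injects
 * #GP and the read fails with EIO.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint64_t val = 0;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0)
                return 1;
        if (pread(fd, &val, sizeof(val), 0xc0010000) == sizeof(val))
                printf("K7 EVNTSEL0 = %#llx\n", (unsigned long long)val);
        else
                perror("rdmsr");
        close(fd);
        return 0;
}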
+ +Signed-off-by: Amit Shah +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -900,6 +900,7 @@ int kvm_get_msr_common(struct kvm_vcpu * + case MSR_VM_HSAVE_PA: + case MSR_P6_EVNTSEL0: + case MSR_P6_EVNTSEL1: ++ case MSR_K7_EVNTSEL0: + data = 0; + break; + case MSR_MTRRcap: diff --git a/queue-2.6.30/kvm-introduce-set-get-_interrupt_shadow.patch b/queue-2.6.30/kvm-introduce-set-get-_interrupt_shadow.patch new file mode 100644 index 00000000000..5795610677e --- /dev/null +++ b/queue-2.6.30/kvm-introduce-set-get-_interrupt_shadow.patch @@ -0,0 +1,173 @@ +From mtosatti@redhat.com Thu Sep 3 14:23:20 2009 +From: Glauber Costa +Date: Mon, 3 Aug 2009 14:57:52 -0300 +Subject: KVM: Introduce {set/get}_interrupt_shadow() +To: stable@kernel.org +Cc: Glauber Costa , Marcelo Tosatti , avi@redhat.com +Message-ID: <1249322277-5824-5-git-send-email-mtosatti@redhat.com> + + +From: Glauber Costa + +This patch introduces set/get_interrupt_shadow(), that does exactly +what the name suggests. It also replaces open code that explicitly does +it with the now existent functions. It differs slightly from upstream, +because upstream merged it after gleb's interrupt rework, that we don't +ship. + +Just for reference, upstream changelog is +(2809f5d2c4cfad171167b131bb2a21ab65eba40f): + +This patch replaces drop_interrupt_shadow with the more +general set_interrupt_shadow, that can either drop or raise +it, depending on its parameter. It also adds ->get_interrupt_shadow() +for future use. + +Signed-off-by: Glauber Costa +Signed-off-by: Avi Kivity +Signed-off-by: Marcelo Tosatti +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 2 + + arch/x86/include/asm/kvm_x86_emulate.h | 3 ++ + arch/x86/kvm/svm.c | 25 ++++++++++++++++++- + arch/x86/kvm/vmx.c | 42 +++++++++++++++++++++++++-------- + 4 files changed, 62 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -513,6 +513,8 @@ struct kvm_x86_ops { + void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); + int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); ++ void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); ++ u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + void (*patch_hypercall)(struct kvm_vcpu *vcpu, + unsigned char *hypercall_addr); + int (*get_irq)(struct kvm_vcpu *vcpu); +--- a/arch/x86/include/asm/kvm_x86_emulate.h ++++ b/arch/x86/include/asm/kvm_x86_emulate.h +@@ -143,6 +143,9 @@ struct decode_cache { + struct fetch_cache fetch; + }; + ++#define X86_SHADOW_INT_MOV_SS 1 ++#define X86_SHADOW_INT_STI 2 ++ + struct x86_emulate_ctxt { + /* Register state before/after emulation. 
*/ + struct kvm_vcpu *vcpu; +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -227,6 +227,27 @@ static int is_external_interrupt(u32 inf + return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); + } + ++static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 ret = 0; ++ ++ if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ++ ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; ++ return ret & mask; ++} ++ ++static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (mask == 0) ++ svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; ++ else ++ svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; ++ ++} ++ + static void skip_emulated_instruction(struct kvm_vcpu *vcpu) + { + struct vcpu_svm *svm = to_svm(vcpu); +@@ -240,7 +261,7 @@ static void skip_emulated_instruction(st + __func__, kvm_rip_read(vcpu), svm->next_rip); + + kvm_rip_write(vcpu, svm->next_rip); +- svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; ++ svm_set_interrupt_shadow(vcpu, 0); + + vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK); + } +@@ -2667,6 +2688,8 @@ static struct kvm_x86_ops svm_x86_ops = + .run = svm_vcpu_run, + .handle_exit = handle_exit, + .skip_emulated_instruction = skip_emulated_instruction, ++ .set_interrupt_shadow = svm_set_interrupt_shadow, ++ .get_interrupt_shadow = svm_get_interrupt_shadow, + .patch_hypercall = svm_patch_hypercall, + .get_irq = svm_get_irq, + .set_irq = svm_set_irq, +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -732,23 +732,45 @@ static void vmx_set_rflags(struct kvm_vc + vmcs_writel(GUEST_RFLAGS, rflags); + } + ++static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ int ret = 0; ++ ++ if (interruptibility & GUEST_INTR_STATE_STI) ++ ret |= X86_SHADOW_INT_STI; ++ if (interruptibility & GUEST_INTR_STATE_MOV_SS) ++ ret |= X86_SHADOW_INT_MOV_SS; ++ ++ return ret & mask; ++} ++ ++static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ u32 interruptibility = interruptibility_old; ++ ++ interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); ++ ++ if (mask & X86_SHADOW_INT_MOV_SS) ++ interruptibility |= GUEST_INTR_STATE_MOV_SS; ++ if (mask & X86_SHADOW_INT_STI) ++ interruptibility |= GUEST_INTR_STATE_STI; ++ ++ if ((interruptibility != interruptibility_old)) ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); ++} ++ + static void skip_emulated_instruction(struct kvm_vcpu *vcpu) + { + unsigned long rip; +- u32 interruptibility; + + rip = kvm_rip_read(vcpu); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_rip_write(vcpu, rip); + +- /* +- * We emulated an instruction, so temporary interrupt blocking +- * should be removed, if set. 
+- */ +- interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); +- if (interruptibility & 3) +- vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, +- interruptibility & ~3); ++ /* skipping an emulated instruction also counts */ ++ vmx_set_interrupt_shadow(vcpu, 0); + vcpu->arch.interrupt_window_open = 1; + } + +@@ -3738,6 +3760,8 @@ static struct kvm_x86_ops vmx_x86_ops = + .run = vmx_vcpu_run, + .handle_exit = kvm_handle_exit, + .skip_emulated_instruction = skip_emulated_instruction, ++ .set_interrupt_shadow = vmx_set_interrupt_shadow, ++ .get_interrupt_shadow = vmx_get_interrupt_shadow, + .patch_hypercall = vmx_patch_hypercall, + .get_irq = vmx_get_irq, + .set_irq = vmx_inject_irq, diff --git a/queue-2.6.30/kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch b/queue-2.6.30/kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch new file mode 100644 index 00000000000..bf31267ab38 --- /dev/null +++ b/queue-2.6.30/kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch @@ -0,0 +1,52 @@ +From mtosatti@redhat.com Thu Sep 3 14:23:03 2009 +From: Gleb Natapov +Date: Mon, 3 Aug 2009 14:57:51 -0300 +Subject: KVM: MMU: do not free active mmu pages in free_mmu_pages() +To: stable@kernel.org +Cc: avi@redhat.com, Gleb Natapov +Message-ID: <1249322277-5824-4-git-send-email-mtosatti@redhat.com> + + +From: Gleb Natapov + +(cherry picked from commit f00be0cae4e6ad0a8c7be381c6d9be3586800b3e) + +free_mmu_pages() should only undo what alloc_mmu_pages() does. +Free mmu pages from the generic VM destruction function, kvm_destroy_vm(). + +Signed-off-by: Gleb Natapov +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu.c | 8 -------- + virt/kvm/kvm_main.c | 2 ++ + 2 files changed, 2 insertions(+), 8 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -2657,14 +2657,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp); + + static void free_mmu_pages(struct kvm_vcpu *vcpu) + { +- struct kvm_mmu_page *sp; +- +- while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) { +- sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, +- struct kvm_mmu_page, link); +- kvm_mmu_zap_page(vcpu->kvm, sp); +- cond_resched(); +- } + free_page((unsigned long)vcpu->arch.mmu.pae_root); + } + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -881,6 +881,8 @@ static void kvm_destroy_vm(struct kvm *k + #endif + #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); ++#else ++ kvm_arch_flush_shadow(kvm); + #endif + kvm_arch_destroy_vm(kvm); + mmdrop(mm); diff --git a/queue-2.6.30/kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch b/queue-2.6.30/kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch new file mode 100644 index 00000000000..e32333d8d94 --- /dev/null +++ b/queue-2.6.30/kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch @@ -0,0 +1,61 @@ +From mtosatti@redhat.com Thu Sep 3 14:26:28 2009 +From: Marcelo Tosatti +Date: Tue, 1 Sep 2009 12:15:12 -0300 +Subject: KVM: MMU: handle n_free_mmu_pages > n_alloc_mmu_pages in kvm_mmu_change_mmu_pages +To: stable@kernel.org +Cc: Marcelo Tosatti , avi@redhat.com +Message-ID: <1251818115-22157-4-git-send-email-mtosatti@redhat.com> + +From: Marcelo Tosatti + +(cherry picked from commit 025dbbf36a7680bffe54d9dcbf0a8bc01a7cbd10) + +kvm_mmu_change_mmu_pages mishandles the case where n_alloc_mmu_pages is +smaller then n_free_mmu_pages, by not checking if the result of 
+the subtraction is negative. + +Its a valid condition which can happen if a large number of pages has +been recently freed. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -1417,24 +1417,25 @@ static int kvm_mmu_zap_page(struct kvm * + */ + void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) + { ++ int used_pages; ++ ++ used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; ++ used_pages = max(0, used_pages); ++ + /* + * If we set the number of mmu pages to be smaller be than the + * number of actived pages , we must to free some mmu pages before we + * change the value + */ + +- if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > +- kvm_nr_mmu_pages) { +- int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages +- - kvm->arch.n_free_mmu_pages; +- +- while (n_used_mmu_pages > kvm_nr_mmu_pages) { ++ if (used_pages > kvm_nr_mmu_pages) { ++ while (used_pages > kvm_nr_mmu_pages) { + struct kvm_mmu_page *page; + + page = container_of(kvm->arch.active_mmu_pages.prev, + struct kvm_mmu_page, link); + kvm_mmu_zap_page(kvm, page); +- n_used_mmu_pages--; ++ used_pages--; + } + kvm->arch.n_free_mmu_pages = 0; + } diff --git a/queue-2.6.30/kvm-mmu-limit-rmap-chain-length.patch b/queue-2.6.30/kvm-mmu-limit-rmap-chain-length.patch new file mode 100644 index 00000000000..c69e87657cb --- /dev/null +++ b/queue-2.6.30/kvm-mmu-limit-rmap-chain-length.patch @@ -0,0 +1,109 @@ +From mtosatti@redhat.com Thu Sep 3 14:26:45 2009 +From: Marcelo Tosatti +Date: Tue, 1 Sep 2009 12:15:13 -0300 +Subject: [stable] [PATCH 14/16] KVM: MMU: limit rmap chain length +To: stable@kernel.org +Cc: Marcelo Tosatti , avi@redhat.com +Message-ID: <1251818115-22157-5-git-send-email-mtosatti@redhat.com> + +From: Marcelo Tosatti + +(cherry picked from commit 53a27b39ff4d2492f84b1fdc2f0047175f0b0b93) + +Otherwise the host can spend too long traversing an rmap chain, which +happens under a spinlock. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu.c | 33 ++++++++++++++++++++++++++++----- + 1 file changed, 28 insertions(+), 5 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -490,16 +490,20 @@ static unsigned long *gfn_to_rmap(struct + * + * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc + * containing more mappings. ++ * ++ * Returns the number of rmap entries before the spte was added or zero if ++ * the spte was not added. 
++ * + */ +-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) ++static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) + { + struct kvm_mmu_page *sp; + struct kvm_rmap_desc *desc; + unsigned long *rmapp; +- int i; ++ int i, count = 0; + + if (!is_rmap_pte(*spte)) +- return; ++ return count; + gfn = unalias_gfn(vcpu->kvm, gfn); + sp = page_header(__pa(spte)); + sp->gfns[spte - sp->spt] = gfn; +@@ -516,8 +520,10 @@ static void rmap_add(struct kvm_vcpu *vc + } else { + rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); + desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); +- while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) ++ while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) { + desc = desc->more; ++ count += RMAP_EXT; ++ } + if (desc->shadow_ptes[RMAP_EXT-1]) { + desc->more = mmu_alloc_rmap_desc(vcpu); + desc = desc->more; +@@ -526,6 +532,7 @@ static void rmap_add(struct kvm_vcpu *vc + ; + desc->shadow_ptes[i] = spte; + } ++ return count; + } + + static void rmap_desc_remove_entry(unsigned long *rmapp, +@@ -755,6 +762,19 @@ static int kvm_age_rmapp(struct kvm *kvm + return young; + } + ++#define RMAP_RECYCLE_THRESHOLD 1000 ++ ++static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage) ++{ ++ unsigned long *rmapp; ++ ++ gfn = unalias_gfn(vcpu->kvm, gfn); ++ rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); ++ ++ kvm_unmap_rmapp(vcpu->kvm, rmapp); ++ kvm_flush_remote_tlbs(vcpu->kvm); ++} ++ + int kvm_age_hva(struct kvm *kvm, unsigned long hva) + { + return kvm_handle_hva(kvm, hva, kvm_age_rmapp); +@@ -1771,6 +1791,7 @@ static void mmu_set_spte(struct kvm_vcpu + { + int was_rmapped = 0; + int was_writeble = is_writeble_pte(*shadow_pte); ++ int rmap_count; + + pgprintk("%s: spte %llx access %x write_fault %d" + " user_fault %d gfn %lx\n", +@@ -1812,9 +1833,11 @@ static void mmu_set_spte(struct kvm_vcpu + + page_header_update_slot(vcpu->kvm, shadow_pte, gfn); + if (!was_rmapped) { +- rmap_add(vcpu, shadow_pte, gfn, largepage); ++ rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage); + if (!is_rmap_pte(*shadow_pte)) + kvm_release_pfn_clean(pfn); ++ if (rmap_count > RMAP_RECYCLE_THRESHOLD) ++ rmap_recycle(vcpu, gfn, largepage); + } else { + if (was_writeble) + kvm_release_pfn_dirty(pfn); diff --git a/queue-2.6.30/kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch b/queue-2.6.30/kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch new file mode 100644 index 00000000000..a82fcdfa2f7 --- /dev/null +++ b/queue-2.6.30/kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch @@ -0,0 +1,85 @@ +From mtosatti@redhat.com Thu Sep 3 14:22:36 2009 +From: mtosatti@redhat.com +Date: Mon, 3 Aug 2009 14:57:50 -0300 +Subject: KVM: MMU: protect kvm_mmu_change_mmu_pages with mmu_lock +To: stable@kernel.org +Cc: Marcelo Tosatti , avi@redhat.com +Message-ID: <1249322277-5824-3-git-send-email-mtosatti@redhat.com> + + +From: Marcelo Tosatti + +(cherry picked from commit 7c8a83b75a38a807d37f5a4398eca2a42c8cf513) + +kvm_handle_hva, called by MMU notifiers, manipulates mmu data only with +the protection of mmu_lock. + +Update kvm_mmu_change_mmu_pages callers to take mmu_lock, thus protecting +against kvm_handle_hva. 
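
In other words, the rule the hunks below apply at each call site is: take kvm->mmu_lock around anything that resizes or walks the shadow-page lists, since that is the lock the MMU-notifier path relies on. A minimal sketch of the pattern (the helper name is made up for illustration; the real changes are open-coded at the call sites below):

/*
 * Sketch only.  Any path that calls kvm_mmu_change_mmu_pages() must
 * hold kvm->mmu_lock, because kvm_handle_hva() (the MMU-notifier path)
 * depends on that same lock while it touches shadow MMU state.
 */
static void set_nr_mmu_pages_locked(struct kvm *kvm, unsigned int nr_pages)
{
        spin_lock(&kvm->mmu_lock);
        kvm_mmu_change_mmu_pages(kvm, nr_pages);
        spin_unlock(&kvm->mmu_lock);
}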
+ +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu.c | 2 -- + arch/x86/kvm/x86.c | 6 ++++++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -2729,7 +2729,6 @@ void kvm_mmu_slot_remove_write_access(st + { + struct kvm_mmu_page *sp; + +- spin_lock(&kvm->mmu_lock); + list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { + int i; + u64 *pt; +@@ -2744,7 +2743,6 @@ void kvm_mmu_slot_remove_write_access(st + pt[i] &= ~PT_WRITABLE_MASK; + } + kvm_flush_remote_tlbs(kvm); +- spin_unlock(&kvm->mmu_lock); + } + + void kvm_mmu_zap_all(struct kvm *kvm) +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1608,10 +1608,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages + return -EINVAL; + + down_write(&kvm->slots_lock); ++ spin_lock(&kvm->mmu_lock); + + kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); + kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; + ++ spin_unlock(&kvm->mmu_lock); + up_write(&kvm->slots_lock); + return 0; + } +@@ -1787,7 +1789,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kv + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { ++ spin_lock(&kvm->mmu_lock); + kvm_mmu_slot_remove_write_access(kvm, log->slot); ++ spin_unlock(&kvm->mmu_lock); + kvm_flush_remote_tlbs(kvm); + memslot = &kvm->memslots[log->slot]; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; +@@ -4419,12 +4423,14 @@ int kvm_arch_set_memory_region(struct kv + } + } + ++ spin_lock(&kvm->mmu_lock); + if (!kvm->arch.n_requested_mmu_pages) { + unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); + kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); + } + + kvm_mmu_slot_remove_write_access(kvm, mem->slot); ++ spin_unlock(&kvm->mmu_lock); + kvm_flush_remote_tlbs(kvm); + + return 0; diff --git a/queue-2.6.30/kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch b/queue-2.6.30/kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch new file mode 100644 index 00000000000..e9060e7805f --- /dev/null +++ b/queue-2.6.30/kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch @@ -0,0 +1,51 @@ +From mtosatti@redhat.com Thu Sep 3 14:23:53 2009 +From: Avi Kivity +Date: Mon, 3 Aug 2009 14:57:54 -0300 +Subject: KVM: MMU: Use different shadows when EFER.NXE changes +To: stable@kernel.org +Cc: Avi Kivity +Message-ID: <1249322277-5824-7-git-send-email-mtosatti@redhat.com> + + +From: Avi Kivity + +(cherry picked from commit 9645bb56b31a1b70ab9e470387b5264cafc04aa9) + +A pte that is shadowed when the guest EFER.NXE=1 is not valid when +EFER.NXE=0; if bit 63 is set, the pte should cause a fault, and since the +shadow EFER always has NX enabled, this won't happen. + +Fix by using a different shadow page table for different EFER.NXE bits. This +allows vcpus to run correctly with different values of EFER.NXE, and for +transitions on this bit to be handled correctly without requiring a full +flush. 
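
The reason one extra bit in kvm_mmu_page_role is enough is that the role is part of the key under which shadow pages are looked up and reused. Roughly (sketch only, not code from the patch; the helper name is invented for illustration):

/*
 * Sketch only: kvm_mmu_page_role is a packed union, and a shadow page
 * is reused only when the whole role word matches.  Folding EFER.NXE
 * into the role therefore gives NXE=0 and NXE=1 disjoint shadow pages,
 * so flipping the bit switches roots instead of forcing a full flush.
 */
static inline bool nx_role_matches(union kvm_mmu_page_role a,
                                   union kvm_mmu_page_role b)
{
        return a.word == b.word;        /* role.nxe participates in the compare */
}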
+ +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 1 + + arch/x86/kvm/x86.c | 3 +++ + 2 files changed, 4 insertions(+) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -185,6 +185,7 @@ union kvm_mmu_page_role { + unsigned access:3; + unsigned invalid:1; + unsigned cr4_pge:1; ++ unsigned nxe:1; + }; + }; + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -523,6 +523,9 @@ static void set_efer(struct kvm_vcpu *vc + efer |= vcpu->arch.shadow_efer & EFER_LMA; + + vcpu->arch.shadow_efer = efer; ++ ++ vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; ++ kvm_mmu_reset_context(vcpu); + } + + void kvm_enable_efer_bits(u64 mask) diff --git a/queue-2.6.30/kvm-svm-force-new-asid-on-vcpu-migration.patch b/queue-2.6.30/kvm-svm-force-new-asid-on-vcpu-migration.patch new file mode 100644 index 00000000000..dc2ef8b088f --- /dev/null +++ b/queue-2.6.30/kvm-svm-force-new-asid-on-vcpu-migration.patch @@ -0,0 +1,59 @@ +From mtosatti@redhat.com Thu Sep 3 14:26:07 2009 +From: Marcelo Tosatti +Date: Tue, 1 Sep 2009 12:15:11 -0300 +Subject: KVM: SVM: force new asid on vcpu migration +To: stable@kernel.org +Cc: Marcelo Tosatti , avi@redhat.com +Message-ID: <1251818115-22157-3-git-send-email-mtosatti@redhat.com> + +From: Marcelo Tosatti + +(cherry picked from commit 4b656b1202498184a0ecef86b3b89ff613b9c6ab) + +If a migrated vcpu matches the asid_generation value of the target pcpu, +there will be no TLB flush via TLB_CONTROL_FLUSH_ALL_ASID. + +The check for vcpu.cpu in pre_svm_run is meaningless since svm_vcpu_load +already updated it on schedule in. + +Such vcpu will VMRUN with stale TLB entries. + +Based on original patch from Joerg Roedel (http://patchwork.kernel.org/patch/10021/) + +Signed-off-by: Marcelo Tosatti +Acked-by: Joerg Roedel +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -736,6 +736,7 @@ static void svm_vcpu_load(struct kvm_vcp + svm->vmcb->control.tsc_offset += delta; + vcpu->cpu = cpu; + kvm_migrate_timers(vcpu); ++ svm->asid_generation = 0; + } + + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) +@@ -1046,7 +1047,6 @@ static void new_asid(struct vcpu_svm *sv + svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + } + +- svm->vcpu.cpu = svm_data->cpu; + svm->asid_generation = svm_data->asid_generation; + svm->vmcb->control.asid = svm_data->next_asid++; + } +@@ -2258,8 +2258,8 @@ static void pre_svm_run(struct vcpu_svm + struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); + + svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; +- if (svm->vcpu.cpu != cpu || +- svm->asid_generation != svm_data->asid_generation) ++ /* FIXME: handle wraparound of asid_generation */ ++ if (svm->asid_generation != svm_data->asid_generation) + new_asid(svm, svm_data); + } + diff --git a/queue-2.6.30/kvm-take-mmu_lock-when-updating-a-deleted-slot.patch b/queue-2.6.30/kvm-take-mmu_lock-when-updating-a-deleted-slot.patch new file mode 100644 index 00000000000..6c17061331c --- /dev/null +++ b/queue-2.6.30/kvm-take-mmu_lock-when-updating-a-deleted-slot.patch @@ -0,0 +1,36 @@ +From mtosatti@redhat.com Thu Sep 3 14:22:07 2009 +From: mtosatti@redhat.com +Date: Mon, 3 Aug 2009 14:57:48 -0300 +Subject: KVM: take mmu_lock when updating a deleted slot +To: stable@kernel.org +Cc: Marcelo Tosatti , avi@redhat.com +Message-ID: 
<1249322277-5824-1-git-send-email-mtosatti@redhat.com> + + +From: Marcelo Tosatti + +(cherry picked from commit b43b1901ad282aeb74161837fb403927102687a1) + +kvm_handle_hva relies on mmu_lock protection to safely access +the memslot structures. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1055,8 +1055,10 @@ int __kvm_set_memory_region(struct kvm * + + kvm_free_physmem_slot(&old, npages ? &new : NULL); + /* Slot deletion case: we have to update the current slot */ ++ spin_lock(&kvm->mmu_lock); + if (!npages) + *memslot = old; ++ spin_unlock(&kvm->mmu_lock); + #ifdef CONFIG_DMAR + /* map the pages in iommu page table */ + r = kvm_iommu_map_pages(kvm, base_gfn, npages); diff --git a/queue-2.6.30/kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch b/queue-2.6.30/kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch new file mode 100644 index 00000000000..39ed49ceff0 --- /dev/null +++ b/queue-2.6.30/kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch @@ -0,0 +1,113 @@ +From mtosatti@redhat.com Thu Sep 3 14:22:22 2009 +From: mtosatti@redhat.com +Date: Mon, 3 Aug 2009 14:57:49 -0300 +Subject: KVM: x86: check for cr3 validity in mmu_alloc_roots +To: stable@kernel.org +Cc: Marcelo Tosatti , avi@redhat.com +Message-ID: <1249322277-5824-2-git-send-email-mtosatti@redhat.com> + + +From: Marcelo Tosatti + +(cherry picked from commit 8986ecc0ef58c96eec48d8502c048f3ab67fd8e2) + +Verify the cr3 address stored in vcpu->arch.cr3 points to an existant +memslot. If not, inject a triple fault. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu.c | 27 +++++++++++++++++++++++---- + arch/x86/kvm/x86.c | 1 + + 2 files changed, 24 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -1942,7 +1942,19 @@ static void mmu_free_roots(struct kvm_vc + vcpu->arch.mmu.root_hpa = INVALID_PAGE; + } + +-static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ++static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) ++{ ++ int ret = 0; ++ ++ if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { ++ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); ++ ret = 1; ++ } ++ ++ return ret; ++} ++ ++static int mmu_alloc_roots(struct kvm_vcpu *vcpu) + { + int i; + gfn_t root_gfn; +@@ -1957,13 +1969,15 @@ static void mmu_alloc_roots(struct kvm_v + ASSERT(!VALID_PAGE(root)); + if (tdp_enabled) + direct = 1; ++ if (mmu_check_root(vcpu, root_gfn)) ++ return 1; + sp = kvm_mmu_get_page(vcpu, root_gfn, 0, + PT64_ROOT_LEVEL, direct, + ACC_ALL, NULL); + root = __pa(sp->spt); + ++sp->root_count; + vcpu->arch.mmu.root_hpa = root; +- return; ++ return 0; + } + direct = !is_paging(vcpu); + if (tdp_enabled) +@@ -1980,6 +1994,8 @@ static void mmu_alloc_roots(struct kvm_v + root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; + } else if (vcpu->arch.mmu.root_level == 0) + root_gfn = 0; ++ if (mmu_check_root(vcpu, root_gfn)) ++ return 1; + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, + PT32_ROOT_LEVEL, direct, + ACC_ALL, NULL); +@@ -1988,6 +2004,7 @@ static void mmu_alloc_roots(struct kvm_v + vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; + } + vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); ++ return 0; + } + + static void mmu_sync_roots(struct kvm_vcpu *vcpu) +@@ -2006,7 +2023,7 @@ static void mmu_sync_roots(struct kvm_vc + for (i = 0; i < 4; ++i) { + hpa_t 
root = vcpu->arch.mmu.pae_root[i]; + +- if (root) { ++ if (root && VALID_PAGE(root)) { + root &= PT64_BASE_ADDR_MASK; + sp = page_header(root); + mmu_sync_children(vcpu, sp); +@@ -2290,9 +2307,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) + goto out; + spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_free_some_pages(vcpu); +- mmu_alloc_roots(vcpu); ++ r = mmu_alloc_roots(vcpu); + mmu_sync_roots(vcpu); + spin_unlock(&vcpu->kvm->mmu_lock); ++ if (r) ++ goto out; + kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); + kvm_mmu_flush_tlb(vcpu); + out: +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4433,6 +4433,7 @@ int kvm_arch_set_memory_region(struct kv + void kvm_arch_flush_shadow(struct kvm *kvm) + { + kvm_mmu_zap_all(kvm); ++ kvm_reload_remote_mmus(kvm); + } + + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) diff --git a/queue-2.6.30/kvm-x86-ignore-reads-to-evntsel-msrs.patch b/queue-2.6.30/kvm-x86-ignore-reads-to-evntsel-msrs.patch new file mode 100644 index 00000000000..8d1f06ca819 --- /dev/null +++ b/queue-2.6.30/kvm-x86-ignore-reads-to-evntsel-msrs.patch @@ -0,0 +1,38 @@ +From mtosatti@redhat.com Thu Sep 3 14:24:08 2009 +From: Amit Shah +Date: Mon, 3 Aug 2009 14:57:55 -0300 +Subject: KVM: x86: Ignore reads to EVNTSEL MSRs +To: stable@kernel.org +Cc: Amit Shah , avi@redhat.com +Message-ID: <1249322277-5824-8-git-send-email-mtosatti@redhat.com> + + +From: Amit Shah + +(cherry picked from commit 7fe29e0faacb650d31b9e9f538203a157bec821d) + +We ignore writes to the performance counters and performance event +selector registers already. Kaspersky antivirus reads the eventsel +MSR causing it to crash with the current behaviour. + +Return 0 as data when the eventsel registers are read to stop the +crash. + +Signed-off-by: Amit Shah +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -898,6 +898,8 @@ int kvm_get_msr_common(struct kvm_vcpu * + case MSR_IA32_LASTINTFROMIP: + case MSR_IA32_LASTINTTOIP: + case MSR_VM_HSAVE_PA: ++ case MSR_P6_EVNTSEL0: ++ case MSR_P6_EVNTSEL1: + data = 0; + break; + case MSR_MTRRcap: diff --git a/queue-2.6.30/kvm-x86-verify-mtrr-pat-validity.patch b/queue-2.6.30/kvm-x86-verify-mtrr-pat-validity.patch new file mode 100644 index 00000000000..3c05e5c281b --- /dev/null +++ b/queue-2.6.30/kvm-x86-verify-mtrr-pat-validity.patch @@ -0,0 +1,74 @@ +From mtosatti@redhat.com Thu Sep 3 14:25:49 2009 +From: Marcelo Tosatti +Date: Tue, 1 Sep 2009 12:15:10 -0300 +Subject: KVM: x86: verify MTRR/PAT validity +To: stable@kernel.org +Cc: Marcelo Tosatti , avi@redhat.com +Message-ID: <1251818115-22157-2-git-send-email-mtosatti@redhat.com> + +From: Marcelo Tosatti + +(cherry picked from commit d6289b9365c3f622a8cfe62c4fb054bb70b5061a) + +Do not allow invalid memory types in MTRR/PAT (generating a #GP +otherwise). 
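
The masks used in the hunk below encode the architectural memory-type numbers directly: bit t is set iff type t is legal, so PAT allows 0, 1, 4, 5, 6, 7 (mask 0xf3) while MTRRs additionally exclude 7/UC- (mask 0x73). A standalone illustration of the same check, not part of the patch:

/*
 * Illustration only: the bitmask trick the hunk below uses.
 * 0xf3 = 11110011b -> types 0,1,4,5,6,7 (UC, WC, WT, WP, WB, UC-)
 * 0x73 = 01110011b -> types 0,1,4,5,6   (UC- is PAT-only)
 * Types 2 and 3 are reserved and are rejected with #GP.
 */
#include <stdio.h>

static int valid_pat_type(unsigned int t)
{
        return t < 8 && (1u << t) & 0xf3;
}

static int valid_mtrr_type(unsigned int t)
{
        return t < 8 && (1u << t) & 0x73;
}

int main(void)
{
        for (unsigned int t = 0; t < 8; t++)
                printf("type %u: pat=%d mtrr=%d\n",
                       t, valid_pat_type(t), valid_mtrr_type(t));
        return 0;
}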
+ +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 38 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -706,11 +706,48 @@ static bool msr_mtrr_valid(unsigned msr) + return false; + } + ++static bool valid_pat_type(unsigned t) ++{ ++ return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ ++} ++ ++static bool valid_mtrr_type(unsigned t) ++{ ++ return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ ++} ++ ++static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) ++{ ++ int i; ++ ++ if (!msr_mtrr_valid(msr)) ++ return false; ++ ++ if (msr == MSR_IA32_CR_PAT) { ++ for (i = 0; i < 8; i++) ++ if (!valid_pat_type((data >> (i * 8)) & 0xff)) ++ return false; ++ return true; ++ } else if (msr == MSR_MTRRdefType) { ++ if (data & ~0xcff) ++ return false; ++ return valid_mtrr_type(data & 0xff); ++ } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { ++ for (i = 0; i < 8 ; i++) ++ if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) ++ return false; ++ return true; ++ } ++ ++ /* variable MTRRs */ ++ return valid_mtrr_type(data & 0xff); ++} ++ + static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) + { + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; + +- if (!msr_mtrr_valid(msr)) ++ if (!mtrr_valid(vcpu, msr, data)) + return 1; + + if (msr == MSR_MTRRdefType) { diff --git a/queue-2.6.30/series b/queue-2.6.30/series index 2c49c1b6a45..0bede0858fe 100644 --- a/queue-2.6.30/series +++ b/queue-2.6.30/series @@ -16,3 +16,20 @@ clone-fix-race-between-copy_process-and-de_thread.patch wmi-fix-kernel-panic-when-stack-protection-enabled.patch sunrpc-fix-rpc_task_force_reencode.patch alsa-hda-fix-macbookpro-3-1-4-1-quirk-with-alc889a.patch +kvm-take-mmu_lock-when-updating-a-deleted-slot.patch +kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch +kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch +kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch +kvm-introduce-set-get-_interrupt_shadow.patch +kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch +kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch +kvm-x86-ignore-reads-to-evntsel-msrs.patch +kvm-ignore-reads-to-k7-evntsel-msrs.patch +kvm-fix-cpuid-feature-misreporting.patch +kvm-x86-verify-mtrr-pat-validity.patch +kvm-svm-force-new-asid-on-vcpu-migration.patch +kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch +kvm-mmu-limit-rmap-chain-length.patch +kvm-fix-ack-not-being-delivered-when-msi-present.patch +kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch +kvm-fix-kvm_get_msr_index_list.patch