git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
kvm patches for .30
author Greg Kroah-Hartman <gregkh@suse.de>
Thu, 3 Sep 2009 21:31:28 +0000 (14:31 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Thu, 3 Sep 2009 21:31:28 +0000 (14:31 -0700)
18 files changed:
queue-2.6.30/kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch [new file with mode: 0644]
queue-2.6.30/kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch [new file with mode: 0644]
queue-2.6.30/kvm-fix-ack-not-being-delivered-when-msi-present.patch [new file with mode: 0644]
queue-2.6.30/kvm-fix-cpuid-feature-misreporting.patch [new file with mode: 0644]
queue-2.6.30/kvm-fix-kvm_get_msr_index_list.patch [new file with mode: 0644]
queue-2.6.30/kvm-ignore-reads-to-k7-evntsel-msrs.patch [new file with mode: 0644]
queue-2.6.30/kvm-introduce-set-get-_interrupt_shadow.patch [new file with mode: 0644]
queue-2.6.30/kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch [new file with mode: 0644]
queue-2.6.30/kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch [new file with mode: 0644]
queue-2.6.30/kvm-mmu-limit-rmap-chain-length.patch [new file with mode: 0644]
queue-2.6.30/kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch [new file with mode: 0644]
queue-2.6.30/kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch [new file with mode: 0644]
queue-2.6.30/kvm-svm-force-new-asid-on-vcpu-migration.patch [new file with mode: 0644]
queue-2.6.30/kvm-take-mmu_lock-when-updating-a-deleted-slot.patch [new file with mode: 0644]
queue-2.6.30/kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch [new file with mode: 0644]
queue-2.6.30/kvm-x86-ignore-reads-to-evntsel-msrs.patch [new file with mode: 0644]
queue-2.6.30/kvm-x86-verify-mtrr-pat-validity.patch [new file with mode: 0644]
queue-2.6.30/series

diff --git a/queue-2.6.30/kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch b/queue-2.6.30/kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch
new file mode 100644 (file)
index 0000000..7dfd864
--- /dev/null
@@ -0,0 +1,66 @@
+From mtosatti@redhat.com  Thu Sep  3 14:27:33 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Tue,  1 Sep 2009 12:15:15 -0300
+Subject: KVM: Avoid redelivery of edge interrupt before next edge
+To: stable@kernel.org
+Cc: avi@redhat.com, Gleb Natapov <gleb@redhat.com>
+Message-ID: <1251818115-22157-7-git-send-email-mtosatti@redhat.com>
+
+
+From: Gleb Natapov <gleb@redhat.com>
+
+(cherry picked from commit b4a2f5e723e4f7df46731106faf9e2405673c073)
+
+The check for an edge is broken in the current ioapic code. ioapic->irr is
+cleared on each edge interrupt by ioapic_service(), which makes the
+old_irr != ioapic->irr condition in kvm_ioapic_set_irq() always true.
+The patch fixes the code to properly recognise an edge.
+
+Some HW emulation calls set_irq() without a level change. If each such
+call is propagated to an OS it may confuse a device driver. This is the
+case with keyboard device emulation and the Windows XP x64 installer on an
+SMP VM. Each keystroke produces two interrupts (down/up); one interrupt is
+submitted to CPU0 and another to CPU1. This somehow confuses Windows,
+and it ignores keystrokes.
+
+Signed-off-by: Gleb Natapov <gleb@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ virt/kvm/ioapic.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/virt/kvm/ioapic.c
++++ b/virt/kvm/ioapic.c
+@@ -95,8 +95,6 @@ static int ioapic_service(struct kvm_ioa
+               if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+                       pent->fields.remote_irr = 1;
+       }
+-      if (!pent->fields.trig_mode)
+-              ioapic->irr &= ~(1 << idx);
+       return injected;
+ }
+@@ -136,7 +134,8 @@ static void ioapic_write_indirect(struct
+               mask_after = ioapic->redirtbl[index].fields.mask;
+               if (mask_before != mask_after)
+                       kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
+-              if (ioapic->irr & (1 << index))
++              if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
++                  && ioapic->irr & (1 << index))
+                       ioapic_service(ioapic, index);
+               break;
+       }
+@@ -293,9 +292,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic
+               if (!level)
+                       ioapic->irr &= ~mask;
+               else {
++                      int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+                       ioapic->irr |= mask;
+-                      if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
+-                          || !entry.fields.remote_irr)
++                      if ((edge && old_irr != ioapic->irr) ||
++                          (!edge && !entry.fields.remote_irr))
+                               ret = ioapic_service(ioapic, irq);
+               }
+       }
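
For illustration, here is a minimal user-space model of the corrected edge
logic (a hypothetical standalone sketch, not the kernel API). Because
ioapic->irr is no longer cleared by ioapic_service(), an edge-triggered pin
injects only on a genuine 0 -> 1 transition of its irr bit:

    #include <stdio.h>

    static unsigned irr;    /* models ioapic->irr, one bit per pin */

    static int set_irq(int pin, int level, int edge_triggered)
    {
        unsigned mask = 1u << pin;
        unsigned old_irr = irr;
        int injected = 0;

        if (!level) {
            irr &= ~mask;                   /* line dropped */
        } else {
            irr |= mask;
            /* inject only on a real rising edge */
            if (edge_triggered && old_irr != irr)
                injected = 1;
        }
        return injected;
    }

    int main(void)
    {
        printf("%d\n", set_irq(1, 1, 1));   /* 1: first rising edge */
        printf("%d\n", set_irq(1, 1, 1));   /* 0: no redelivery */
        printf("%d\n", set_irq(1, 0, 1));   /* 0: line drops */
        printf("%d\n", set_irq(1, 1, 1));   /* 1: next rising edge */
        return 0;
    }
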
diff --git a/queue-2.6.30/kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch b/queue-2.6.30/kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch
new file mode 100644 (file)
index 0000000..d50e0fb
--- /dev/null
@@ -0,0 +1,119 @@
+From mtosatti@redhat.com  Thu Sep  3 14:23:35 2009
+From: Glauber Costa <glommer@redhat.com>
+Date: Mon,  3 Aug 2009 14:57:53 -0300
+Subject: KVM: Deal with interrupt shadow state for emulated instructions
+To: stable@kernel.org
+Cc: Glauber Costa <glommer@redhat.com>, avi@redhat.com, Gleb Natapov <gleb@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>
+Message-ID: <1249322277-5824-6-git-send-email-mtosatti@redhat.com>
+
+
+From: Glauber Costa <glommer@redhat.com>
+
+(cherry picked from commit 310b5d306c1aee7ebe32f702c0e33e7988d50646)
+
+We currently unblock shadow interrupt state when we skip an instruction,
+but fail to do so when we actually emulate one. This blocks interrupts
+in key instruction blocks, in particular sti; hlt; sequences.
+
+If the instruction emulated is an sti, we have to block shadow interrupts.
+The same goes for mov ss. pop ss also needs it, but we don't currently
+emulate it.
+
+Without this patch, I cannot boot gpxe option roms on vmx machines.
+This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469
+
+Signed-off-by: Glauber Costa <glommer@redhat.com>
+CC: H. Peter Anvin <hpa@zytor.com>
+CC: Gleb Natapov <gleb@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/include/asm/kvm_x86_emulate.h |    3 +++
+ arch/x86/kvm/x86.c                     |    6 +++++-
+ arch/x86/kvm/x86_emulate.c             |   20 ++++++++++++++++++++
+ 3 files changed, 28 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/kvm_x86_emulate.h
++++ b/arch/x86/include/asm/kvm_x86_emulate.h
+@@ -155,6 +155,9 @@ struct x86_emulate_ctxt {
+       int mode;
+       u32 cs_base;
++      /* interruptibility state, as a result of execution of STI or MOV SS */
++      int interruptibility;
++
+       /* decode cache */
+       struct decode_cache decode;
+ };
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2366,7 +2366,7 @@ int emulate_instruction(struct kvm_vcpu 
+                       u16 error_code,
+                       int emulation_type)
+ {
+-      int r;
++      int r, shadow_mask;
+       struct decode_cache *c;
+       kvm_clear_exception_queue(vcpu);
+@@ -2415,6 +2415,10 @@ int emulate_instruction(struct kvm_vcpu 
+       }
+       r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
++      shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
++
++      if (r == 0)
++              kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
+       if (vcpu->arch.pio.string)
+               return EMULATE_DO_MMIO;
+--- a/arch/x86/kvm/x86_emulate.c
++++ b/arch/x86/kvm/x86_emulate.c
+@@ -1349,6 +1349,20 @@ static inline int writeback(struct x86_e
+       return 0;
+ }
++void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
++{
++      u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
++      /*
++       * an sti; sti; sequence only disables interrupts for the first
++       * instruction. So, if the last instruction, be it emulated or
++       * not, left the system with the INT_STI flag enabled, it
++       * means that the last instruction is an sti. We should not
++       * leave the flag on in this case. The same goes for mov ss
++       */
++      if (!(int_shadow & mask))
++              ctxt->interruptibility = mask;
++}
++
+ int
+ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
+ {
+@@ -1360,6 +1374,8 @@ x86_emulate_insn(struct x86_emulate_ctxt
+       int io_dir_in;
+       int rc = 0;
++      ctxt->interruptibility = 0;
++
+       /* Shadow copy of register state. Committed on successful emulation.
+        * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
+        * modify them.
+@@ -1609,6 +1625,9 @@ special_insn:
+               int err;
+               sel = c->src.val;
++              if (c->modrm_reg == VCPU_SREG_SS)
++                      toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
++
+               if (c->modrm_reg <= 5) {
+                       type_bits = (c->modrm_reg == 1) ? 9 : 1;
+                       err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
+@@ -1865,6 +1884,7 @@ special_insn:
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
+       case 0xfb: /* sti */
++              toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
+               ctxt->eflags |= X86_EFLAGS_IF;
+               c->dst.type = OP_NONE;  /* Disable writeback. */
+               break;
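
A sketch of the resulting mechanism, as a self-contained user-space model
(the names emulate_one/cpu_shadow/pending are invented for illustration):
the shadow requested by the emulator is committed only after successful
emulation, and toggle_interruptibility() refuses to re-arm a shadow that
is already active, so sti; sti blocks exactly one instruction:

    #include <stdio.h>

    #define X86_SHADOW_INT_MOV_SS 1
    #define X86_SHADOW_INT_STI    2

    static unsigned cpu_shadow;  /* stands in for the VMCS/VMCB state */
    static unsigned pending;     /* stands in for ctxt->interruptibility */

    static void toggle_interruptibility(unsigned mask)
    {
        /* request the shadow only if it was not already active */
        if (!(cpu_shadow & mask))
            pending = mask;
    }

    static void emulate_one(int is_sti)
    {
        pending = 0;
        if (is_sti)
            toggle_interruptibility(X86_SHADOW_INT_STI);
        cpu_shadow = pending;    /* committed after successful emulation */
    }

    int main(void)
    {
        emulate_one(1); printf("after sti #1: %u\n", cpu_shadow); /* 2 */
        emulate_one(1); printf("after sti #2: %u\n", cpu_shadow); /* 0 */
        return 0;
    }
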
diff --git a/queue-2.6.30/kvm-fix-ack-not-being-delivered-when-msi-present.patch b/queue-2.6.30/kvm-fix-ack-not-being-delivered-when-msi-present.patch
new file mode 100644 (file)
index 0000000..b7ea044
--- /dev/null
@@ -0,0 +1,56 @@
+From mtosatti@redhat.com  Thu Sep  3 14:27:17 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Tue,  1 Sep 2009 12:15:14 -0300
+Subject: KVM: fix ack not being delivered when msi present
+To: stable@kernel.org
+Cc: avi@redhat.com, "Michael S. Tsirkin" <mst@redhat.com>
+Message-ID: <1251818115-22157-6-git-send-email-mtosatti@redhat.com>
+
+
+From: Michael S. Tsirkin <mst@redhat.com>
+
+(cherry picked from commit 5116d8f6b977970ebefc1932c0f313163a6ec91f)
+
+kvm_notify_acked_irq does not check the irq type, so it sometimes
+interprets an msi vector as an irq.  As a result, ack notifiers are not
+called, which typically hangs the guest.  The fix is to track and
+check the irq type.
+
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/kvm_host.h |    1 +
+ virt/kvm/irq_comm.c      |    4 +++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -110,6 +110,7 @@ struct kvm_memory_slot {
+ struct kvm_kernel_irq_routing_entry {
+       u32 gsi;
++      u32 type;
+       int (*set)(struct kvm_kernel_irq_routing_entry *e,
+                   struct kvm *kvm, int level);
+       union {
+--- a/virt/kvm/irq_comm.c
++++ b/virt/kvm/irq_comm.c
+@@ -141,7 +141,8 @@ void kvm_notify_acked_irq(struct kvm *kv
+       unsigned gsi = pin;
+       list_for_each_entry(e, &kvm->irq_routing, link)
+-              if (e->irqchip.irqchip == irqchip &&
++              if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
++                  e->irqchip.irqchip == irqchip &&
+                   e->irqchip.pin == pin) {
+                       gsi = e->gsi;
+                       break;
+@@ -240,6 +241,7 @@ static int setup_routing_entry(struct kv
+       int delta;
+       e->gsi = ue->gsi;
++      e->type = ue->type;
+       switch (ue->type) {
+       case KVM_IRQ_ROUTING_IRQCHIP:
+               delta = 0;
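
To see why the type check matters, consider this user-space model (invented
names; the real union lives in struct kvm_kernel_irq_routing_entry): an MSI
entry overlays different fields on the same storage, so matching on
irqchip/pin without checking the type can misread MSI data as a pin number:

    #include <stdio.h>

    enum { ROUTING_IRQCHIP, ROUTING_MSI };

    struct routing_entry {
        unsigned gsi;
        unsigned type;                 /* the field this patch adds */
        union {
            struct { unsigned irqchip, pin; } irqchip;
            struct { unsigned address, data; } msi;
        };
    };

    static unsigned pin_to_gsi(const struct routing_entry *tbl, int n,
                               unsigned irqchip, unsigned pin)
    {
        for (int i = 0; i < n; i++)
            if (tbl[i].type == ROUTING_IRQCHIP &&  /* the added check */
                tbl[i].irqchip.irqchip == irqchip &&
                tbl[i].irqchip.pin == pin)
                return tbl[i].gsi;
        return pin;                    /* fall back to the pin number */
    }

    int main(void)
    {
        struct routing_entry tbl[2] = {
            /* MSI entry whose address/data look like chip 0, pin 3 */
            { .gsi = 40, .type = ROUTING_MSI,     .msi = { 0, 3 } },
            { .gsi = 10, .type = ROUTING_IRQCHIP, .irqchip = { 0, 3 } },
        };
        /* without the type check, the MSI entry would match: gsi = 40 */
        printf("gsi = %u\n", pin_to_gsi(tbl, 2, 0, 3)); /* gsi = 10 */
        return 0;
    }
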
diff --git a/queue-2.6.30/kvm-fix-cpuid-feature-misreporting.patch b/queue-2.6.30/kvm-fix-cpuid-feature-misreporting.patch
new file mode 100644 (file)
index 0000000..a33ee17
--- /dev/null
@@ -0,0 +1,40 @@
+From mtosatti@redhat.com  Thu Sep  3 14:24:45 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Mon,  3 Aug 2009 14:57:57 -0300
+Subject: KVM: Fix cpuid feature misreporting
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249322277-5824-10-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit 8d753f369bd28fff1706ffe9fb9fea4fd88cf85b)
+
+MTRR, PAT, MCE, and MCA are all supported (to some extent) but not reported.
+Vista requires these features, so if userspace relies on kernel cpuid
+reporting, it loses support for Vista.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1256,9 +1256,12 @@ static void do_cpuid_ent(struct kvm_cpui
+               bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
+               bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
+               bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
++              bit(X86_FEATURE_MCE) |
+               bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
+-              bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
+-              bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
++              bit(X86_FEATURE_SEP) | bit(X86_FEATURE_MTRR) |
++              bit(X86_FEATURE_PGE) | bit(X86_FEATURE_MCA) |
++              bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PAT) |
++              bit(X86_FEATURE_PSE36) |
+               bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
+               bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
+               bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
diff --git a/queue-2.6.30/kvm-fix-kvm_get_msr_index_list.patch b/queue-2.6.30/kvm-fix-kvm_get_msr_index_list.patch
new file mode 100644 (file)
index 0000000..2ce6107
--- /dev/null
@@ -0,0 +1,42 @@
+From e125e7b6944898831b56739a5448e705578bf7e2 Mon Sep 17 00:00:00 2001
+From: Jan Kiszka <jan.kiszka@web.de>
+Date: Thu, 2 Jul 2009 21:45:47 +0200
+Subject: KVM: Fix KVM_GET_MSR_INDEX_LIST
+
+From: Jan Kiszka <jan.kiszka@web.de>
+
+commit e125e7b6944898831b56739a5448e705578bf7e2 upstream.
+
+So far, KVM copied the emulated_msrs (only MSR_IA32_MISC_ENABLE) to the
+wrong address in user space due to broken pointer arithmetic. This
+caused subtle corruption up there (the missing MSR_IA32_MISC_ENABLE
+probably had no practical relevance). Moreover, the size check for the
+user-provided kvm_msr_list forgot about emulated MSRs.
+
+Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/x86.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1117,14 +1117,13 @@ long kvm_arch_dev_ioctl(struct file *fil
+               if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
+                       goto out;
+               r = -E2BIG;
+-              if (n < num_msrs_to_save)
++              if (n < msr_list.nmsrs)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(user_msr_list->indices, &msrs_to_save,
+                                num_msrs_to_save * sizeof(u32)))
+                       goto out;
+-              if (copy_to_user(user_msr_list->indices
+-                               + num_msrs_to_save * sizeof(u32),
++              if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
+                                &emulated_msrs,
+                                ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
+                       goto out;
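
The underlying C pitfall is easy to demonstrate in isolation (a standalone
sketch, not kernel code): pointer arithmetic on a u32 pointer already
scales by sizeof(u32), so the extra multiplication landed four times too
far into the user buffer:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t indices[8] = { 0 };
        uint32_t *base = indices;
        unsigned num_msrs_to_save = 2;

        /* broken: the + already scales by sizeof(uint32_t), so this
         * points 8 elements in instead of 2 */
        uint32_t *wrong = base + num_msrs_to_save * sizeof(uint32_t);

        /* fixed: add the element count, nothing more */
        uint32_t *right = base + num_msrs_to_save;

        printf("wrong: %td elements in\n", wrong - base);   /* 8 */
        printf("right: %td elements in\n", right - base);   /* 2 */
        return 0;
    }
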
diff --git a/queue-2.6.30/kvm-ignore-reads-to-k7-evntsel-msrs.patch b/queue-2.6.30/kvm-ignore-reads-to-k7-evntsel-msrs.patch
new file mode 100644 (file)
index 0000000..c4453f7
--- /dev/null
@@ -0,0 +1,38 @@
+From mtosatti@redhat.com  Thu Sep  3 14:24:25 2009
+From: Amit Shah <amit.shah@redhat.com>
+Date: Mon,  3 Aug 2009 14:57:56 -0300
+Subject: KVM: Ignore reads to K7 EVNTSEL MSRs
+To: stable@kernel.org
+Cc: Amit Shah <amit.shah@redhat.com>, avi@redhat.com
+Message-ID: <1249322277-5824-9-git-send-email-mtosatti@redhat.com>
+
+
+From: Amit Shah <amit.shah@redhat.com>
+
+(cherry picked from commit 9e6996240afcbe61682eab8eeaeb65c34333164d)
+
+In commit 7fe29e0faacb650d31b9e9f538203a157bec821d we ignored reads to
+the P6 EVNTSEL MSRs. That fixed crashes on Intel machines.
+
+Ignore reads to the K7 EVNTSEL MSRs as well to fix this on AMD
+hosts.
+
+This fixes Kaspersky antivirus crashing Windows guests on AMD hosts.
+
+Signed-off-by: Amit Shah <amit.shah@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -900,6 +900,7 @@ int kvm_get_msr_common(struct kvm_vcpu *
+       case MSR_VM_HSAVE_PA:
+       case MSR_P6_EVNTSEL0:
+       case MSR_P6_EVNTSEL1:
++      case MSR_K7_EVNTSEL0:
+               data = 0;
+               break;
+       case MSR_MTRRcap:
diff --git a/queue-2.6.30/kvm-introduce-set-get-_interrupt_shadow.patch b/queue-2.6.30/kvm-introduce-set-get-_interrupt_shadow.patch
new file mode 100644 (file)
index 0000000..5795610
--- /dev/null
@@ -0,0 +1,173 @@
+From mtosatti@redhat.com  Thu Sep  3 14:23:20 2009
+From: Glauber Costa <glommer@redhat.com>
+Date: Mon,  3 Aug 2009 14:57:52 -0300
+Subject: KVM: Introduce {set/get}_interrupt_shadow()
+To: stable@kernel.org
+Cc: Glauber Costa <glommer@redhat.com>, Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1249322277-5824-5-git-send-email-mtosatti@redhat.com>
+
+
+From: Glauber Costa <glommer@redhat.com>
+
+This patch introduces set/get_interrupt_shadow(), which does exactly
+what the name suggests. It also replaces open code that explicitly does
+the same thing with the now-existing functions. It differs slightly from
+upstream, because upstream merged it after Gleb's interrupt rework, which
+we don't ship.
+
+Just for reference, the upstream changelog is
+(2809f5d2c4cfad171167b131bb2a21ab65eba40f):
+
+This patch replaces drop_interrupt_shadow with the more
+general set_interrupt_shadow, that can either drop or raise
+it, depending on its parameter.  It also adds ->get_interrupt_shadow()
+for future use.
+
+Signed-off-by: Glauber Costa <glommer@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/include/asm/kvm_host.h        |    2 +
+ arch/x86/include/asm/kvm_x86_emulate.h |    3 ++
+ arch/x86/kvm/svm.c                     |   25 ++++++++++++++++++-
+ arch/x86/kvm/vmx.c                     |   42 +++++++++++++++++++++++++--------
+ 4 files changed, 62 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -513,6 +513,8 @@ struct kvm_x86_ops {
+       void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
+       int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+       void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
++      void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
++      u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
+       void (*patch_hypercall)(struct kvm_vcpu *vcpu,
+                               unsigned char *hypercall_addr);
+       int (*get_irq)(struct kvm_vcpu *vcpu);
+--- a/arch/x86/include/asm/kvm_x86_emulate.h
++++ b/arch/x86/include/asm/kvm_x86_emulate.h
+@@ -143,6 +143,9 @@ struct decode_cache {
+       struct fetch_cache fetch;
+ };
++#define X86_SHADOW_INT_MOV_SS  1
++#define X86_SHADOW_INT_STI     2
++
+ struct x86_emulate_ctxt {
+       /* Register state before/after emulation. */
+       struct kvm_vcpu *vcpu;
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -227,6 +227,27 @@ static int is_external_interrupt(u32 inf
+       return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
+ }
++static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
++{
++      struct vcpu_svm *svm = to_svm(vcpu);
++      u32 ret = 0;
++
++      if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
++              ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
++      return ret & mask;
++}
++
++static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
++{
++      struct vcpu_svm *svm = to_svm(vcpu);
++
++      if (mask == 0)
++              svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
++      else
++              svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
++
++}
++
+ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+@@ -240,7 +261,7 @@ static void skip_emulated_instruction(st
+                      __func__, kvm_rip_read(vcpu), svm->next_rip);
+       kvm_rip_write(vcpu, svm->next_rip);
+-      svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
++      svm_set_interrupt_shadow(vcpu, 0);
+       vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
+ }
+@@ -2667,6 +2688,8 @@ static struct kvm_x86_ops svm_x86_ops = 
+       .run = svm_vcpu_run,
+       .handle_exit = handle_exit,
+       .skip_emulated_instruction = skip_emulated_instruction,
++      .set_interrupt_shadow = svm_set_interrupt_shadow,
++      .get_interrupt_shadow = svm_get_interrupt_shadow,
+       .patch_hypercall = svm_patch_hypercall,
+       .get_irq = svm_get_irq,
+       .set_irq = svm_set_irq,
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -732,23 +732,45 @@ static void vmx_set_rflags(struct kvm_vc
+       vmcs_writel(GUEST_RFLAGS, rflags);
+ }
++static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
++{
++      u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
++      int ret = 0;
++
++      if (interruptibility & GUEST_INTR_STATE_STI)
++              ret |= X86_SHADOW_INT_STI;
++      if (interruptibility & GUEST_INTR_STATE_MOV_SS)
++              ret |= X86_SHADOW_INT_MOV_SS;
++
++      return ret & mask;
++}
++
++static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
++{
++      u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
++      u32 interruptibility = interruptibility_old;
++
++      interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
++
++      if (mask & X86_SHADOW_INT_MOV_SS)
++              interruptibility |= GUEST_INTR_STATE_MOV_SS;
++      if (mask & X86_SHADOW_INT_STI)
++              interruptibility |= GUEST_INTR_STATE_STI;
++
++      if ((interruptibility != interruptibility_old))
++              vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
++}
++
+ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+ {
+       unsigned long rip;
+-      u32 interruptibility;
+       rip = kvm_rip_read(vcpu);
+       rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+       kvm_rip_write(vcpu, rip);
+-      /*
+-       * We emulated an instruction, so temporary interrupt blocking
+-       * should be removed, if set.
+-       */
+-      interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+-      if (interruptibility & 3)
+-              vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+-                           interruptibility & ~3);
++      /* skipping an emulated instruction also counts */
++      vmx_set_interrupt_shadow(vcpu, 0);
+       vcpu->arch.interrupt_window_open = 1;
+ }
+@@ -3738,6 +3760,8 @@ static struct kvm_x86_ops vmx_x86_ops = 
+       .run = vmx_vcpu_run,
+       .handle_exit = kvm_handle_exit,
+       .skip_emulated_instruction = skip_emulated_instruction,
++      .set_interrupt_shadow = vmx_set_interrupt_shadow,
++      .get_interrupt_shadow = vmx_get_interrupt_shadow,
+       .patch_hypercall = vmx_patch_hypercall,
+       .get_irq = vmx_get_irq,
+       .set_irq = vmx_inject_irq,
diff --git a/queue-2.6.30/kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch b/queue-2.6.30/kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch
new file mode 100644 (file)
index 0000000..bf31267
--- /dev/null
@@ -0,0 +1,52 @@
+From mtosatti@redhat.com  Thu Sep  3 14:23:03 2009
+From: Gleb Natapov <gleb@redhat.com>
+Date: Mon,  3 Aug 2009 14:57:51 -0300
+Subject: KVM: MMU: do not free active mmu pages in free_mmu_pages()
+To: stable@kernel.org
+Cc: avi@redhat.com, Gleb Natapov <gleb@redhat.com>
+Message-ID: <1249322277-5824-4-git-send-email-mtosatti@redhat.com>
+
+
+From: Gleb Natapov <gleb@redhat.com>
+
+(cherry picked from commit f00be0cae4e6ad0a8c7be381c6d9be3586800b3e)
+
+free_mmu_pages() should only undo what alloc_mmu_pages() does.
+Free mmu pages from the generic VM destruction function, kvm_destroy_vm().
+
+Signed-off-by: Gleb Natapov <gleb@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c  |    8 --------
+ virt/kvm/kvm_main.c |    2 ++
+ 2 files changed, 2 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2657,14 +2657,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
+ static void free_mmu_pages(struct kvm_vcpu *vcpu)
+ {
+-      struct kvm_mmu_page *sp;
+-
+-      while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
+-              sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
+-                                struct kvm_mmu_page, link);
+-              kvm_mmu_zap_page(vcpu->kvm, sp);
+-              cond_resched();
+-      }
+       free_page((unsigned long)vcpu->arch.mmu.pae_root);
+ }
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -881,6 +881,8 @@ static void kvm_destroy_vm(struct kvm *k
+ #endif
+ #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+       mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
++#else
++      kvm_arch_flush_shadow(kvm);
+ #endif
+       kvm_arch_destroy_vm(kvm);
+       mmdrop(mm);
diff --git a/queue-2.6.30/kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch b/queue-2.6.30/kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch
new file mode 100644 (file)
index 0000000..e32333d
--- /dev/null
@@ -0,0 +1,61 @@
+From mtosatti@redhat.com  Thu Sep  3 14:26:28 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Tue,  1 Sep 2009 12:15:12 -0300
+Subject: KVM: MMU: handle n_free_mmu_pages > n_alloc_mmu_pages in kvm_mmu_change_mmu_pages
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1251818115-22157-4-git-send-email-mtosatti@redhat.com>
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 025dbbf36a7680bffe54d9dcbf0a8bc01a7cbd10)
+
+kvm_mmu_change_mmu_pages mishandles the case where n_alloc_mmu_pages is
+smaller than n_free_mmu_pages, by not checking if the result of
+the subtraction is negative.
+
+It's a valid condition which can happen if a large number of pages has
+been freed recently.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1417,24 +1417,25 @@ static int kvm_mmu_zap_page(struct kvm *
+  */
+ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
+ {
++      int used_pages;
++
++      used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
++      used_pages = max(0, used_pages);
++
+       /*
+        * If we set the number of mmu pages to be smaller be than the
+        * number of actived pages , we must to free some mmu pages before we
+        * change the value
+        */
+-      if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
+-          kvm_nr_mmu_pages) {
+-              int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
+-                                     - kvm->arch.n_free_mmu_pages;
+-
+-              while (n_used_mmu_pages > kvm_nr_mmu_pages) {
++      if (used_pages > kvm_nr_mmu_pages) {
++              while (used_pages > kvm_nr_mmu_pages) {
+                       struct kvm_mmu_page *page;
+                       page = container_of(kvm->arch.active_mmu_pages.prev,
+                                           struct kvm_mmu_page, link);
+                       kvm_mmu_zap_page(kvm, page);
+-                      n_used_mmu_pages--;
++                      used_pages--;
+               }
+               kvm->arch.n_free_mmu_pages = 0;
+       }
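
The clamp is simple to demonstrate standalone (a sketch; the kernel uses
its own max() macro):

    #include <stdio.h>

    #define max(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        /* after a burst of frees, more pages can sit on the free list
         * than were ever accounted as allocated for this VM */
        int n_alloc_mmu_pages = 10, n_free_mmu_pages = 25;

        int used_pages = n_alloc_mmu_pages - n_free_mmu_pages; /* -15 */
        used_pages = max(0, used_pages);                       /* clamp */

        printf("used_pages = %d\n", used_pages);               /* 0 */
        return 0;
    }
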
diff --git a/queue-2.6.30/kvm-mmu-limit-rmap-chain-length.patch b/queue-2.6.30/kvm-mmu-limit-rmap-chain-length.patch
new file mode 100644 (file)
index 0000000..c69e876
--- /dev/null
@@ -0,0 +1,109 @@
+From mtosatti@redhat.com  Thu Sep  3 14:26:45 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Tue,  1 Sep 2009 12:15:13 -0300
+Subject: KVM: MMU: limit rmap chain length
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1251818115-22157-5-git-send-email-mtosatti@redhat.com>
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 53a27b39ff4d2492f84b1fdc2f0047175f0b0b93)
+
+Otherwise the host can spend too long traversing an rmap chain, which
+happens under a spinlock.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c |   33 ++++++++++++++++++++++++++++-----
+ 1 file changed, 28 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -490,16 +490,20 @@ static unsigned long *gfn_to_rmap(struct
+  *
+  * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
+  * containing more mappings.
++ *
++ * Returns the number of rmap entries before the spte was added or zero if
++ * the spte was not added.
++ *
+  */
+-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
++static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+ {
+       struct kvm_mmu_page *sp;
+       struct kvm_rmap_desc *desc;
+       unsigned long *rmapp;
+-      int i;
++      int i, count = 0;
+       if (!is_rmap_pte(*spte))
+-              return;
++              return count;
+       gfn = unalias_gfn(vcpu->kvm, gfn);
+       sp = page_header(__pa(spte));
+       sp->gfns[spte - sp->spt] = gfn;
+@@ -516,8 +520,10 @@ static void rmap_add(struct kvm_vcpu *vc
+       } else {
+               rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
+               desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+-              while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
++              while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
+                       desc = desc->more;
++                      count += RMAP_EXT;
++              }
+               if (desc->shadow_ptes[RMAP_EXT-1]) {
+                       desc->more = mmu_alloc_rmap_desc(vcpu);
+                       desc = desc->more;
+@@ -526,6 +532,7 @@ static void rmap_add(struct kvm_vcpu *vc
+                       ;
+               desc->shadow_ptes[i] = spte;
+       }
++      return count;
+ }
+ static void rmap_desc_remove_entry(unsigned long *rmapp,
+@@ -755,6 +762,19 @@ static int kvm_age_rmapp(struct kvm *kvm
+       return young;
+ }
++#define RMAP_RECYCLE_THRESHOLD 1000
++
++static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
++{
++      unsigned long *rmapp;
++
++      gfn = unalias_gfn(vcpu->kvm, gfn);
++      rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
++
++      kvm_unmap_rmapp(vcpu->kvm, rmapp);
++      kvm_flush_remote_tlbs(vcpu->kvm);
++}
++
+ int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+ {
+       return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+@@ -1771,6 +1791,7 @@ static void mmu_set_spte(struct kvm_vcpu
+ {
+       int was_rmapped = 0;
+       int was_writeble = is_writeble_pte(*shadow_pte);
++      int rmap_count;
+       pgprintk("%s: spte %llx access %x write_fault %d"
+                " user_fault %d gfn %lx\n",
+@@ -1812,9 +1833,11 @@ static void mmu_set_spte(struct kvm_vcpu
+       page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
+       if (!was_rmapped) {
+-              rmap_add(vcpu, shadow_pte, gfn, largepage);
++              rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
+               if (!is_rmap_pte(*shadow_pte))
+                       kvm_release_pfn_clean(pfn);
++              if (rmap_count > RMAP_RECYCLE_THRESHOLD)
++                      rmap_recycle(vcpu, gfn, largepage);
+       } else {
+               if (was_writeble)
+                       kvm_release_pfn_dirty(pfn);
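
As a rough user-space model of the new bound (invented names; the
descriptor chain itself is elided): rmap_add() reports the pre-existing
entry count in steps of RMAP_EXT, and the caller tears the chain down once
it crosses RMAP_RECYCLE_THRESHOLD, so the walk under mmu_lock stays short:

    #include <stdio.h>

    #define RMAP_EXT 4
    #define RMAP_RECYCLE_THRESHOLD 1000

    static int rmap_entries;   /* stands in for the desc chain length */

    /* report entries found while walking the chain, counted RMAP_EXT
     * at a time (one step per full descriptor), then add one entry */
    static int rmap_add(void)
    {
        int count = rmap_entries - rmap_entries % RMAP_EXT;
        rmap_entries++;
        return count;
    }

    static void rmap_recycle(void) { rmap_entries = 0; } /* unmap gfn */

    int main(void)
    {
        for (int i = 0; i < 5000; i++)
            if (rmap_add() > RMAP_RECYCLE_THRESHOLD)
                rmap_recycle();
        printf("chain length stays bounded: %d\n", rmap_entries);
        return 0;
    }
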
diff --git a/queue-2.6.30/kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch b/queue-2.6.30/kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch
new file mode 100644 (file)
index 0000000..a82fcdf
--- /dev/null
@@ -0,0 +1,85 @@
+From mtosatti@redhat.com  Thu Sep  3 14:22:36 2009
+From: mtosatti@redhat.com
+Date: Mon,  3 Aug 2009 14:57:50 -0300
+Subject: KVM: MMU: protect kvm_mmu_change_mmu_pages with mmu_lock
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1249322277-5824-3-git-send-email-mtosatti@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 7c8a83b75a38a807d37f5a4398eca2a42c8cf513)
+
+kvm_handle_hva, called by MMU notifiers, manipulates mmu data only with
+the protection of mmu_lock.
+
+Update kvm_mmu_change_mmu_pages callers to take mmu_lock, thus protecting
+against kvm_handle_hva.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c |    2 --
+ arch/x86/kvm/x86.c |    6 ++++++
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2729,7 +2729,6 @@ void kvm_mmu_slot_remove_write_access(st
+ {
+       struct kvm_mmu_page *sp;
+-      spin_lock(&kvm->mmu_lock);
+       list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
+               int i;
+               u64 *pt;
+@@ -2744,7 +2743,6 @@ void kvm_mmu_slot_remove_write_access(st
+                               pt[i] &= ~PT_WRITABLE_MASK;
+       }
+       kvm_flush_remote_tlbs(kvm);
+-      spin_unlock(&kvm->mmu_lock);
+ }
+ void kvm_mmu_zap_all(struct kvm *kvm)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1608,10 +1608,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages
+               return -EINVAL;
+       down_write(&kvm->slots_lock);
++      spin_lock(&kvm->mmu_lock);
+       kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
+       kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
++      spin_unlock(&kvm->mmu_lock);
+       up_write(&kvm->slots_lock);
+       return 0;
+ }
+@@ -1787,7 +1789,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+       /* If nothing is dirty, don't bother messing with page tables. */
+       if (is_dirty) {
++              spin_lock(&kvm->mmu_lock);
+               kvm_mmu_slot_remove_write_access(kvm, log->slot);
++              spin_unlock(&kvm->mmu_lock);
+               kvm_flush_remote_tlbs(kvm);
+               memslot = &kvm->memslots[log->slot];
+               n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+@@ -4419,12 +4423,14 @@ int kvm_arch_set_memory_region(struct kv
+               }
+       }
++      spin_lock(&kvm->mmu_lock);
+       if (!kvm->arch.n_requested_mmu_pages) {
+               unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
+               kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+       }
+       kvm_mmu_slot_remove_write_access(kvm, mem->slot);
++      spin_unlock(&kvm->mmu_lock);
+       kvm_flush_remote_tlbs(kvm);
+       return 0;
diff --git a/queue-2.6.30/kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch b/queue-2.6.30/kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch
new file mode 100644 (file)
index 0000000..e9060e7
--- /dev/null
@@ -0,0 +1,51 @@
+From mtosatti@redhat.com  Thu Sep  3 14:23:53 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Mon,  3 Aug 2009 14:57:54 -0300
+Subject: KVM: MMU: Use different shadows when EFER.NXE changes
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249322277-5824-7-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit 9645bb56b31a1b70ab9e470387b5264cafc04aa9)
+
+A pte that is shadowed when the guest EFER.NXE=1 is not valid when
+EFER.NXE=0; if bit 63 is set, the pte should cause a fault, and since the
+shadow EFER always has NX enabled, this won't happen.
+
+Fix by using a different shadow page table for different EFER.NXE bits.  This
+allows vcpus to run correctly with different values of EFER.NXE, and for
+transitions on this bit to be handled correctly without requiring a full
+flush.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/x86.c              |    3 +++
+ 2 files changed, 4 insertions(+)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -185,6 +185,7 @@ union kvm_mmu_page_role {
+               unsigned access:3;
+               unsigned invalid:1;
+               unsigned cr4_pge:1;
++              unsigned nxe:1;
+       };
+ };
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -523,6 +523,9 @@ static void set_efer(struct kvm_vcpu *vc
+       efer |= vcpu->arch.shadow_efer & EFER_LMA;
+       vcpu->arch.shadow_efer = efer;
++
++      vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
++      kvm_mmu_reset_context(vcpu);
+ }
+ void kvm_enable_efer_bits(u64 mask)
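
Why a role bit is enough: the packed role word participates in the
shadow-page hash lookup, so pages built under differing nxe values simply
never match. A minimal model (simplified bitfield layout, for illustration
only):

    #include <stdio.h>

    union mmu_page_role {
        unsigned word;             /* compared as a whole on lookup */
        struct {
            unsigned level : 4;
            unsigned nxe   : 1;    /* the bit added by this patch */
        };
    };

    int main(void)
    {
        union mmu_page_role a = { .word = 0 }, b = { .word = 0 };

        a.level = 4; a.nxe = 1;    /* shadow built with EFER.NXE = 1 */
        b.level = 4; b.nxe = 0;    /* same page, guest cleared NXE */

        /* different role words -> different hash keys */
        printf("%s\n", a.word == b.word ? "same shadow"
                                        : "separate shadows");
        return 0;
    }
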
diff --git a/queue-2.6.30/kvm-svm-force-new-asid-on-vcpu-migration.patch b/queue-2.6.30/kvm-svm-force-new-asid-on-vcpu-migration.patch
new file mode 100644 (file)
index 0000000..dc2ef8b
--- /dev/null
@@ -0,0 +1,59 @@
+From mtosatti@redhat.com  Thu Sep  3 14:26:07 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Tue,  1 Sep 2009 12:15:11 -0300
+Subject: KVM: SVM: force new asid on vcpu migration
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1251818115-22157-3-git-send-email-mtosatti@redhat.com>
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 4b656b1202498184a0ecef86b3b89ff613b9c6ab)
+
+If a migrated vcpu matches the asid_generation value of the target pcpu,
+there will be no TLB flush via TLB_CONTROL_FLUSH_ALL_ASID.
+
+The check for vcpu.cpu in pre_svm_run is meaningless since svm_vcpu_load
+already updated it on schedule-in.
+
+Such a vcpu will VMRUN with stale TLB entries.
+
+Based on original patch from Joerg Roedel (http://patchwork.kernel.org/patch/10021/)
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Acked-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/svm.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -736,6 +736,7 @@ static void svm_vcpu_load(struct kvm_vcp
+               svm->vmcb->control.tsc_offset += delta;
+               vcpu->cpu = cpu;
+               kvm_migrate_timers(vcpu);
++              svm->asid_generation = 0;
+       }
+       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+@@ -1046,7 +1047,6 @@ static void new_asid(struct vcpu_svm *sv
+               svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+       }
+-      svm->vcpu.cpu = svm_data->cpu;
+       svm->asid_generation = svm_data->asid_generation;
+       svm->vmcb->control.asid = svm_data->next_asid++;
+ }
+@@ -2258,8 +2258,8 @@ static void pre_svm_run(struct vcpu_svm 
+       struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
+       svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+-      if (svm->vcpu.cpu != cpu ||
+-          svm->asid_generation != svm_data->asid_generation)
++      /* FIXME: handle wraparound of asid_generation */
++      if (svm->asid_generation != svm_data->asid_generation)
+               new_asid(svm, svm_data);
+ }
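
A user-space model of the fixed check (invented types; per-cpu data is
passed explicitly): zeroing asid_generation in svm_vcpu_load guarantees
the comparison in pre_svm_run fails after migration, even when the source
and target pcpu happen to share a generation number:

    #include <stdio.h>

    struct pcpu { unsigned asid_generation, next_asid; };
    struct vcpu { unsigned asid_generation, asid; };

    static void new_asid(struct vcpu *v, struct pcpu *p)
    {
        v->asid_generation = p->asid_generation;
        v->asid = p->next_asid++;  /* fresh asid: stale TLB unusable */
    }

    static void pre_svm_run(struct vcpu *v, struct pcpu *p)
    {
        /* fixed check: generation mismatch alone forces a new asid */
        if (v->asid_generation != p->asid_generation)
            new_asid(v, p);
    }

    int main(void)
    {
        struct pcpu cpu0 = { 1, 1 }, cpu1 = { 1, 1 }; /* same gen! */
        struct vcpu v = { 0, 0 };

        pre_svm_run(&v, &cpu0);    /* first run on cpu0 */
        v.asid_generation = 0;     /* migration: svm_vcpu_load zeroes */
        pre_svm_run(&v, &cpu1);    /* new asid despite equal gens */
        printf("asid=%u gen=%u\n", v.asid, v.asid_generation);
        return 0;
    }
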
diff --git a/queue-2.6.30/kvm-take-mmu_lock-when-updating-a-deleted-slot.patch b/queue-2.6.30/kvm-take-mmu_lock-when-updating-a-deleted-slot.patch
new file mode 100644 (file)
index 0000000..6c17061
--- /dev/null
@@ -0,0 +1,36 @@
+From mtosatti@redhat.com  Thu Sep  3 14:22:07 2009
+From: mtosatti@redhat.com
+Date: Mon,  3 Aug 2009 14:57:48 -0300
+Subject: KVM: take mmu_lock when updating a deleted slot
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1249322277-5824-1-git-send-email-mtosatti@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit b43b1901ad282aeb74161837fb403927102687a1)
+
+kvm_handle_hva relies on mmu_lock protection to safely access
+the memslot structures.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ virt/kvm/kvm_main.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1055,8 +1055,10 @@ int __kvm_set_memory_region(struct kvm *
+       kvm_free_physmem_slot(&old, npages ? &new : NULL);
+       /* Slot deletion case: we have to update the current slot */
++      spin_lock(&kvm->mmu_lock);
+       if (!npages)
+               *memslot = old;
++      spin_unlock(&kvm->mmu_lock);
+ #ifdef CONFIG_DMAR
+       /* map the pages in iommu page table */
+       r = kvm_iommu_map_pages(kvm, base_gfn, npages);
diff --git a/queue-2.6.30/kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch b/queue-2.6.30/kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch
new file mode 100644 (file)
index 0000000..39ed49c
--- /dev/null
@@ -0,0 +1,113 @@
+From mtosatti@redhat.com  Thu Sep  3 14:22:22 2009
+From: mtosatti@redhat.com
+Date: Mon,  3 Aug 2009 14:57:49 -0300
+Subject: KVM: x86: check for cr3 validity in mmu_alloc_roots
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1249322277-5824-2-git-send-email-mtosatti@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 8986ecc0ef58c96eec48d8502c048f3ab67fd8e2)
+
+Verify that the cr3 address stored in vcpu->arch.cr3 points to an existing
+memslot. If not, inject a triple fault.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c |   27 +++++++++++++++++++++++----
+ arch/x86/kvm/x86.c |    1 +
+ 2 files changed, 24 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1942,7 +1942,19 @@ static void mmu_free_roots(struct kvm_vc
+       vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ }
+-static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
++static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
++{
++      int ret = 0;
++
++      if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
++              set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
++              ret = 1;
++      }
++
++      return ret;
++}
++
++static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
+ {
+       int i;
+       gfn_t root_gfn;
+@@ -1957,13 +1969,15 @@ static void mmu_alloc_roots(struct kvm_v
+               ASSERT(!VALID_PAGE(root));
+               if (tdp_enabled)
+                       direct = 1;
++              if (mmu_check_root(vcpu, root_gfn))
++                      return 1;
+               sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+                                     PT64_ROOT_LEVEL, direct,
+                                     ACC_ALL, NULL);
+               root = __pa(sp->spt);
+               ++sp->root_count;
+               vcpu->arch.mmu.root_hpa = root;
+-              return;
++              return 0;
+       }
+       direct = !is_paging(vcpu);
+       if (tdp_enabled)
+@@ -1980,6 +1994,8 @@ static void mmu_alloc_roots(struct kvm_v
+                       root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
+               } else if (vcpu->arch.mmu.root_level == 0)
+                       root_gfn = 0;
++              if (mmu_check_root(vcpu, root_gfn))
++                      return 1;
+               sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+                                     PT32_ROOT_LEVEL, direct,
+                                     ACC_ALL, NULL);
+@@ -1988,6 +2004,7 @@ static void mmu_alloc_roots(struct kvm_v
+               vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+       }
+       vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
++      return 0;
+ }
+ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
+@@ -2006,7 +2023,7 @@ static void mmu_sync_roots(struct kvm_vc
+       for (i = 0; i < 4; ++i) {
+               hpa_t root = vcpu->arch.mmu.pae_root[i];
+-              if (root) {
++              if (root && VALID_PAGE(root)) {
+                       root &= PT64_BASE_ADDR_MASK;
+                       sp = page_header(root);
+                       mmu_sync_children(vcpu, sp);
+@@ -2290,9 +2307,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
+               goto out;
+       spin_lock(&vcpu->kvm->mmu_lock);
+       kvm_mmu_free_some_pages(vcpu);
+-      mmu_alloc_roots(vcpu);
++      r = mmu_alloc_roots(vcpu);
+       mmu_sync_roots(vcpu);
+       spin_unlock(&vcpu->kvm->mmu_lock);
++      if (r)
++              goto out;
+       kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+       kvm_mmu_flush_tlb(vcpu);
+ out:
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4433,6 +4433,7 @@ int kvm_arch_set_memory_region(struct kv
+ void kvm_arch_flush_shadow(struct kvm *kvm)
+ {
+       kvm_mmu_zap_all(kvm);
++      kvm_reload_remote_mmus(kvm);
+ }
+ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
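
A standalone model of the new check (hypothetical types; the kernel uses
kvm_is_visible_gfn()): the root gfn taken from cr3 must fall inside some
memslot, otherwise a triple fault is requested instead of building roots
on unbacked memory:

    #include <stdio.h>
    #include <stdbool.h>

    struct memslot { unsigned long base_gfn, npages; };

    static bool gfn_visible(const struct memslot *slots, int n,
                            unsigned long gfn)
    {
        for (int i = 0; i < n; i++)
            if (gfn >= slots[i].base_gfn &&
                gfn < slots[i].base_gfn + slots[i].npages)
                return true;
        return false;
    }

    int main(void)
    {
        struct memslot slots[1] = { { 0, 256 } }; /* gfns 0..255 backed */
        unsigned long root_gfn = 0x1000;          /* bogus cr3 gfn */

        if (!gfn_visible(slots, 1, root_gfn))
            printf("request KVM_REQ_TRIPLE_FAULT\n");
        return 0;
    }
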
diff --git a/queue-2.6.30/kvm-x86-ignore-reads-to-evntsel-msrs.patch b/queue-2.6.30/kvm-x86-ignore-reads-to-evntsel-msrs.patch
new file mode 100644 (file)
index 0000000..8d1f06c
--- /dev/null
@@ -0,0 +1,38 @@
+From mtosatti@redhat.com  Thu Sep  3 14:24:08 2009
+From: Amit Shah <amit.shah@redhat.com>
+Date: Mon,  3 Aug 2009 14:57:55 -0300
+Subject: KVM: x86: Ignore reads to EVNTSEL MSRs
+To: stable@kernel.org
+Cc: Amit Shah <amit.shah@redhat.com>, avi@redhat.com
+Message-ID: <1249322277-5824-8-git-send-email-mtosatti@redhat.com>
+
+
+From: Amit Shah <amit.shah@redhat.com>
+
+(cherry picked from commit 7fe29e0faacb650d31b9e9f538203a157bec821d)
+
+We ignore writes to the performance counters and performance event
+selector registers already. Kaspersky antivirus reads the eventsel
+MSRs, causing it to crash under the current behaviour.
+
+Return 0 as data when the eventsel registers are read to stop the
+crash.
+
+Signed-off-by: Amit Shah <amit.shah@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -898,6 +898,8 @@ int kvm_get_msr_common(struct kvm_vcpu *
+       case MSR_IA32_LASTINTFROMIP:
+       case MSR_IA32_LASTINTTOIP:
+       case MSR_VM_HSAVE_PA:
++      case MSR_P6_EVNTSEL0:
++      case MSR_P6_EVNTSEL1:
+               data = 0;
+               break;
+       case MSR_MTRRcap:
diff --git a/queue-2.6.30/kvm-x86-verify-mtrr-pat-validity.patch b/queue-2.6.30/kvm-x86-verify-mtrr-pat-validity.patch
new file mode 100644 (file)
index 0000000..3c05e5c
--- /dev/null
@@ -0,0 +1,74 @@
+From mtosatti@redhat.com  Thu Sep  3 14:25:49 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Tue,  1 Sep 2009 12:15:10 -0300
+Subject: KVM: x86: verify MTRR/PAT validity
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1251818115-22157-2-git-send-email-mtosatti@redhat.com>
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit d6289b9365c3f622a8cfe62c4fb054bb70b5061a)
+
+Do not allow invalid memory types in MTRR/PAT (generating a #GP
+otherwise).
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c |   39 ++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 38 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -706,11 +706,48 @@ static bool msr_mtrr_valid(unsigned msr)
+       return false;
+ }
++static bool valid_pat_type(unsigned t)
++{
++      return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
++}
++
++static bool valid_mtrr_type(unsigned t)
++{
++      return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
++}
++
++static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
++{
++      int i;
++
++      if (!msr_mtrr_valid(msr))
++              return false;
++
++      if (msr == MSR_IA32_CR_PAT) {
++              for (i = 0; i < 8; i++)
++                      if (!valid_pat_type((data >> (i * 8)) & 0xff))
++                              return false;
++              return true;
++      } else if (msr == MSR_MTRRdefType) {
++              if (data & ~0xcff)
++                      return false;
++              return valid_mtrr_type(data & 0xff);
++      } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
++              for (i = 0; i < 8 ; i++)
++                      if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
++                              return false;
++              return true;
++      }
++
++      /* variable MTRRs */
++      return valid_mtrr_type(data & 0xff);
++}
++
+ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+ {
+       u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+-      if (!msr_mtrr_valid(msr))
++      if (!mtrr_valid(vcpu, msr, data))
+               return 1;
+       if (msr == MSR_MTRRdefType) {
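
The validity helpers encode each allowed memory-type set as a bitmask and
test membership with (1 << t): PAT permits types 0, 1, 4, 5, 6, 7, which
is binary 11110011 = 0xf3; MTRR permits 0, 1, 4, 5, 6, i.e. 01110011 =
0x73. A standalone demonstration (same expressions as the patch):

    #include <stdio.h>
    #include <stdbool.h>

    static bool valid_pat_type(unsigned t)
    {
        return t < 8 && (1 << t) & 0xf3;  /* 0, 1, 4, 5, 6, 7 */
    }

    static bool valid_mtrr_type(unsigned t)
    {
        return t < 8 && (1 << t) & 0x73;  /* 0, 1, 4, 5, 6 */
    }

    int main(void)
    {
        /* types 2 and 3 are reserved and rejected by both checks */
        for (unsigned t = 0; t < 8; t++)
            printf("type %u: pat=%d mtrr=%d\n",
                   t, valid_pat_type(t), valid_mtrr_type(t));
        return 0;
    }
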
diff --git a/queue-2.6.30/series b/queue-2.6.30/series
index 2c49c1b6a45ef113d2e7405c7706b929855482bd..0bede0858fe0924eb566ecc1407df5789e3efcf3 100644 (file)
--- a/queue-2.6.30/series
@@ -16,3 +16,20 @@ clone-fix-race-between-copy_process-and-de_thread.patch
 wmi-fix-kernel-panic-when-stack-protection-enabled.patch
 sunrpc-fix-rpc_task_force_reencode.patch
 alsa-hda-fix-macbookpro-3-1-4-1-quirk-with-alc889a.patch
+kvm-take-mmu_lock-when-updating-a-deleted-slot.patch
+kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch
+kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch
+kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch
+kvm-introduce-set-get-_interrupt_shadow.patch
+kvm-deal-with-interrupt-shadow-state-for-emulated-instructions.patch
+kvm-mmu-use-different-shadows-when-efer.nxe-changes.patch
+kvm-x86-ignore-reads-to-evntsel-msrs.patch
+kvm-ignore-reads-to-k7-evntsel-msrs.patch
+kvm-fix-cpuid-feature-misreporting.patch
+kvm-x86-verify-mtrr-pat-validity.patch
+kvm-svm-force-new-asid-on-vcpu-migration.patch
+kvm-mmu-handle-n_free_mmu_pages-n_alloc_mmu_pages-in-kvm_mmu_change_mmu_pages.patch
+kvm-mmu-limit-rmap-chain-length.patch
+kvm-fix-ack-not-being-delivered-when-msi-present.patch
+kvm-avoid-redelivery-of-edge-interrupt-before-next-edge.patch
+kvm-fix-kvm_get_msr_index_list.patch