--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:36:07 2009
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Thu, 6 Aug 2009 14:39:53 -0300
+Subject: KVM: add MC5_MISC msr read support
+To: stable@kernel.org
+Cc: Joerg Roedel <joerg.roedel@amd.com>, Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-14-git-send-email-mtosatti@redhat.com>
+
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+(cherry picked from commit a89c1ad270ca7ad0eec2667bc754362ce7b142be)
+
+Currently KVM implements MC0-MC4_MISC read support. When booting Linux this
+results in KVM warnings in the kernel log when the guest tries to read
+MC5_MISC. Fix these warnings with this patch.
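+
+As an aside (not part of the fix itself): the "+20" below works because the
+architectural MCA banks lay out their per-bank MSRs four apart. A minimal
+sketch, with the MC0_MISC value assumed from that layout:
+
+	/* MCn_MISC = MC0_MISC + 4 * n, so MC5_MISC is MC0_MISC + 20 */
+	#define MSR_IA32_MC0_MISC	0x403
+	#define MSR_IA32_MCx_MISC(n)	(MSR_IA32_MC0_MISC + 4 * (n))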
+
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -752,6 +752,7 @@ int kvm_get_msr_common(struct kvm_vcpu *
+ case MSR_IA32_MC0_MISC+8:
+ case MSR_IA32_MC0_MISC+12:
+ case MSR_IA32_MC0_MISC+16:
++ case MSR_IA32_MC0_MISC+20:
+ case MSR_IA32_UCODE_REV:
+ case MSR_IA32_EBL_CR_POWERON:
+ data = 0;
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:33:18 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:44 -0300
+Subject: KVM: Allocate guest memory as MAP_PRIVATE, not MAP_SHARED
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-5-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit acee3c04e8208c17aad1baff99baa68d71640a19)
+
+There is no reason to share internal memory slots with fork()ed instances.
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4003,7 +4003,7 @@ int kvm_arch_set_memory_region(struct kv
+ userspace_addr = do_mmap(NULL, 0,
+ npages * PAGE_SIZE,
+ PROT_READ | PROT_WRITE,
+- MAP_SHARED | MAP_ANONYMOUS,
++ MAP_PRIVATE | MAP_ANONYMOUS,
+ 0);
+ up_write(¤t->mm->mmap_sem);
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:33:31 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:45 -0300
+Subject: KVM: Don't call get_user_pages(.force = 1)
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-6-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit d657c7335b97d746aa6123c56504b46c20e37df3)
+
+This is esoteric and only needed to break COW on MAP_SHARED mappings. Since
+KVM no longer does these sorts of mappings, breaking COW on them is no longer
+necessary.
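+
+For orientation only, the prototype in this tree looks roughly like the
+sketch below (an assumption recalled from kernels of this era, shown purely
+to point out which argument the hunk flips):
+
+	/*
+	 * int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+	 *		      unsigned long start, int len, int write,
+	 *		      int force, struct page **pages,
+	 *		      struct vm_area_struct **vmas);
+	 *
+	 * The sixth argument ("force") is the 1 that becomes 0 below.
+	 */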
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ virt/kvm/kvm_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -726,7 +726,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t
+ return page_to_pfn(bad_page);
+ }
+
+- npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
++ npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
+ NULL);
+
+ if (unlikely(npages != 1)) {
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:37:31 2009
+From: Glauber Costa <glommer@redhat.com>
+Date: Thu, 6 Aug 2009 14:39:57 -0300
+Subject: KVM: Don't destroy vcpu in case vcpu_setup fails
+To: stable@kernel.org
+Cc: Glauber Costa <glommer@redhat.com>, avi@redhat.com
+Message-ID: <1249580407-21883-18-git-send-email-mtosatti@redhat.com>
+
+
+From: Glauber Costa <glommer@redhat.com>
+
+(cherry picked from commit 7d8fece678c1abc2ca3e1ceda2277c3538a9161c)
+
+One of vcpu_setup's responsibilities is to do mmu initialization.
+However, we can fail in kvm_arch_vcpu_reset before we get the chance
+to init the mmu, and vcpu_destroy will then attempt to destroy the
+mmu, triggering a bug. Keeping track of whether or not the mmu is
+initialized would unnecessarily complicate things. Rather, we just
+return, making sure any needed uninitialization is done before we
+return, in case we fail.
+
+Signed-off-by: Glauber Costa <glommer@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ virt/kvm/kvm_main.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1074,12 +1074,11 @@ static int kvm_vm_ioctl_create_vcpu(stru
+
+ r = kvm_arch_vcpu_setup(vcpu);
+ if (r)
+- goto vcpu_destroy;
++ return r;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->vcpus[n]) {
+ r = -EEXIST;
+- mutex_unlock(&kvm->lock);
+ goto vcpu_destroy;
+ }
+ kvm->vcpus[n] = vcpu;
+@@ -1095,8 +1094,8 @@ static int kvm_vm_ioctl_create_vcpu(stru
+ unlink:
+ mutex_lock(&kvm->lock);
+ kvm->vcpus[n] = NULL;
+- mutex_unlock(&kvm->lock);
+ vcpu_destroy:
++ mutex_unlock(&kvm->lock);
+ kvm_arch_vcpu_destroy(vcpu);
+ return r;
+ }
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:40:03 2009
+From: Izik Eidus <ieidus@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:05 -0300
+Subject: KVM: Fix dirty bit tracking for slots with large pages
+To: stable@kernel.org
+Cc: Izik Eidus <ieidus@redhat.com>, avi@redhat.com
+Message-ID: <1249580407-21883-26-git-send-email-mtosatti@redhat.com>
+
+
+From: Izik Eidus <ieidus@redhat.com>
+
+(cherry picked from commit e244584fe3a5c20deddeca246548ac86dbc6e1d1)
+
+When a slot is already allocated and we are asked to start tracking it,
+we need to break up its large pages.
+
+This code flushes the mmu when someone asks a slot to start dirty bit
+tracking.
+
+Signed-off-by: Izik Eidus <ieidus@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ virt/kvm/kvm_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -550,6 +550,8 @@ int __kvm_set_memory_region(struct kvm *
+ if (!new.dirty_bitmap)
+ goto out_free;
+ memset(new.dirty_bitmap, 0, dirty_bytes);
++ if (old.npages)
++ kvm_arch_flush_shadow(kvm);
+ }
+ #endif /* not defined CONFIG_S390 */
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:39:30 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:03 -0300
+Subject: KVM: Fix PDPTR reloading on CR4 writes
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-24-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit a2edf57f510cce6a389cc14e58c6ad0a4296d6f9)
+
+The processor is documented to reload the PDPTRs while in PAE mode if any
+of the CR4 bits PSE, PGE, or PAE change. Linux relies on this
+behaviour when zapping the low mappings of PAE kernels during boot.
+
+The code already handled changes to CR4.PAE; augment it to also notice changes
+to PSE and PGE.
+
+This triggered while booting an F11 PAE kernel; the futex initialization code
+runs before any CR3 reloads and writes to a NULL pointer; the futex subsystem
+ended up uninitialized, killing PI futexes and pulseaudio which uses them.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -318,6 +318,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
+
+ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
++ unsigned long old_cr4 = vcpu->arch.cr4;
++ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
++
+ if (cr4 & CR4_RESERVED_BITS) {
+ printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
+ kvm_inject_gp(vcpu, 0);
+@@ -331,7 +334,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu,
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+- } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
++ } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
++ && ((cr4 ^ old_cr4) & pdptr_bits)
+ && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
+ printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
+ kvm_inject_gp(vcpu, 0);
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:33:00 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:43 -0300
+Subject: KVM: Load real mode segments correctly
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-4-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit f4bbd9aaaae23007e4d79536d35a30cbbb11d407)
+
+Real mode segments do not reference the GDT or LDT; they simply compute
+base = selector * 16.
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3294,11 +3294,33 @@ static int load_segment_descriptor_to_kv
+ return 0;
+ }
+
++int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
++{
++ struct kvm_segment segvar = {
++ .base = selector << 4,
++ .limit = 0xffff,
++ .selector = selector,
++ .type = 3,
++ .present = 1,
++ .dpl = 3,
++ .db = 0,
++ .s = 1,
++ .l = 0,
++ .g = 0,
++ .avl = 0,
++ .unusable = 0,
++ };
++ kvm_x86_ops->set_segment(vcpu, &segvar, seg);
++ return 0;
++}
++
+ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+ int type_bits, int seg)
+ {
+ struct kvm_segment kvm_seg;
+
++ if (!(vcpu->arch.cr0 & X86_CR0_PE))
++ return kvm_load_realmode_segment(vcpu, selector, seg);
+ if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
+ return 1;
+ kvm_seg.type |= type_bits;
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:38:23 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:00 -0300
+Subject: KVM: Make EFER reads safe when EFER does not exist
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-21-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit e286e86e6d2042d67d09244aa0e05ffef75c9d54)
+
+Some processors don't have EFER; don't oops if userspace wants us to
+read EFER when we check NX.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -983,9 +983,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *
+
+ static int is_efer_nx(void)
+ {
+- u64 efer;
++ unsigned long long efer = 0;
+
+- rdmsrl(MSR_EFER, efer);
++ rdmsrl_safe(MSR_EFER, &efer);
+ return efer & EFER_NX;
+ }
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:39:10 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:02 -0300
+Subject: KVM: Make paravirt tlb flush also reload the PAE PDPTRs
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-23-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit a8cd0244e9cebcf9b358d24c7e7410062f3665cb)
+
+The paravirt tlb flush may be used not only to flush TLBs, but also
+to reload the four page-directory-pointer-table entries, as it is used
+as a replacement for reloading CR3. Change the code to do the entire
+CR3 reloading dance instead of simply flushing the TLB.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2233,7 +2233,7 @@ static int kvm_pv_mmu_write(struct kvm_v
+
+ static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+ {
+- kvm_x86_ops->tlb_flush(vcpu);
++ kvm_set_cr3(vcpu, vcpu->arch.cr3);
+ return 1;
+ }
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:33:46 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:46 -0300
+Subject: KVM: MMU: Add locking around kvm_mmu_slot_remove_write_access()
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-7-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit 2245a28fe2e6fdb1bdabc4dcde1ea3a5c37e2a9e)
+
+It was generally safe due to slots_lock being held for write, but it wasn't
+very nice.
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2055,6 +2055,7 @@ void kvm_mmu_slot_remove_write_access(st
+ {
+ struct kvm_mmu_page *sp;
+
++ spin_lock(&kvm->mmu_lock);
+ list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
+ int i;
+ u64 *pt;
+@@ -2068,6 +2069,7 @@ void kvm_mmu_slot_remove_write_access(st
+ if (pt[i] & PT_WRITABLE_MASK)
+ pt[i] &= ~PT_WRITABLE_MASK;
+ }
++ spin_unlock(&kvm->mmu_lock);
+ }
+
+ void kvm_mmu_zap_all(struct kvm *kvm)
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:39:46 2009
+From: Gleb Natapov <gleb@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:04 -0300
+Subject: KVM: MMU: do not free active mmu pages in free_mmu_pages()
+To: stable@kernel.org
+Cc: avi@redhat.com, Gleb Natapov <gleb@redhat.com>
+Message-ID: <1249580407-21883-25-git-send-email-mtosatti@redhat.com>
+
+
+From: Gleb Natapov <gleb@redhat.com>
+
+(cherry picked from commit f00be0cae4e6ad0a8c7be381c6d9be3586800b3e)
+
+free_mmu_pages() should only undo what alloc_mmu_pages() does.
+Free mmu pages from the generic VM destruction function, kvm_destroy_vm().
+
+Signed-off-by: Gleb Natapov <gleb@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 8 --------
+ virt/kvm/kvm_main.c | 2 ++
+ 2 files changed, 2 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1976,14 +1976,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
+
+ static void free_mmu_pages(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_mmu_page *sp;
+-
+- while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
+- sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
+- struct kvm_mmu_page, link);
+- kvm_mmu_zap_page(vcpu->kvm, sp);
+- cond_resched();
+- }
+ free_page((unsigned long)vcpu->arch.mmu.pae_root);
+ }
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -406,6 +406,8 @@ static void kvm_destroy_vm(struct kvm *k
+ #endif
+ #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
++#else
++ kvm_arch_flush_shadow(kvm);
+ #endif
+ kvm_arch_destroy_vm(kvm);
+ mmdrop(mm);
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:34:15 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:48 -0300
+Subject: KVM: MMU: Fix setting the accessed bit on non-speculative sptes
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-9-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit 3201b5d9f0f7ef392886cd76dcd2c69186d9d5cd)
+
+The accessed bit was accidentally turned on in a random flag word, rather
+than in the spte itself. This was lucky, since it used the non-EPT-compatible
+PT_ACCESSED_MASK.
+
+Fix by turning the bit on in the spte and changing it to use the portable
+accessed mask.
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1162,7 +1162,7 @@ static void mmu_set_spte(struct kvm_vcpu
+ */
+ spte = shadow_base_present_pte | shadow_dirty_mask;
+ if (!speculative)
+- pte_access |= PT_ACCESSED_MASK;
++ spte |= shadow_accessed_mask;
+ if (!dirty)
+ pte_access &= ~ACC_WRITE_MASK;
+ if (pte_access & ACC_EXEC_MASK)
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:34:01 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:47 -0300
+Subject: KVM: MMU: Flush tlbs after clearing write permission when accessing dirty log
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-8-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit 171d595d3b3254b9a952af8d1f6965d2e85dcbaa)
+
+Otherwise, the cpu may allow writes to the tracked pages, and we lose
+some display bits or fail to migrate correctly.
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2069,6 +2069,7 @@ void kvm_mmu_slot_remove_write_access(st
+ if (pt[i] & PT_WRITABLE_MASK)
+ pt[i] &= ~PT_WRITABLE_MASK;
+ }
++ kvm_flush_remote_tlbs(kvm);
+ spin_unlock(&kvm->mmu_lock);
+ }
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:36:47 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Thu, 6 Aug 2009 14:39:55 -0300
+Subject: KVM: MMU: increase per-vcpu rmap cache alloc size
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1249580407-21883-16-git-send-email-mtosatti@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit c41ef344de212bd918f7765af21b5008628c03e0)
+
+The page fault path can use two rmap_desc structures, if:
+
+- walk_addr's dirty pte update allocates one rmap_desc.
+- mmu_lock is dropped, sptes are zapped resulting in rmap_desc being
+ freed.
+- fetch->mmu_set_spte allocates another rmap_desc.
+
+Increase to 4 for safety.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -298,7 +298,7 @@ static int mmu_topup_memory_caches(struc
+ if (r)
+ goto out;
+ r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
+- rmap_desc_cache, 1);
++ rmap_desc_cache, 4);
+ if (r)
+ goto out;
+ r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:40:37 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:07 -0300
+Subject: KVM: MMU: protect kvm_mmu_change_mmu_pages with mmu_lock
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1249580407-21883-28-git-send-email-mtosatti@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 7c8a83b75a38a807d37f5a4398eca2a42c8cf513)
+
+kvm_handle_hva, called by MMU notifiers, manipulates mmu data only with
+the protection of mmu_lock.
+
+Update kvm_mmu_change_mmu_pages callers to take mmu_lock, thus protecting
+against kvm_handle_hva.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 2 --
+ arch/x86/kvm/x86.c | 6 ++++++
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2059,7 +2059,6 @@ void kvm_mmu_slot_remove_write_access(st
+ {
+ struct kvm_mmu_page *sp;
+
+- spin_lock(&kvm->mmu_lock);
+ list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
+ int i;
+ u64 *pt;
+@@ -2074,7 +2073,6 @@ void kvm_mmu_slot_remove_write_access(st
+ pt[i] &= ~PT_WRITABLE_MASK;
+ }
+ kvm_flush_remote_tlbs(kvm);
+- spin_unlock(&kvm->mmu_lock);
+ }
+
+ void kvm_mmu_zap_all(struct kvm *kvm)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1454,10 +1454,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages
+ return -EINVAL;
+
+ down_write(&kvm->slots_lock);
++ spin_lock(&kvm->mmu_lock);
+
+ kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
+ kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
+
++ spin_unlock(&kvm->mmu_lock);
+ up_write(&kvm->slots_lock);
+ return 0;
+ }
+@@ -1624,7 +1626,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+
+ /* If nothing is dirty, don't bother messing with page tables. */
+ if (is_dirty) {
++ spin_lock(&kvm->mmu_lock);
+ kvm_mmu_slot_remove_write_access(kvm, log->slot);
++ spin_unlock(&kvm->mmu_lock);
+ kvm_flush_remote_tlbs(kvm);
+ memslot = &kvm->memslots[log->slot];
+ n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+@@ -4059,12 +4063,14 @@ int kvm_arch_set_memory_region(struct kv
+ }
+ }
+
++ spin_lock(&kvm->mmu_lock);
+ if (!kvm->arch.n_requested_mmu_pages) {
+ unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
+ kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+ }
+
+ kvm_mmu_slot_remove_write_access(kvm, mem->slot);
++ spin_unlock(&kvm->mmu_lock);
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:34:31 2009
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+Date: Thu, 6 Aug 2009 14:39:49 -0300
+Subject: KVM: Reduce kvm stack usage in kvm_arch_vm_ioctl()
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>, Dave Hansen <dave@linux.vnet.ibm.com>
+Message-ID: <1249580407-21883-10-git-send-email-mtosatti@redhat.com>
+
+
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+
+(cherry picked from commit f0d662759a2465babdba1160749c446648c9d159)
+
+On my machine with gcc 3.4, kvm uses ~2k of stack in a few
+select functions. This is mostly because gcc fails to
+notice that the different case: statements could have their
+stack usage combined. It overflows very nicely if interrupts
+happen during one of these large uses.
+
+This patch uses two methods for reducing stack usage:
+1. Dynamically allocate large objects instead of putting
+   them on the stack.
+2. Use a union{} member for all of the case variables. This
+   tricks gcc into combining them all into a single stack
+   allocation. (There's also a comment on this in the code;
+   a stand-alone sketch of the trick follows below.)
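+
+A stand-alone sketch of the union trick, with hypothetical ioctl numbers and
+argument types (illustration only, not taken from the patch):
+
+	/* hypothetical large ioctl argument types */
+	struct big_state_a { char buf[1024]; };
+	struct big_state_b { char buf[1024]; };
+
+	long example_ioctl(unsigned int ioctl, void __user *argp)
+	{
+		/* one shared stack allocation instead of one per case */
+		union {
+			struct big_state_a a;
+			struct big_state_b b;
+		} u;
+
+		switch (ioctl) {
+		case 0:	/* "set A" */
+			if (copy_from_user(&u.a, argp, sizeof(u.a)))
+				return -EFAULT;
+			break;
+		case 1:	/* "set B" */
+			if (copy_from_user(&u.b, argp, sizeof(u.b)))
+				return -EFAULT;
+			break;
+		}
+		return 0;
+	}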
+
+Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 72 +++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 45 insertions(+), 27 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1630,6 +1630,15 @@ long kvm_arch_vm_ioctl(struct file *filp
+ struct kvm *kvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r = -EINVAL;
++ /*
++ * This union makes it completely explicit to gcc-3.x
++ * that these two variables' stack usage should be
++ * combined, not added together.
++ */
++ union {
++ struct kvm_pit_state ps;
++ struct kvm_memory_alias alias;
++ } u;
+
+ switch (ioctl) {
+ case KVM_SET_TSS_ADDR:
+@@ -1661,17 +1670,14 @@ long kvm_arch_vm_ioctl(struct file *filp
+ case KVM_GET_NR_MMU_PAGES:
+ r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
+ break;
+- case KVM_SET_MEMORY_ALIAS: {
+- struct kvm_memory_alias alias;
+-
++ case KVM_SET_MEMORY_ALIAS:
+ r = -EFAULT;
+- if (copy_from_user(&alias, argp, sizeof alias))
++ if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
+ goto out;
+- r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
++ r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
+ if (r)
+ goto out;
+ break;
+- }
+ case KVM_CREATE_IRQCHIP:
+ r = -ENOMEM;
+ kvm->arch.vpic = kvm_create_pic(kvm);
+@@ -1713,65 +1719,77 @@ long kvm_arch_vm_ioctl(struct file *filp
+ }
+ case KVM_GET_IRQCHIP: {
+ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+- struct kvm_irqchip chip;
++ struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
+
+- r = -EFAULT;
+- if (copy_from_user(&chip, argp, sizeof chip))
++ r = -ENOMEM;
++ if (!chip)
+ goto out;
++ r = -EFAULT;
++ if (copy_from_user(chip, argp, sizeof *chip))
++ goto get_irqchip_out;
+ r = -ENXIO;
+ if (!irqchip_in_kernel(kvm))
+- goto out;
+- r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
++ goto get_irqchip_out;
++ r = kvm_vm_ioctl_get_irqchip(kvm, chip);
+ if (r)
+- goto out;
++ goto get_irqchip_out;
+ r = -EFAULT;
+- if (copy_to_user(argp, &chip, sizeof chip))
+- goto out;
++ if (copy_to_user(argp, chip, sizeof *chip))
++ goto get_irqchip_out;
+ r = 0;
++ get_irqchip_out:
++ kfree(chip);
++ if (r)
++ goto out;
+ break;
+ }
+ case KVM_SET_IRQCHIP: {
+ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+- struct kvm_irqchip chip;
++ struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
+
+- r = -EFAULT;
+- if (copy_from_user(&chip, argp, sizeof chip))
++ r = -ENOMEM;
++ if (!chip)
+ goto out;
++ r = -EFAULT;
++ if (copy_from_user(chip, argp, sizeof *chip))
++ goto set_irqchip_out;
+ r = -ENXIO;
+ if (!irqchip_in_kernel(kvm))
+- goto out;
+- r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
++ goto set_irqchip_out;
++ r = kvm_vm_ioctl_set_irqchip(kvm, chip);
+ if (r)
+- goto out;
++ goto set_irqchip_out;
+ r = 0;
++ set_irqchip_out:
++ kfree(chip);
++ if (r)
++ goto out;
+ break;
+ }
+ case KVM_GET_PIT: {
+- struct kvm_pit_state ps;
+ r = -EFAULT;
+- if (copy_from_user(&ps, argp, sizeof ps))
++ if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
+ goto out;
+ r = -ENXIO;
+ if (!kvm->arch.vpit)
+ goto out;
+- r = kvm_vm_ioctl_get_pit(kvm, &ps);
++ r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
+ if (r)
+ goto out;
+ r = -EFAULT;
+- if (copy_to_user(argp, &ps, sizeof ps))
++ if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_PIT: {
+- struct kvm_pit_state ps;
+ r = -EFAULT;
+- if (copy_from_user(&ps, argp, sizeof ps))
++ if (copy_from_user(&u.ps, argp, sizeof u.ps))
+ goto out;
+ r = -ENXIO;
+ if (!kvm->arch.vpit)
+ goto out;
+- r = kvm_vm_ioctl_set_pit(kvm, &ps);
++ r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
+ if (r)
+ goto out;
+ r = 0;
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:35:30 2009
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+Date: Thu, 6 Aug 2009 14:39:51 -0300
+Subject: KVM: Reduce stack usage in kvm_arch_vcpu_ioctl()
+To: stable@kernel.org
+Cc: Sheng Yang <sheng.yang@intel.com>, Avi Kivity <avi@redhat.com>, Dave Hansen <dave@linux.vnet.ibm.com>
+Message-ID: <1249580407-21883-12-git-send-email-mtosatti@redhat.com>
+
+
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+
+(cherry picked from commit b772ff362ec6b821c8a5227a3355e263f917bfad)
+
+[sheng: fix KVM_GET_LAPIC using wrong size]
+
+Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: Sheng Yang <sheng.yang@intel.com>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1303,28 +1303,33 @@ long kvm_arch_vcpu_ioctl(struct file *fi
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r;
++ struct kvm_lapic_state *lapic = NULL;
+
+ switch (ioctl) {
+ case KVM_GET_LAPIC: {
+- struct kvm_lapic_state lapic;
++ lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+
+- memset(&lapic, 0, sizeof lapic);
+- r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
++ r = -ENOMEM;
++ if (!lapic)
++ goto out;
++ r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
+ if (r)
+ goto out;
+ r = -EFAULT;
+- if (copy_to_user(argp, &lapic, sizeof lapic))
++ if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_LAPIC: {
+- struct kvm_lapic_state lapic;
+-
++ lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
++ r = -ENOMEM;
++ if (!lapic)
++ goto out;
+ r = -EFAULT;
+- if (copy_from_user(&lapic, argp, sizeof lapic))
++ if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
+ goto out;
+- r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
++ r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
+ if (r)
+ goto out;
+ r = 0;
+@@ -1422,6 +1427,8 @@ long kvm_arch_vcpu_ioctl(struct file *fi
+ r = -EINVAL;
+ }
+ out:
++ if (lapic)
++ kfree(lapic);
+ return r;
+ }
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:35:47 2009
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+Date: Thu, 6 Aug 2009 14:39:52 -0300
+Subject: KVM: Reduce stack usage in kvm_pv_mmu_op()
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>, Dave Hansen <dave@linux.vnet.ibm.com>
+Message-ID: <1249580407-21883-13-git-send-email-mtosatti@redhat.com>
+
+
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+
+(cherry picked from commit 6ad18fba05228fb1d47cdbc0339fe8b3fca1ca26)
+
+We're in a hot path. We can't use kmalloc() because
+it might impact performance. So, we just stick the buffer that
+we need into the kvm_vcpu_arch structure. This is used very
+often, so it is not really a waste.
+
+We also have to move the buffer structure's definition to the
+arch-specific x86 kvm header.
+
+Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 23 ++++++++---------------
+ include/asm-x86/kvm_host.h | 10 ++++++++++
+ 2 files changed, 18 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -135,13 +135,6 @@ module_param(dbg, bool, 0644);
+ #define ACC_USER_MASK PT_USER_MASK
+ #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+
+-struct kvm_pv_mmu_op_buffer {
+- void *ptr;
+- unsigned len;
+- unsigned processed;
+- char buf[512] __aligned(sizeof(long));
+-};
+-
+ struct kvm_rmap_desc {
+ u64 *shadow_ptes[RMAP_EXT];
+ struct kvm_rmap_desc *more;
+@@ -2294,18 +2287,18 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned long *ret)
+ {
+ int r;
+- struct kvm_pv_mmu_op_buffer buffer;
++ struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer;
+
+- buffer.ptr = buffer.buf;
+- buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
+- buffer.processed = 0;
++ buffer->ptr = buffer->buf;
++ buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf);
++ buffer->processed = 0;
+
+- r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
++ r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len);
+ if (r)
+ goto out;
+
+- while (buffer.len) {
+- r = kvm_pv_mmu_op_one(vcpu, &buffer);
++ while (buffer->len) {
++ r = kvm_pv_mmu_op_one(vcpu, buffer);
+ if (r < 0)
+ goto out;
+ if (r == 0)
+@@ -2314,7 +2307,7 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu,
+
+ r = 1;
+ out:
+- *ret = buffer.processed;
++ *ret = buffer->processed;
+ return r;
+ }
+
+--- a/include/asm-x86/kvm_host.h
++++ b/include/asm-x86/kvm_host.h
+@@ -195,6 +195,13 @@ struct kvm_mmu_page {
+ };
+ };
+
++struct kvm_pv_mmu_op_buffer {
++ void *ptr;
++ unsigned len;
++ unsigned processed;
++ char buf[512] __aligned(sizeof(long));
++};
++
+ /*
+ * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
+ * 32-bit). The kvm_mmu structure abstracts the details of the current mmu
+@@ -237,6 +244,9 @@ struct kvm_vcpu_arch {
+ bool tpr_access_reporting;
+
+ struct kvm_mmu mmu;
++ /* only needed in kvm_pv_mmu_op() path, but it's hot so
++ * put it here to avoid allocation */
++ struct kvm_pv_mmu_op_buffer mmu_op_buffer;
+
+ struct kvm_mmu_memory_cache mmu_pte_chain_cache;
+ struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:35:12 2009
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+Date: Thu, 6 Aug 2009 14:39:50 -0300
+Subject: KVM: Reduce stack usage in kvm_vcpu_ioctl()
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>, Dave Hansen <dave@linux.vnet.ibm.com>
+Message-ID: <1249580407-21883-11-git-send-email-mtosatti@redhat.com>
+
+
+From: Dave Hansen <dave@linux.vnet.ibm.com>
+
+(cherry picked from commit fa3795a7308df099f0f2c9e5ca2c20a5ff65bdc4)
+
+Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ virt/kvm/kvm_main.c | 46 ++++++++++++++++++++++++++++------------------
+ 1 file changed, 28 insertions(+), 18 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1118,6 +1118,8 @@ static long kvm_vcpu_ioctl(struct file *
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r;
++ struct kvm_fpu *fpu = NULL;
++ struct kvm_sregs *kvm_sregs = NULL;
+
+ if (vcpu->kvm->mm != current->mm)
+ return -EIO;
+@@ -1165,25 +1167,28 @@ out_free2:
+ break;
+ }
+ case KVM_GET_SREGS: {
+- struct kvm_sregs kvm_sregs;
+-
+- memset(&kvm_sregs, 0, sizeof kvm_sregs);
+- r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
++ kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
++ r = -ENOMEM;
++ if (!kvm_sregs)
++ goto out;
++ r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
+ if (r)
+ goto out;
+ r = -EFAULT;
+- if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
++ if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_SREGS: {
+- struct kvm_sregs kvm_sregs;
+-
++ kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
++ r = -ENOMEM;
++ if (!kvm_sregs)
++ goto out;
+ r = -EFAULT;
+- if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
++ if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
+ goto out;
+- r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
++ r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
+ if (r)
+ goto out;
+ r = 0;
+@@ -1264,25 +1269,28 @@ out_free2:
+ break;
+ }
+ case KVM_GET_FPU: {
+- struct kvm_fpu fpu;
+-
+- memset(&fpu, 0, sizeof fpu);
+- r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu);
++ fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
++ r = -ENOMEM;
++ if (!fpu)
++ goto out;
++ r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
+ if (r)
+ goto out;
+ r = -EFAULT;
+- if (copy_to_user(argp, &fpu, sizeof fpu))
++ if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_FPU: {
+- struct kvm_fpu fpu;
+-
++ fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
++ r = -ENOMEM;
++ if (!fpu)
++ goto out;
+ r = -EFAULT;
+- if (copy_from_user(&fpu, argp, sizeof fpu))
++ if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
+ goto out;
+- r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu);
++ r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
+ if (r)
+ goto out;
+ r = 0;
+@@ -1292,6 +1300,8 @@ out_free2:
+ r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
+ }
+ out:
++ kfree(fpu);
++ kfree(kvm_sregs);
+ return r;
+ }
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:36:31 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Thu, 6 Aug 2009 14:39:54 -0300
+Subject: KVM: set debug registers after "schedulable" section
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-15-git-send-email-mtosatti@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 29415c37f043d1d54dcf356601d738ff6633b72b)
+
+The vcpu thread can be preempted after the guest_debug_pre() callback,
+resulting in invalid debug registers on the new vcpu.
+
+Move it inside the non-preemptable section.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/x86.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2839,10 +2839,6 @@ static int __vcpu_run(struct kvm_vcpu *v
+ down_read(&vcpu->kvm->slots_lock);
+ vapic_enter(vcpu);
+
+-preempted:
+- if (vcpu->guest_debug.enabled)
+- kvm_x86_ops->guest_debug_pre(vcpu);
+-
+ again:
+ if (vcpu->requests)
+ if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+@@ -2896,6 +2892,9 @@ again:
+ goto out;
+ }
+
++ if (vcpu->guest_debug.enabled)
++ kvm_x86_ops->guest_debug_pre(vcpu);
++
+ vcpu->guest_mode = 1;
+ /*
+ * Make sure that guest_mode assignment won't happen after
+@@ -2970,7 +2969,7 @@ out:
+ if (r > 0) {
+ kvm_resched(vcpu);
+ down_read(&vcpu->kvm->slots_lock);
+- goto preempted;
++ goto again;
+ }
+
+ post_kvm_run_save(vcpu, kvm_run);
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:38:05 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 6 Aug 2009 14:39:59 -0300
+Subject: KVM: SVM: Remove port 80 passthrough
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-20-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit 99f85a28a78e96d28907fe036e1671a218fee597)
+
+KVM optimizes guest port 80 accesses by passing them through to the host.
+Some AMD machines die on port 80 writes, allowing the guest to hard-lock the
+host.
+
+Remove the port passthrough to avoid the problem.
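+
+For context, a sketch of the existing setup (not new code): the SVM IOPM is
+a bitmap with one bit per I/O port, and a set bit makes accesses to that port
+intercept; the removed clear_bit() was the single exception that let guest
+port 0x80 writes reach host hardware directly:
+
+	/* intercept every I/O port ... */
+	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
+	/* ... except 0x80, which went straight to the host */
+	clear_bit(0x80, iopm_va);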
+
+Reported-by: Piotr Jaroszyński <p.jaroszynski@gmail.com>
+Tested-by: Piotr Jaroszyński <p.jaroszynski@gmail.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/svm.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -429,7 +429,6 @@ static __init int svm_hardware_setup(voi
+
+ iopm_va = page_address(iopm_pages);
+ memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
+- clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
+ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+
+ if (boot_cpu_has(X86_FEATURE_NX))
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:32:17 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:41 -0300
+Subject: KVM: VMX: Change cs reset state to be a data segment
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-2-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit 5706be0dafd6f42852f85fbae292301dcad4ccec)
+
+Real mode cs is a data segment, not a code segment.
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/vmx.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2036,6 +2036,7 @@ static int vmx_vcpu_reset(struct kvm_vcp
+
+ fx_init(&vmx->vcpu);
+
++ seg_setup(VCPU_SREG_CS);
+ /*
+ * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
+ * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
+@@ -2047,8 +2048,6 @@ static int vmx_vcpu_reset(struct kvm_vcp
+ vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
+ vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
+ }
+- vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+- vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+
+ seg_setup(VCPU_SREG_DS);
+ seg_setup(VCPU_SREG_ES);
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:32:35 2009
+From: Avi Kivity <avi@qumranet.com>
+Date: Thu, 6 Aug 2009 14:39:42 -0300
+Subject: KVM: VMX: Change segment dpl at reset to 3
+To: stable@kernel.org
+Cc: avi@redhat.com
+Message-ID: <1249580407-21883-3-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@qumranet.com>
+
+(cherry picked from commit a16b20da879430fdf245ed45461ed40ffef8db3c)
+
+This is more emulation friendly, if not 100% correct.
+
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/vmx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -1789,7 +1789,7 @@ static void seg_setup(int seg)
+ vmcs_write16(sf->selector, 0);
+ vmcs_writel(sf->base, 0);
+ vmcs_write32(sf->limit, 0xffff);
+- vmcs_write32(sf->ar_bytes, 0x93);
++ vmcs_write32(sf->ar_bytes, 0xf3);
+ }
+
+ static int alloc_apic_access_page(struct kvm *kvm)
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:37:47 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 6 Aug 2009 14:39:58 -0300
+Subject: KVM: VMX: Don't allow uninhibited access to EFER on i386
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-19-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit 16175a796d061833aacfbd9672235f2d2725df65)
+
+vmx_set_msr() does not allow i386 guests to touch EFER, but they can still
+do so through the default: label in the switch. If they set EFER_LME, they
+can oops the host.
+
+Fix by having EFER accesses go through the normal channel (which will check
+for EFER_LME) even on i386.
+
+Reported-and-tested-by: Benjamin Gilbert <bgilbert@cs.cmu.edu>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/vmx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -898,11 +898,11 @@ static int vmx_set_msr(struct kvm_vcpu *
+ int ret = 0;
+
+ switch (msr_index) {
+-#ifdef CONFIG_X86_64
+ case MSR_EFER:
+ vmx_load_host_state(vmx);
+ ret = kvm_set_msr_common(vcpu, msr_index, data);
+ break;
++#ifdef CONFIG_X86_64
+ case MSR_FS_BASE:
+ vmcs_writel(GUEST_FS_BASE, data);
+ break;
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:38:46 2009
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:01 -0300
+Subject: KVM: VMX: Handle vmx instruction vmexits
+To: stable@kernel.org
+Cc: Avi Kivity <avi@redhat.com>
+Message-ID: <1249580407-21883-22-git-send-email-mtosatti@redhat.com>
+
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit e3c7cb6ad7191e92ba89d00a7ae5f5dd1ca0c214)
+
+If a guest tries to use vmx instructions, inject a #UD to let it know the
+instruction is not implemented, rather than crashing.
+
+This prevents guest userspace from crashing the guest kernel.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/vmx.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2582,6 +2582,12 @@ static int handle_vmcall(struct kvm_vcpu
+ return 1;
+ }
+
++static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
++{
++ kvm_queue_exception(vcpu, UD_VECTOR);
++ return 1;
++}
++
+ static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+ {
+ skip_emulated_instruction(vcpu);
+@@ -2714,6 +2720,15 @@ static int (*kvm_vmx_exit_handlers[])(st
+ [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
+ [EXIT_REASON_HLT] = handle_halt,
+ [EXIT_REASON_VMCALL] = handle_vmcall,
++ [EXIT_REASON_VMCLEAR] = handle_vmx_insn,
++ [EXIT_REASON_VMLAUNCH] = handle_vmx_insn,
++ [EXIT_REASON_VMPTRLD] = handle_vmx_insn,
++ [EXIT_REASON_VMPTRST] = handle_vmx_insn,
++ [EXIT_REASON_VMREAD] = handle_vmx_insn,
++ [EXIT_REASON_VMRESUME] = handle_vmx_insn,
++ [EXIT_REASON_VMWRITE] = handle_vmx_insn,
++ [EXIT_REASON_VMOFF] = handle_vmx_insn,
++ [EXIT_REASON_VMON] = handle_vmx_insn,
+ [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
+ [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
+ [EXIT_REASON_WBINVD] = handle_wbinvd,
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:37:13 2009
+From: Sheng Yang <sheng@linux.intel.com>
+Date: Thu, 6 Aug 2009 14:39:56 -0300
+Subject: KVM: VMX: Set IGMT bit in EPT entry
+To: stable@kernel.org
+Cc: avi@redhat.com, Sheng Yang <sheng@linux.intel.com>
+Message-ID: <1249580407-21883-17-git-send-email-mtosatti@redhat.com>
+
+
+From: Sheng Yang <sheng@linux.intel.com>
+
+(cherry picked from commit 928d4bf747e9c290b690ff515d8f81e8ee226d97)
+
+There is a potential issue that, when the guest uses its pagetables without
+vmexits while EPT is enabled, the guest would use the PAT/PCD/PWT bits to
+index the PAT msr for its memory, which could be inconsistent with the host
+side and cause a host MCE due to inconsistent cache attributes.
+
+The patch sets the IGMT bit in the EPT entry to ignore the guest PAT and use
+WB as the default memory type to protect the host (notice that all memory
+mapped by KVM should be WB).
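+
+For reference, a minimal sketch of the EPT leaf-entry fields involved (bit
+positions and the WB encoding are assumptions recalled from the VMX EPT
+layout, not taken from this patch):
+
+	#define VMX_EPT_MT_EPTE_SHIFT	3		/* memory type lives in bits 5:3 */
+	#define VMX_EPT_DEFAULT_MT	6ull		/* 6 == write-back (WB)          */
+	#define VMX_EPT_IGMT_BIT	(1ull << 6)	/* ignore the guest PAT          */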
+
+Signed-off-by: Sheng Yang <sheng@linux.intel.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/vmx.c | 3 ++-
+ arch/x86/kvm/vmx.h | 1 +
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3299,7 +3299,8 @@ static int __init vmx_init(void)
+ bypass_guest_pf = 0;
+ kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
+ VMX_EPT_WRITABLE_MASK |
+- VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
++ VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
++ VMX_EPT_IGMT_BIT);
+ kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
+ VMX_EPT_EXECUTABLE_MASK);
+ kvm_enable_tdp();
+--- a/arch/x86/kvm/vmx.h
++++ b/arch/x86/kvm/vmx.h
+@@ -370,6 +370,7 @@ enum vmcs_field {
+ #define VMX_EPT_READABLE_MASK 0x1ull
+ #define VMX_EPT_WRITABLE_MASK 0x2ull
+ #define VMX_EPT_EXECUTABLE_MASK 0x4ull
++#define VMX_EPT_IGMT_BIT (1ull << 6)
+
+ #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
+
--- /dev/null
+From mtosatti@redhat.com Thu Sep 3 14:40:20 2009
+From: Marcelo Tosatti <mtosatti@redhat.com>
+Date: Thu, 6 Aug 2009 14:40:06 -0300
+Subject: KVM: x86: check for cr3 validity in mmu_alloc_roots
+To: stable@kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, avi@redhat.com
+Message-ID: <1249580407-21883-27-git-send-email-mtosatti@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 8986ecc0ef58c96eec48d8502c048f3ab67fd8e2)
+
+Verify that the cr3 address stored in vcpu->arch.cr3 points to an existent
+memslot. If not, inject a triple fault.
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kvm/mmu.c | 25 ++++++++++++++++++++++---
+ arch/x86/kvm/x86.c | 1 +
+ 2 files changed, 23 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1350,7 +1350,19 @@ static void mmu_free_roots(struct kvm_vc
+ vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ }
+
+-static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
++static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
++{
++ int ret = 0;
++
++ if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
++ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
++ ret = 1;
++ }
++
++ return ret;
++}
++
++static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
+ {
+ int i;
+ gfn_t root_gfn;
+@@ -1365,13 +1377,15 @@ static void mmu_alloc_roots(struct kvm_v
+ ASSERT(!VALID_PAGE(root));
+ if (tdp_enabled)
+ metaphysical = 1;
++ if (mmu_check_root(vcpu, root_gfn))
++ return 1;
+ sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+ PT64_ROOT_LEVEL, metaphysical,
+ ACC_ALL, NULL);
+ root = __pa(sp->spt);
+ ++sp->root_count;
+ vcpu->arch.mmu.root_hpa = root;
+- return;
++ return 0;
+ }
+ metaphysical = !is_paging(vcpu);
+ if (tdp_enabled)
+@@ -1388,6 +1402,8 @@ static void mmu_alloc_roots(struct kvm_v
+ root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
+ } else if (vcpu->arch.mmu.root_level == 0)
+ root_gfn = 0;
++ if (mmu_check_root(vcpu, root_gfn))
++ return 1;
+ sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+ PT32_ROOT_LEVEL, metaphysical,
+ ACC_ALL, NULL);
+@@ -1396,6 +1412,7 @@ static void mmu_alloc_roots(struct kvm_v
+ vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+ }
+ vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
++ return 0;
+ }
+
+ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+@@ -1639,8 +1656,10 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
+ goto out;
+ spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_free_some_pages(vcpu);
+- mmu_alloc_roots(vcpu);
++ r = mmu_alloc_roots(vcpu);
+ spin_unlock(&vcpu->kvm->mmu_lock);
++ if (r)
++ goto out;
+ kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+ kvm_mmu_flush_tlb(vcpu);
+ out:
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4073,6 +4073,7 @@ int kvm_arch_set_memory_region(struct kv
+ void kvm_arch_flush_shadow(struct kvm *kvm)
+ {
+ kvm_mmu_zap_all(kvm);
++ kvm_reload_remote_mmus(kvm);
+ }
+
+ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
ehea-fix-napi-list-corruption-on-ifconfig-down.patch
sound-pcm_lib-fix-unsorted-list-constraint-handling.patch
sunrpc-fix-rpc_task_force_reencode.patch
+kvm-vmx-change-cs-reset-state-to-be-a-data-segment.patch
+kvm-vmx-change-segment-dpl-at-reset-to-3.patch
+kvm-load-real-mode-segments-correctly.patch
+kvm-allocate-guest-memory-as-map_private-not-map_shared.patch
+kvm-don-t-call-get_user_pages.patch
+kvm-mmu-add-locking-around-kvm_mmu_slot_remove_write_access.patch
+kvm-mmu-flush-tlbs-after-clearing-write-permission-when-accessing-dirty-log.patch
+kvm-mmu-fix-setting-the-accessed-bit-on-non-speculative-sptes.patch
+kvm-reduce-kvm-stack-usage-in-kvm_arch_vm_ioctl.patch
+kvm-reduce-stack-usage-in-kvm_vcpu_ioctl.patch
+kvm-reduce-stack-usage-in-kvm_arch_vcpu_ioctl.patch
+kvm-reduce-stack-usage-in-kvm_pv_mmu_op.patch
+kvm-add-mc5_misc-msr-read-support.patch
+kvm-set-debug-registers-after-schedulable-section.patch
+kvm-mmu-increase-per-vcpu-rmap-cache-alloc-size.patch
+kvm-vmx-set-igmt-bit-in-ept-entry.patch
+kvm-don-t-destroy-vcpu-in-case-vcpu_setup-fails.patch
+kvm-vmx-don-t-allow-uninhibited-access-to-efer-on-i386.patch
+kvm-svm-remove-port-80-passthrough.patch
+kvm-make-efer-reads-safe-when-efer-does-not-exist.patch
+kvm-vmx-handle-vmx-instruction-vmexits.patch
+kvm-make-paravirt-tlb-flush-also-reload-the-pae-pdptrs.patch
+kvm-fix-pdptr-reloading-on-cr4-writes.patch
+kvm-mmu-do-not-free-active-mmu-pages-in-free_mmu_pages.patch
+kvm-fix-dirty-bit-tracking-for-slots-with-large-pages.patch
+kvm-x86-check-for-cr3-validity-in-mmu_alloc_roots.patch
+kvm-mmu-protect-kvm_mmu_change_mmu_pages-with-mmu_lock.patch