]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
.32 patches
authorGreg Kroah-Hartman <gregkh@suse.de>
Thu, 22 Apr 2010 16:24:55 +0000 (09:24 -0700)
committerGreg Kroah-Hartman <gregkh@suse.de>
Thu, 22 Apr 2010 16:24:55 +0000 (09:24 -0700)
queue-2.6.32/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch [new file with mode: 0644]
queue-2.6.32/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch [new file with mode: 0644]
queue-2.6.32/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch [new file with mode: 0644]
queue-2.6.32/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch [new file with mode: 0644]
queue-2.6.32/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch [new file with mode: 0644]
queue-2.6.32/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch [new file with mode: 0644]
queue-2.6.32/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch [new file with mode: 0644]
queue-2.6.32/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch [new file with mode: 0644]
queue-2.6.32/sched-use-proper-type-in-sched_getaffinity.patch [new file with mode: 0644]
queue-2.6.32/series

diff --git a/queue-2.6.32/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch b/queue-2.6.32/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
new file mode 100644 (file)
index 0000000..60f95fb
--- /dev/null
@@ -0,0 +1,74 @@
+From 55a5d6db91054b446d633f61e0c0ad23e3944aec Mon Sep 17 00:00:00 2001
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Date: Tue, 9 Mar 2010 14:55:19 +0900
+Subject: KVM: SVM: Fix memory leaks that happen when svm_create_vcpu() fails
+
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+
+(Cherry-picked from commit b7af40433870aa0636932ad39b0c48a0cb319057)
+
+svm_create_vcpu() does not free the pages allocated during the creation
+when it fails to complete the allocations. This patch fixes it.
+
+Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/svm.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -694,29 +694,28 @@ static struct kvm_vcpu *svm_create_vcpu(
+       if (err)
+               goto free_svm;
++      err = -ENOMEM;
+       page = alloc_page(GFP_KERNEL);
+-      if (!page) {
+-              err = -ENOMEM;
++      if (!page)
+               goto uninit;
+-      }
+-      err = -ENOMEM;
+       msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+       if (!msrpm_pages)
+-              goto uninit;
++              goto free_page1;
+       nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+       if (!nested_msrpm_pages)
+-              goto uninit;
+-
+-      svm->msrpm = page_address(msrpm_pages);
+-      svm_vcpu_init_msrpm(svm->msrpm);
++              goto free_page2;
+       hsave_page = alloc_page(GFP_KERNEL);
+       if (!hsave_page)
+-              goto uninit;
++              goto free_page3;
++
+       svm->nested.hsave = page_address(hsave_page);
++      svm->msrpm = page_address(msrpm_pages);
++      svm_vcpu_init_msrpm(svm->msrpm);
++
+       svm->nested.msrpm = page_address(nested_msrpm_pages);
+       svm->vmcb = page_address(page);
+@@ -733,6 +732,12 @@ static struct kvm_vcpu *svm_create_vcpu(
+       return &svm->vcpu;
++free_page3:
++      __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
++free_page2:
++      __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
++free_page1:
++      __free_page(page);
+ uninit:
+       kvm_vcpu_uninit(&svm->vcpu);
+ free_svm:
diff --git a/queue-2.6.32/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch b/queue-2.6.32/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
new file mode 100644 (file)
index 0000000..c5348f1
--- /dev/null
@@ -0,0 +1,158 @@
+From 3f6f46d952e6004b4fb4a365c3ea2682e42395fc Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 11 Mar 2010 12:20:03 +0200
+Subject: KVM: Don't spam kernel log when injecting exceptions due to bad cr writes
+
+From: Avi Kivity <avi@redhat.com>
+
+(Cherry-picked from commit d6a23895aa82353788a1cc5a1d9a1c963465463e)
+
+These are guest-triggerable.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/x86.c |   27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -297,21 +297,16 @@ out:
+ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
+       if (cr0 & CR0_RESERVED_BITS) {
+-              printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
+-                     cr0, vcpu->arch.cr0);
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
+-              printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
+-              printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
+-                     "and a clear PE flag\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -322,15 +317,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu,
+                       int cs_db, cs_l;
+                       if (!is_pae(vcpu)) {
+-                              printk(KERN_DEBUG "set_cr0: #GP, start paging "
+-                                     "in long mode while PAE is disabled\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+                       }
+                       kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+                       if (cs_l) {
+-                              printk(KERN_DEBUG "set_cr0: #GP, start paging "
+-                                     "in long mode while CS.L == 1\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+@@ -338,8 +329,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu,
+               } else
+ #endif
+               if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
+-                      printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
+-                             "reserved bits\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+@@ -366,28 +355,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu,
+       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+       if (cr4 & CR4_RESERVED_BITS) {
+-              printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if (is_long_mode(vcpu)) {
+               if (!(cr4 & X86_CR4_PAE)) {
+-                      printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
+-                             "in long mode\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+       } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
+                  && ((cr4 ^ old_cr4) & pdptr_bits)
+                  && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
+-              printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if (cr4 & X86_CR4_VMXE) {
+-              printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -408,21 +392,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu,
+       if (is_long_mode(vcpu)) {
+               if (cr3 & CR3_L_MODE_RESERVED_BITS) {
+-                      printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+       } else {
+               if (is_pae(vcpu)) {
+                       if (cr3 & CR3_PAE_RESERVED_BITS) {
+-                              printk(KERN_DEBUG
+-                                     "set_cr3: #GP, reserved bits\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+                       }
+                       if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
+-                              printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
+-                                     "reserved bits\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+                       }
+@@ -454,7 +433,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
+ void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+ {
+       if (cr8 & CR8_RESERVED_BITS) {
+-              printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -508,15 +486,12 @@ static u32 emulated_msrs[] = {
+ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ {
+       if (efer & efer_reserved_bits) {
+-              printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
+-                     efer);
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if (is_paging(vcpu)
+           && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+-              printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -526,7 +501,6 @@ static void set_efer(struct kvm_vcpu *vc
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+               if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+-                      printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+@@ -537,7 +511,6 @@ static void set_efer(struct kvm_vcpu *vc
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+               if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+-                      printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
diff --git a/queue-2.6.32/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch b/queue-2.6.32/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
new file mode 100644 (file)
index 0000000..3bbaa58
--- /dev/null
@@ -0,0 +1,44 @@
+From 5e2da0660782ef33de03e9da84f64223730500d1 Mon Sep 17 00:00:00 2001
+From: Andre Przywara <andre.przywara@amd.com>
+Date: Wed, 24 Mar 2010 17:46:42 +0100
+Subject: KVM: allow bit 10 to be cleared in MSR_IA32_MC4_CTL
+
+From: Andre Przywara <andre.przywara@amd.com>
+
+(Cherry-picked from commit 114be429c8cd44e57f312af2bbd6734e5a185b0d)
+
+There is a quirk for AMD K8 CPUs in many Linux kernels (see
+arch/x86/kernel/cpu/mcheck/mce.c:__mcheck_cpu_apply_quirks()) that
+clears bit 10 in that MCE related MSR. KVM can only cope with all
+zeros or all ones, so it will inject a #GP into the guest, which
+will make it panic.
+So let's add a quirk to the quirk and ignore this single cleared bit.
+This fixes -cpu kvm64 on all machines and -cpu host on K8 machines
+with some guest Linux kernels.
+
+Signed-off-by: Andre Przywara <andre.przywara@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/x86.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -799,9 +799,13 @@ static int set_msr_mce(struct kvm_vcpu *
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+-                      /* only 0 or all 1s can be written to IA32_MCi_CTL */
++                      /* only 0 or all 1s can be written to IA32_MCi_CTL
++                       * some Linux kernels though clear bit 10 in bank 4 to
++                       * workaround a BIOS/GART TBL issue on AMD K8s, ignore
++                       * this to avoid an uncatched #GP in the guest
++                       */
+                       if ((offset & 0x3) == 0 &&
+-                          data != 0 && data != ~(u64)0)
++                          data != 0 && (data | (1 << 10)) != ~(u64)0)
+                               return -1;
+                       vcpu->arch.mce_banks[offset] = data;
+                       break;
diff --git a/queue-2.6.32/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch b/queue-2.6.32/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
new file mode 100644 (file)
index 0000000..0c3bd8a
--- /dev/null
@@ -0,0 +1,99 @@
+From 8a618f6f19f9bc88e8b5d75cbfbfedcb416246f3 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 8 Apr 2010 18:19:35 +0300
+Subject: KVM: VMX: Save/restore rflags.vm correctly in real mode
+
+From: Avi Kivity <avi@redhat.com>
+
+(Cherry-picked from commit 78ac8b47c566dd6177a3b9b291b756ccb70670b7)
+
+Currently we set eflags.vm unconditionally when entering real mode emulation
+through virtual-8086 mode, and clear it unconditionally when we enter protected
+mode.  This means that the following sequence
+
+  KVM_SET_REGS  (rflags.vm=1)
+  KVM_SET_SREGS (cr0.pe=1)
+
+Ends up with rflags.vm clear due to KVM_SET_SREGS triggering enter_pmode().
+
+Fix by shadowing rflags.vm (and rflags.iopl) correctly while in real mode:
+reads and writes to those bits access a shadow register instead of the actual
+register.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/vmx.c |   24 +++++++++++++++---------
+ 1 file changed, 15 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -61,6 +61,8 @@ module_param_named(unrestricted_guest,
+ static int __read_mostly emulate_invalid_guest_state = 0;
+ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
++#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
++
+ struct vmcs {
+       u32 revision_id;
+       u32 abort;
+@@ -92,7 +94,7 @@ struct vcpu_vmx {
+       } host_state;
+       struct {
+               int vm86_active;
+-              u8 save_iopl;
++              ulong save_rflags;
+               struct kvm_save_segment {
+                       u16 selector;
+                       unsigned long base;
+@@ -783,18 +785,23 @@ static void vmx_fpu_deactivate(struct kv
+ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
+ {
+-      unsigned long rflags;
++      unsigned long rflags, save_rflags;
+       rflags = vmcs_readl(GUEST_RFLAGS);
+-      if (to_vmx(vcpu)->rmode.vm86_active)
+-              rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
++      if (to_vmx(vcpu)->rmode.vm86_active) {
++              rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
++              save_rflags = to_vmx(vcpu)->rmode.save_rflags;
++              rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
++      }
+       return rflags;
+ }
+ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ {
+-      if (to_vmx(vcpu)->rmode.vm86_active)
++      if (to_vmx(vcpu)->rmode.vm86_active) {
++              to_vmx(vcpu)->rmode.save_rflags = rflags;
+               rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
++      }
+       vmcs_writel(GUEST_RFLAGS, rflags);
+ }
+@@ -1431,8 +1438,8 @@ static void enter_pmode(struct kvm_vcpu
+       vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
+       flags = vmcs_readl(GUEST_RFLAGS);
+-      flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+-      flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
++      flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
++      flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+       vmcs_writel(GUEST_RFLAGS, flags);
+       vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
+@@ -1501,8 +1508,7 @@ static void enter_rmode(struct kvm_vcpu
+       vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+       flags = vmcs_readl(GUEST_RFLAGS);
+-      vmx->rmode.save_iopl
+-              = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
++      vmx->rmode.save_rflags = flags;
+       flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
diff --git a/queue-2.6.32/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch b/queue-2.6.32/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
new file mode 100644 (file)
index 0000000..c2baca1
--- /dev/null
@@ -0,0 +1,65 @@
+From 88e1db0c6eb5f5d5f64525d1b667452022474afd Mon Sep 17 00:00:00 2001
+From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+Date: Fri, 16 Apr 2010 16:34:42 +0800
+Subject: KVM: MMU: fix kvm_mmu_zap_page() and its calling path
+
+From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+
+(Cherry-picked from commit 77662e0028c7c63e34257fda03ff9625c59d939d)
+
+This patch fixes:
+
+- calculate zapped page number properly in mmu_zap_unsync_children()
+- calculate freed page number properly in kvm_mmu_change_mmu_pages()
+- if a child page was zapped, it should restart hlist walking
+
+KVM-Stable-Tag.
+Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/mmu.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1496,8 +1496,8 @@ static int mmu_zap_unsync_children(struc
+               for_each_sp(pages, sp, parents, i) {
+                       kvm_mmu_zap_page(kvm, sp);
+                       mmu_pages_clear_parents(&parents);
++                      zapped++;
+               }
+-              zapped += pages.nr;
+               kvm_mmu_pages_init(parent, &parents, &pages);
+       }
+@@ -1548,14 +1548,16 @@ void kvm_mmu_change_mmu_pages(struct kvm
+        */
+       if (used_pages > kvm_nr_mmu_pages) {
+-              while (used_pages > kvm_nr_mmu_pages) {
++              while (used_pages > kvm_nr_mmu_pages &&
++                      !list_empty(&kvm->arch.active_mmu_pages)) {
+                       struct kvm_mmu_page *page;
+                       page = container_of(kvm->arch.active_mmu_pages.prev,
+                                           struct kvm_mmu_page, link);
+-                      kvm_mmu_zap_page(kvm, page);
++                      used_pages -= kvm_mmu_zap_page(kvm, page);
+                       used_pages--;
+               }
++              kvm_nr_mmu_pages = used_pages;
+               kvm->arch.n_free_mmu_pages = 0;
+       }
+       else
+@@ -1602,7 +1604,8 @@ static void mmu_unshadow(struct kvm *kvm
+                   && !sp->role.invalid) {
+                       pgprintk("%s: zap %lx %x\n",
+                                __func__, gfn, sp->role.word);
+-                      kvm_mmu_zap_page(kvm, sp);
++                      if (kvm_mmu_zap_page(kvm, sp))
++                              nn = bucket->first;
+               }
+       }
+ }
diff --git a/queue-2.6.32/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch b/queue-2.6.32/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
new file mode 100644 (file)
index 0000000..174c939
--- /dev/null
@@ -0,0 +1,146 @@
+From cbb577cc37fca4093444fcd778fe9de880b30ff2 Mon Sep 17 00:00:00 2001
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Date: Mon, 12 Apr 2010 19:35:35 +0900
+Subject: KVM: fix the handling of dirty bitmaps to avoid overflows
+
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+
+(Cherry-picked from commit 87bf6e7de1134f48681fd2ce4b7c1ec45458cb6d)
+
+Int is not long enough to store the size of a dirty bitmap.
+
+This patch fixes this problem with the introduction of a wrapper
+function to calculate the sizes of dirty bitmaps.
+
+Note: in mark_page_dirty(), we have to consider the fact that
+  __set_bit() takes the offset as int, not long.
+
+Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/ia64/kvm/kvm-ia64.c |    9 +++++----
+ arch/x86/kvm/x86.c       |    4 ++--
+ include/linux/kvm_host.h |    5 +++++
+ virt/kvm/kvm_main.c      |   13 ++++++++-----
+ 4 files changed, 20 insertions(+), 11 deletions(-)
+
+--- a/arch/ia64/kvm/kvm-ia64.c
++++ b/arch/ia64/kvm/kvm-ia64.c
+@@ -1797,7 +1797,8 @@ static int kvm_ia64_sync_dirty_log(struc
+ {
+       struct kvm_memory_slot *memslot;
+       int r, i;
+-      long n, base;
++      long base;
++      unsigned long n;
+       unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+                       offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+@@ -1810,7 +1811,7 @@ static int kvm_ia64_sync_dirty_log(struc
+       if (!memslot->dirty_bitmap)
+               goto out;
+-      n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++      n = kvm_dirty_bitmap_bytes(memslot);
+       base = memslot->base_gfn / BITS_PER_LONG;
+       for (i = 0; i < n/sizeof(long); ++i) {
+@@ -1826,7 +1827,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+               struct kvm_dirty_log *log)
+ {
+       int r;
+-      int n;
++      unsigned long n;
+       struct kvm_memory_slot *memslot;
+       int is_dirty = 0;
+@@ -1844,7 +1845,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+       if (is_dirty) {
+               kvm_flush_remote_tlbs(kvm);
+               memslot = &kvm->memslots[log->slot];
+-              n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++              n = kvm_dirty_bitmap_bytes(memslot);
+               memset(memslot->dirty_bitmap, 0, n);
+       }
+       r = 0;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2133,7 +2133,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+                                     struct kvm_dirty_log *log)
+ {
+       int r;
+-      int n;
++      unsigned long n;
+       struct kvm_memory_slot *memslot;
+       int is_dirty = 0;
+@@ -2149,7 +2149,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+               kvm_mmu_slot_remove_write_access(kvm, log->slot);
+               spin_unlock(&kvm->mmu_lock);
+               memslot = &kvm->memslots[log->slot];
+-              n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++              n = kvm_dirty_bitmap_bytes(memslot);
+               memset(memslot->dirty_bitmap, 0, n);
+       }
+       r = 0;
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -116,6 +116,11 @@ struct kvm_memory_slot {
+       int user_alloc;
+ };
++static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
++{
++      return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++}
++
+ struct kvm_kernel_irq_routing_entry {
+       u32 gsi;
+       u32 type;
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1226,7 +1226,7 @@ skip_lpage:
+       /* Allocate page dirty bitmap if needed */
+       if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
+-              unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
++              unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
+               new.dirty_bitmap = vmalloc(dirty_bytes);
+               if (!new.dirty_bitmap)
+@@ -1309,7 +1309,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
+ {
+       struct kvm_memory_slot *memslot;
+       int r, i;
+-      int n;
++      unsigned long n;
+       unsigned long any = 0;
+       r = -EINVAL;
+@@ -1321,7 +1321,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
+       if (!memslot->dirty_bitmap)
+               goto out;
+-      n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++      n = kvm_dirty_bitmap_bytes(memslot);
+       for (i = 0; !any && i < n/sizeof(long); ++i)
+               any = memslot->dirty_bitmap[i];
+@@ -1663,10 +1663,13 @@ void mark_page_dirty(struct kvm *kvm, gf
+       memslot = gfn_to_memslot_unaliased(kvm, gfn);
+       if (memslot && memslot->dirty_bitmap) {
+               unsigned long rel_gfn = gfn - memslot->base_gfn;
++              unsigned long *p = memslot->dirty_bitmap +
++                                      rel_gfn / BITS_PER_LONG;
++              int offset = rel_gfn % BITS_PER_LONG;
+               /* avoid RMW */
+-              if (!test_bit(rel_gfn, memslot->dirty_bitmap))
+-                      set_bit(rel_gfn, memslot->dirty_bitmap);
++              if (!test_bit(offset, p))
++                      set_bit(offset, p);
+       }
+ }
diff --git a/queue-2.6.32/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch b/queue-2.6.32/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
new file mode 100644 (file)
index 0000000..c6eeda1
--- /dev/null
@@ -0,0 +1,33 @@
+From f8dd49cc65898e9636df861e880aa6c94ab15ca5 Mon Sep 17 00:00:00 2001
+From: Sridhar Samudrala <sri@us.ibm.com>
+Date: Tue, 30 Mar 2010 16:48:25 -0700
+Subject: KVM: Increase NR_IOBUS_DEVS limit to 200
+
+From: Sridhar Samudrala <sri@us.ibm.com>
+
+(Cherry-picked from commit e80e2a60ff7914dae691345a976c80bbbff3ec74)
+
+This patch increases the current hardcoded limit of NR_IOBUS_DEVS
+from 6 to 200. We are hitting this limit when creating a guest with more
+than 1 virtio-net device using vhost-net backend. Each virtio-net
+device requires 2 such devices to service notifications from rx/tx queues.
+
+Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/kvm_host.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -53,7 +53,7 @@ extern struct kmem_cache *kvm_vcpu_cache
+  */
+ struct kvm_io_bus {
+       int                   dev_count;
+-#define NR_IOBUS_DEVS 6
++#define NR_IOBUS_DEVS 200
+       struct kvm_io_device *devs[NR_IOBUS_DEVS];
+ };
diff --git a/queue-2.6.32/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch b/queue-2.6.32/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch
new file mode 100644 (file)
index 0000000..4e0400b
--- /dev/null
@@ -0,0 +1,42 @@
+From 2dbbf0c30bbb339e455b30db8ee13adb487e68f7 Mon Sep 17 00:00:00 2001
+From: Jan Kiszka <jan.kiszka@siemens.com>
+Date: Wed, 14 Apr 2010 16:57:11 +0200
+Subject: KVM: x86: Fix TSS size check for 16-bit tasks
+
+From: Jan Kiszka <jan.kiszka@siemens.com>
+
+(Cherry-picked from commit e8861cfe2c75bdce36655b64d7ce02c2b31b604d)
+
+A 16-bit TSS is only 44 bytes long. So make sure to test for the correct
+size on task switch.
+
+Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/x86.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4543,6 +4543,7 @@ int kvm_task_switch(struct kvm_vcpu *vcp
+       int ret = 0;
+       u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
+       u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
++      u32 desc_limit;
+       old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
+@@ -4565,7 +4566,10 @@ int kvm_task_switch(struct kvm_vcpu *vcp
+               }
+       }
+-      if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
++      desc_limit = get_desc_limit(&nseg_desc);
++      if (!nseg_desc.p ||
++          ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
++           desc_limit < 0x2b)) {
+               kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
+               return 1;
+       }
diff --git a/queue-2.6.32/sched-use-proper-type-in-sched_getaffinity.patch b/queue-2.6.32/sched-use-proper-type-in-sched_getaffinity.patch
new file mode 100644 (file)
index 0000000..5effc6b
--- /dev/null
@@ -0,0 +1,40 @@
+From 8bc037fb89bb3104b9ae290d18c877624cd7d9cc Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Date: Wed, 17 Mar 2010 09:36:58 +0900
+Subject: sched: Use proper type in sched_getaffinity()
+
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+
+commit 8bc037fb89bb3104b9ae290d18c877624cd7d9cc upstream.
+
+Using the proper type fixes the following compiler warning:
+
+  kernel/sched.c:4850: warning: comparison of distinct pointer types lacks a cast
+
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: torvalds@linux-foundation.org
+Cc: travis@sgi.com
+Cc: peterz@infradead.org
+Cc: drepper@redhat.com
+Cc: rja@sgi.com
+Cc: sharyath@in.ibm.com
+Cc: steiner@sgi.com
+LKML-Reference: <20100317090046.4C79.A69D9226@jp.fujitsu.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6677,7 +6677,7 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t
+       ret = sched_getaffinity(pid, mask);
+       if (ret == 0) {
+-              int retlen = min(len, cpumask_size());
++              size_t retlen = min_t(size_t, len, cpumask_size());
+               if (copy_to_user(user_mask_ptr, mask, retlen))
+                       ret = -EFAULT;
index cecbe68d6e9639f530a0b102b5e22f7033d18304..603b4baa79a7827e98e306c7e67178872728065c 100644 (file)
@@ -183,3 +183,12 @@ tpm-autoload-tpm_tis-based-on-system-pnp-ids.patch
 ib-iser-rewrite-sg-handling-for-rdma-logic.patch
 mptctl-remove-printk-which-floods-unnecessary-messages-to-var-log-message.patch
 mptspi-fix-for-incorrect-data-underrun-errata.patch
+sched-use-proper-type-in-sched_getaffinity.patch
+0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
+0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
+0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
+0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
+0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
+0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
+0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
+0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch