git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff

2.6.33 patches
author    Greg Kroah-Hartman <gregkh@suse.de>    Thu, 22 Apr 2010 16:15:12 +0000 (09:15 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>    Thu, 22 Apr 2010 16:15:12 +0000 (09:15 -0700)
12 files changed:
queue-2.6.33/0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch [new file with mode: 0644]
queue-2.6.33/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch [new file with mode: 0644]
queue-2.6.33/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch [new file with mode: 0644]
queue-2.6.33/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch [new file with mode: 0644]
queue-2.6.33/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch [new file with mode: 0644]
queue-2.6.33/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch [new file with mode: 0644]
queue-2.6.33/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch [new file with mode: 0644]
queue-2.6.33/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch [new file with mode: 0644]
queue-2.6.33/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch [new file with mode: 0644]
queue-2.6.33/nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch [deleted file]
queue-2.6.33/sched-use-proper-type-in-sched_getaffinity.patch [new file with mode: 0644]
queue-2.6.33/series

diff --git a/queue-2.6.33/0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch b/queue-2.6.33/0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch
new file mode 100644
index 0000000..cbd21ae
--- /dev/null
@@ -0,0 +1,52 @@
+From 2e7e0afbb8be1a21a592da38c85c7fe68763f933 Mon Sep 17 00:00:00 2001
+From: Jan Kiszka <jan.kiszka@siemens.com>
+Date: Tue, 23 Feb 2010 17:47:53 +0100
+Subject: KVM: VMX: Update instruction length on intercepted BP
+
+From: Jan Kiszka <jan.kiszka@siemens.com>
+
+(Cherry-picked from commit c573cd22939e54fc1b8e672054a505048987a7cb)
+
+We intercept #BP while in guest debugging mode. As VM exits due to
+intercepted exceptions do not necessarily come with valid
+idt_vectoring, we have to update event_exit_inst_len explicitly in such
+cases. At least in the absence of migration, this ensures that
+re-injections of #BP will find and use the correct instruction length.
+
+Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/vmx.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2719,6 +2719,12 @@ static int handle_rmode_exception(struct
+               kvm_queue_exception(vcpu, vec);
+               return 1;
+       case BP_VECTOR:
++              /*
++               * Update instruction length as we may reinject the exception
++               * from user space while in guest debugging mode.
++               */
++              to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
++                      vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+                       return 0;
+               /* fall through */
+@@ -2841,6 +2847,13 @@ static int handle_exception(struct kvm_v
+               kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+               /* fall through */
+       case BP_VECTOR:
++              /*
++               * Update instruction length as we may reinject #BP from
++               * user space while in guest debugging mode. Reading it for
++               * #DB as well causes no harm, it is not used in that case.
++               */
++              vmx->vcpu.arch.event_exit_inst_len =
++                      vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+               kvm_run->debug.arch.exception = ex_no;
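
Background, not part of the patch: #BP raised by INT3 is a software exception, so when KVM re-injects it, VMX must be told the length of the trapping instruction at VM entry in order to report the correct return RIP. A minimal sketch of the injection side, assuming the 2.6.33-era VMCS field names from vmx.c:

    /* Sketch: where event_exit_inst_len is consumed on re-injection.
     * VMX uses the VM-entry instruction length to step past the INT3. */
    static void reinject_bp(struct vcpu_vmx *vmx)
    {
            vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
                         vmx->vcpu.arch.event_exit_inst_len);
            vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, BP_VECTOR |
                         INTR_TYPE_SOFT_EXCEPTION | INTR_INFO_VALID_MASK);
    }
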
diff --git a/queue-2.6.33/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch b/queue-2.6.33/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
new file mode 100644
index 0000000..0b145c7
--- /dev/null
@@ -0,0 +1,74 @@
+From c84211fe3f1ea88493688fe417639aa2a9b0edef Mon Sep 17 00:00:00 2001
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Date: Tue, 9 Mar 2010 14:55:19 +0900
+Subject: KVM: SVM: Fix memory leaks that happen when svm_create_vcpu() fails
+
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+
+(Cherry-picked from commit b7af40433870aa0636932ad39b0c48a0cb319057)
+
+svm_create_vcpu() does not free the pages allocated during the creation
+when it fails to complete the allocations. This patch fixes it.
+
+Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/svm.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -698,29 +698,28 @@ static struct kvm_vcpu *svm_create_vcpu(
+       if (err)
+               goto free_svm;
++      err = -ENOMEM;
+       page = alloc_page(GFP_KERNEL);
+-      if (!page) {
+-              err = -ENOMEM;
++      if (!page)
+               goto uninit;
+-      }
+-      err = -ENOMEM;
+       msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+       if (!msrpm_pages)
+-              goto uninit;
++              goto free_page1;
+       nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+       if (!nested_msrpm_pages)
+-              goto uninit;
+-
+-      svm->msrpm = page_address(msrpm_pages);
+-      svm_vcpu_init_msrpm(svm->msrpm);
++              goto free_page2;
+       hsave_page = alloc_page(GFP_KERNEL);
+       if (!hsave_page)
+-              goto uninit;
++              goto free_page3;
++
+       svm->nested.hsave = page_address(hsave_page);
++      svm->msrpm = page_address(msrpm_pages);
++      svm_vcpu_init_msrpm(svm->msrpm);
++
+       svm->nested.msrpm = page_address(nested_msrpm_pages);
+       svm->vmcb = page_address(page);
+@@ -737,6 +736,12 @@ static struct kvm_vcpu *svm_create_vcpu(
+       return &svm->vcpu;
++free_page3:
++      __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
++free_page2:
++      __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
++free_page1:
++      __free_page(page);
+ uninit:
+       kvm_vcpu_uninit(&svm->vcpu);
+ free_svm:
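
The fix is the kernel's standard error-unwinding ladder: allocate in order, and on each failure jump to a label that frees only what was already allocated, in reverse order. A generic sketch of the idiom (names are illustrative, not from the patch):

    struct two_pages { struct page *a, *b; };

    static struct two_pages *two_pages_create(void)
    {
            struct two_pages *t = kzalloc(sizeof(*t), GFP_KERNEL);

            if (!t)
                    return NULL;
            t->a = alloc_page(GFP_KERNEL);
            if (!t->a)
                    goto free_t;
            t->b = alloc_page(GFP_KERNEL);
            if (!t->b)
                    goto free_a;    /* unwind in reverse allocation order */
            return t;
    free_a:
            __free_page(t->a);
    free_t:
            kfree(t);
            return NULL;
    }
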
diff --git a/queue-2.6.33/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch b/queue-2.6.33/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
new file mode 100644
index 0000000..fc63486
--- /dev/null
@@ -0,0 +1,158 @@
+From 654be416ec97b546e6c4f1cbe6de6a7571a6ad23 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 11 Mar 2010 12:20:03 +0200
+Subject: KVM: Don't spam kernel log when injecting exceptions due to bad cr writes
+
+From: Avi Kivity <avi@redhat.com>
+
+(Cherry-picked from commit d6a23895aa82353788a1cc5a1d9a1c963465463e)
+
+These are guest-triggerable.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/x86.c |   27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -384,21 +384,16 @@ out:
+ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
+       if (cr0 & CR0_RESERVED_BITS) {
+-              printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
+-                     cr0, vcpu->arch.cr0);
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
+-              printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
+-              printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
+-                     "and a clear PE flag\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -409,15 +404,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu,
+                       int cs_db, cs_l;
+                       if (!is_pae(vcpu)) {
+-                              printk(KERN_DEBUG "set_cr0: #GP, start paging "
+-                                     "in long mode while PAE is disabled\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+                       }
+                       kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+                       if (cs_l) {
+-                              printk(KERN_DEBUG "set_cr0: #GP, start paging "
+-                                     "in long mode while CS.L == 1\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+@@ -425,8 +416,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu,
+               } else
+ #endif
+               if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
+-                      printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
+-                             "reserved bits\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+@@ -453,28 +442,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu,
+       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+       if (cr4 & CR4_RESERVED_BITS) {
+-              printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if (is_long_mode(vcpu)) {
+               if (!(cr4 & X86_CR4_PAE)) {
+-                      printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
+-                             "in long mode\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+       } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
+                  && ((cr4 ^ old_cr4) & pdptr_bits)
+                  && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
+-              printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if (cr4 & X86_CR4_VMXE) {
+-              printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -495,21 +479,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu,
+       if (is_long_mode(vcpu)) {
+               if (cr3 & CR3_L_MODE_RESERVED_BITS) {
+-                      printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+       } else {
+               if (is_pae(vcpu)) {
+                       if (cr3 & CR3_PAE_RESERVED_BITS) {
+-                              printk(KERN_DEBUG
+-                                     "set_cr3: #GP, reserved bits\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+                       }
+                       if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
+-                              printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
+-                                     "reserved bits\n");
+                               kvm_inject_gp(vcpu, 0);
+                               return;
+                       }
+@@ -541,7 +520,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
+ void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+ {
+       if (cr8 & CR8_RESERVED_BITS) {
+-              printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -595,15 +573,12 @@ static u32 emulated_msrs[] = {
+ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ {
+       if (efer & efer_reserved_bits) {
+-              printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
+-                     efer);
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+       if (is_paging(vcpu)
+           && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+-              printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
+               kvm_inject_gp(vcpu, 0);
+               return;
+       }
+@@ -613,7 +588,6 @@ static void set_efer(struct kvm_vcpu *vc
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+               if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+-                      printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+@@ -624,7 +598,6 @@ static void set_efer(struct kvm_vcpu *vc
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+               if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+-                      printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
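
The one-line rationale is load-bearing: a guest can attempt these control-register writes at will, and each attempt produced a host-side printk(), i.e. a guest-controlled flood of the host log. Deleting the messages loses nothing, since kvm_inject_gp() already reports the fault to the guest; had the messages been worth keeping, rate limiting would have been the alternative (illustrative, not what the patch does):

    if (printk_ratelimit())
            printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits\n", cr0);
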
diff --git a/queue-2.6.33/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch b/queue-2.6.33/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
new file mode 100644
index 0000000..c9751ae
--- /dev/null
@@ -0,0 +1,44 @@
+From a768dbcc203c867d86f8d296dc7a09a34c71e0d4 Mon Sep 17 00:00:00 2001
+From: Andre Przywara <andre.przywara@amd.com>
+Date: Wed, 24 Mar 2010 17:46:42 +0100
+Subject: KVM: allow bit 10 to be cleared in MSR_IA32_MC4_CTL
+
+From: Andre Przywara <andre.przywara@amd.com>
+
+(Cherry-picked from commit 114be429c8cd44e57f312af2bbd6734e5a185b0d)
+
+There is a quirk for AMD K8 CPUs in many Linux kernels (see
+arch/x86/kernel/cpu/mcheck/mce.c:__mcheck_cpu_apply_quirks()) that
+clears bit 10 in that MCE-related MSR. KVM can only cope with all
+zeros or all ones, so it will inject a #GP into the guest, which
+will make it panic.
+So let's add a quirk to the quirk and ignore this single cleared bit.
+This fixes -cpu kvm64 on all machines and -cpu host on K8 machines
+with some guest Linux kernels.
+
+Signed-off-by: Andre Przywara <andre.przywara@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/x86.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -886,9 +886,13 @@ static int set_msr_mce(struct kvm_vcpu *
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+-                      /* only 0 or all 1s can be written to IA32_MCi_CTL */
++                      /* only 0 or all 1s can be written to IA32_MCi_CTL
++                       * some Linux kernels though clear bit 10 in bank 4 to
+                       * work around a BIOS/GART TBL issue on AMD K8s; ignore
+                       * this to avoid an uncaught #GP in the guest
++                       */
+                       if ((offset & 0x3) == 0 &&
+-                          data != 0 && data != ~(u64)0)
++                          data != 0 && (data | (1 << 10)) != ~(u64)0)
+                               return -1;
+                       vcpu->arch.mce_banks[offset] = data;
+                       break;
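
Worked values for the relaxed check (illustrative): the K8 quirk clears only bit 10 of an otherwise all-ones value, so OR-ing bit 10 back in restores ~0 and the write is accepted, while any other cleared bit still fails:

    u64 quirked = ~(u64)0 & ~(1ULL << 10);       /* 0xfffffffffffffbff  */
    u64 other   = ~(u64)0 & ~(1ULL << 5);

    bool ok  = (quirked | (1 << 10)) == ~(u64)0; /* true: accepted      */
    bool bad = (other   | (1 << 10)) == ~(u64)0; /* false: still -1/#GP */
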
diff --git a/queue-2.6.33/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch b/queue-2.6.33/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
new file mode 100644
index 0000000..991381f
--- /dev/null
@@ -0,0 +1,99 @@
+From 7cd9dc94c7c44c635def04a9d89d2d2e8e3741bd Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 8 Apr 2010 18:19:35 +0300
+Subject: KVM: VMX: Save/restore rflags.vm correctly in real mode
+
+From: Avi Kivity <avi@redhat.com>
+
+(Cherry-picked from commit 78ac8b47c566dd6177a3b9b291b756ccb70670b7)
+
+Currently we set eflags.vm unconditionally when entering real mode emulation
+through virtual-8086 mode, and clear it unconditionally when we enter protected
+mode.  This means that the following sequence
+
+  KVM_SET_REGS  (rflags.vm=1)
+  KVM_SET_SREGS (cr0.pe=1)
+
+ends up with rflags.vm clear due to KVM_SET_SREGS triggering enter_pmode().
+
+Fix by shadowing rflags.vm (and rflags.iopl) correctly while in real mode:
+reads and writes to those bits access a shadow register instead of the actual
+register.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/vmx.c |   24 +++++++++++++++---------
+ 1 file changed, 15 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -61,6 +61,8 @@ module_param_named(unrestricted_guest,
+ static int __read_mostly emulate_invalid_guest_state = 0;
+ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
++#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
++
+ /*
+  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
+  * ple_gap:    upper bound on the amount of time between two successive
+@@ -115,7 +117,7 @@ struct vcpu_vmx {
+       } host_state;
+       struct {
+               int vm86_active;
+-              u8 save_iopl;
++              ulong save_rflags;
+               struct kvm_save_segment {
+                       u16 selector;
+                       unsigned long base;
+@@ -787,18 +789,23 @@ static void vmx_fpu_deactivate(struct kv
+ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
+ {
+-      unsigned long rflags;
++      unsigned long rflags, save_rflags;
+       rflags = vmcs_readl(GUEST_RFLAGS);
+-      if (to_vmx(vcpu)->rmode.vm86_active)
+-              rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
++      if (to_vmx(vcpu)->rmode.vm86_active) {
++              rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
++              save_rflags = to_vmx(vcpu)->rmode.save_rflags;
++              rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
++      }
+       return rflags;
+ }
+ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ {
+-      if (to_vmx(vcpu)->rmode.vm86_active)
++      if (to_vmx(vcpu)->rmode.vm86_active) {
++              to_vmx(vcpu)->rmode.save_rflags = rflags;
+               rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
++      }
+       vmcs_writel(GUEST_RFLAGS, rflags);
+ }
+@@ -1431,8 +1438,8 @@ static void enter_pmode(struct kvm_vcpu
+       vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
+       flags = vmcs_readl(GUEST_RFLAGS);
+-      flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+-      flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
++      flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
++      flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+       vmcs_writel(GUEST_RFLAGS, flags);
+       vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
+@@ -1501,8 +1508,7 @@ static void enter_rmode(struct kvm_vcpu
+       vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+       flags = vmcs_readl(GUEST_RFLAGS);
+-      vmx->rmode.save_iopl
+-              = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
++      vmx->rmode.save_rflags = flags;
+       flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
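
The failing sequence from the commit message, spelled out as a hypothetical userspace reproduction (the ioctls are the real KVM API; the flag constants and the vcpu_fd setup are assumptions):

    struct kvm_regs regs;
    struct kvm_sregs sregs;

    ioctl(vcpu_fd, KVM_GET_REGS, &regs);
    regs.rflags |= 0x20000;                 /* X86_EFLAGS_VM: virtual-8086 */
    ioctl(vcpu_fd, KVM_SET_REGS, &regs);

    ioctl(vcpu_fd, KVM_GET_SREGS, &sregs);
    sregs.cr0 |= 0x1;                       /* X86_CR0_PE: protected mode  */
    ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);  /* pre-patch: enter_pmode()
                                               dropped rflags.vm; with the
                                               patch it survives in
                                               rmode.save_rflags           */
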
diff --git a/queue-2.6.33/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch b/queue-2.6.33/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
new file mode 100644
index 0000000..51bda62
--- /dev/null
@@ -0,0 +1,65 @@
+From b4bb883779169c8713ad93561e427556e9fde384 Mon Sep 17 00:00:00 2001
+From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+Date: Fri, 16 Apr 2010 16:34:42 +0800
+Subject: KVM: MMU: fix kvm_mmu_zap_page() and its calling path
+
+From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+
+(Cherry-picked from commit 77662e0028c7c63e34257fda03ff9625c59d939d)
+
+This patch fixes:
+
+- calculate zapped page number properly in mmu_zap_unsync_children()
+- calculate freed page number properly in kvm_mmu_change_mmu_pages()
+- restart hlist walking if child pages were zapped
+
+KVM-Stable-Tag.
+Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/mmu.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1502,8 +1502,8 @@ static int mmu_zap_unsync_children(struc
+               for_each_sp(pages, sp, parents, i) {
+                       kvm_mmu_zap_page(kvm, sp);
+                       mmu_pages_clear_parents(&parents);
++                      zapped++;
+               }
+-              zapped += pages.nr;
+               kvm_mmu_pages_init(parent, &parents, &pages);
+       }
+@@ -1554,14 +1554,16 @@ void kvm_mmu_change_mmu_pages(struct kvm
+        */
+       if (used_pages > kvm_nr_mmu_pages) {
+-              while (used_pages > kvm_nr_mmu_pages) {
++              while (used_pages > kvm_nr_mmu_pages &&
++                      !list_empty(&kvm->arch.active_mmu_pages)) {
+                       struct kvm_mmu_page *page;
+                       page = container_of(kvm->arch.active_mmu_pages.prev,
+                                           struct kvm_mmu_page, link);
+-                      kvm_mmu_zap_page(kvm, page);
++                      used_pages -= kvm_mmu_zap_page(kvm, page);
+                       used_pages--;
+               }
++              kvm_nr_mmu_pages = used_pages;
+               kvm->arch.n_free_mmu_pages = 0;
+       }
+       else
+@@ -1608,7 +1610,8 @@ static void mmu_unshadow(struct kvm *kvm
+                   && !sp->role.invalid) {
+                       pgprintk("%s: zap %lx %x\n",
+                                __func__, gfn, sp->role.word);
+-                      kvm_mmu_zap_page(kvm, sp);
++                      if (kvm_mmu_zap_page(kvm, sp))
++                              nn = bucket->first;
+               }
+       }
+ }
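
The restart in the third hunk follows a common hash-walk pattern: kvm_mmu_zap_page() can unlink pages beyond the one passed in (its unsynced children), so even a safe iterator's saved next pointer may be stale afterwards. A generic sketch of the idiom (illustrative; the patch itself re-reads bucket->first instead):

    static void zap_matching(struct kvm *kvm, struct hlist_head *bucket, gfn_t gfn)
    {
            struct kvm_mmu_page *sp;
            struct hlist_node *node, *nn;

    restart:
            hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link)
                    if (sp->gfn == gfn && kvm_mmu_zap_page(kvm, sp))
                            goto restart;   /* zap may have freed entries
                                               beyond sp, including nn    */
    }
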
diff --git a/queue-2.6.33/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch b/queue-2.6.33/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
new file mode 100644
index 0000000..74dd65d
--- /dev/null
@@ -0,0 +1,168 @@
+From 6f695855d25cbb27c8306c5147a29cffeacbe7e4 Mon Sep 17 00:00:00 2001
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Date: Mon, 12 Apr 2010 19:35:35 +0900
+Subject: KVM: fix the handling of dirty bitmaps to avoid overflows
+
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+
+(Cherry-picked from commit 87bf6e7de1134f48681fd2ce4b7c1ec45458cb6d)
+
+Int is not long enough to store the size of a dirty bitmap.
+
+This patch fixes this problem with the introduction of a wrapper
+function to calculate the sizes of dirty bitmaps.
+
+Note: in mark_page_dirty(), we have to consider the fact that
+  __set_bit() takes the offset as int, not long.
+
+Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/ia64/kvm/kvm-ia64.c  |    9 +++++----
+ arch/powerpc/kvm/book3s.c |    5 +++--
+ arch/x86/kvm/x86.c        |    4 ++--
+ include/linux/kvm_host.h  |    5 +++++
+ virt/kvm/kvm_main.c       |   13 ++++++++-----
+ 5 files changed, 23 insertions(+), 13 deletions(-)
+
+--- a/arch/ia64/kvm/kvm-ia64.c
++++ b/arch/ia64/kvm/kvm-ia64.c
+@@ -1794,7 +1794,8 @@ static int kvm_ia64_sync_dirty_log(struc
+ {
+       struct kvm_memory_slot *memslot;
+       int r, i;
+-      long n, base;
++      long base;
++      unsigned long n;
+       unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+                       offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+@@ -1807,7 +1808,7 @@ static int kvm_ia64_sync_dirty_log(struc
+       if (!memslot->dirty_bitmap)
+               goto out;
+-      n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++      n = kvm_dirty_bitmap_bytes(memslot);
+       base = memslot->base_gfn / BITS_PER_LONG;
+       for (i = 0; i < n/sizeof(long); ++i) {
+@@ -1823,7 +1824,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+               struct kvm_dirty_log *log)
+ {
+       int r;
+-      int n;
++      unsigned long n;
+       struct kvm_memory_slot *memslot;
+       int is_dirty = 0;
+@@ -1841,7 +1842,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+       if (is_dirty) {
+               kvm_flush_remote_tlbs(kvm);
+               memslot = &kvm->memslots[log->slot];
+-              n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++              n = kvm_dirty_bitmap_bytes(memslot);
+               memset(memslot->dirty_bitmap, 0, n);
+       }
+       r = 0;
+--- a/arch/powerpc/kvm/book3s.c
++++ b/arch/powerpc/kvm/book3s.c
+@@ -848,7 +848,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+       struct kvm_vcpu *vcpu;
+       ulong ga, ga_end;
+       int is_dirty = 0;
+-      int r, n;
++      int r;
++      unsigned long n;
+       down_write(&kvm->slots_lock);
+@@ -866,7 +867,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+               kvm_for_each_vcpu(n, vcpu, kvm)
+                       kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
+-              n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++              n = kvm_dirty_bitmap_bytes(memslot);
+               memset(memslot->dirty_bitmap, 0, n);
+       }
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2343,7 +2343,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+                                     struct kvm_dirty_log *log)
+ {
+       int r;
+-      int n;
++      unsigned long n;
+       struct kvm_memory_slot *memslot;
+       int is_dirty = 0;
+@@ -2359,7 +2359,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+               kvm_mmu_slot_remove_write_access(kvm, log->slot);
+               spin_unlock(&kvm->mmu_lock);
+               memslot = &kvm->memslots[log->slot];
+-              n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++              n = kvm_dirty_bitmap_bytes(memslot);
+               memset(memslot->dirty_bitmap, 0, n);
+       }
+       r = 0;
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -116,6 +116,11 @@ struct kvm_memory_slot {
+       int user_alloc;
+ };
++static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
++{
++      return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++}
++
+ struct kvm_kernel_irq_routing_entry {
+       u32 gsi;
+       u32 type;
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -636,7 +636,7 @@ skip_lpage:
+       /* Allocate page dirty bitmap if needed */
+       if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
+-              unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
++              unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
+               new.dirty_bitmap = vmalloc(dirty_bytes);
+               if (!new.dirty_bitmap)
+@@ -719,7 +719,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
+ {
+       struct kvm_memory_slot *memslot;
+       int r, i;
+-      int n;
++      unsigned long n;
+       unsigned long any = 0;
+       r = -EINVAL;
+@@ -731,7 +731,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
+       if (!memslot->dirty_bitmap)
+               goto out;
+-      n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++      n = kvm_dirty_bitmap_bytes(memslot);
+       for (i = 0; !any && i < n/sizeof(long); ++i)
+               any = memslot->dirty_bitmap[i];
+@@ -1073,10 +1073,13 @@ void mark_page_dirty(struct kvm *kvm, gf
+       memslot = gfn_to_memslot_unaliased(kvm, gfn);
+       if (memslot && memslot->dirty_bitmap) {
+               unsigned long rel_gfn = gfn - memslot->base_gfn;
++              unsigned long *p = memslot->dirty_bitmap +
++                                      rel_gfn / BITS_PER_LONG;
++              int offset = rel_gfn % BITS_PER_LONG;
+               /* avoid RMW */
+-              if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
+-                      generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
++              if (!generic_test_le_bit(offset, p))
++                      generic___set_le_bit(offset, p);
+       }
+ }
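
The subtle half of the fix is in mark_page_dirty(): even once the byte counts are unsigned long, the generic_*_le_bit() helpers still take the bit number as int, so a large gfn offset must first be reduced to a word pointer plus a small in-word offset. A condensed sketch of that reduction, assuming the 2.6.33 helpers:

    static void set_le_bit_long(unsigned long nr, unsigned long *bitmap)
    {
            unsigned long *p = bitmap + nr / BITS_PER_LONG; /* word        */
            int offset = nr % BITS_PER_LONG;                /* always fits */

            if (!generic_test_le_bit(offset, p))            /* avoid RMW   */
                    generic___set_le_bit(offset, p);
    }
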
diff --git a/queue-2.6.33/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch b/queue-2.6.33/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
new file mode 100644
index 0000000..c1a90ae
--- /dev/null
@@ -0,0 +1,33 @@
+From 96c6ce5c0567a909a5f0d96bdec7786c63dbb5c6 Mon Sep 17 00:00:00 2001
+From: Sridhar Samudrala <sri@us.ibm.com>
+Date: Tue, 30 Mar 2010 16:48:25 -0700
+Subject: KVM: Increase NR_IOBUS_DEVS limit to 200
+
+From: Sridhar Samudrala <sri@us.ibm.com>
+
+(Cherry-picked from commit e80e2a60ff7914dae691345a976c80bbbff3ec74)
+
+This patch increases the current hardcoded limit of NR_IOBUS_DEVS
+from 6 to 200. We are hitting this limit when creating a guest with more
+than 1 virtio-net device using vhost-net backend. Each virtio-net
+device requires 2 such devices to service notifications from rx/tx queues.
+
+Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/kvm_host.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -53,7 +53,7 @@ extern struct kmem_cache *kvm_vcpu_cache
+  */
+ struct kvm_io_bus {
+       int                   dev_count;
+-#define NR_IOBUS_DEVS 6
++#define NR_IOBUS_DEVS 200
+       struct kvm_io_device *devs[NR_IOBUS_DEVS];
+ };
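
Quick arithmetic behind the new limit (illustrative): with vhost-net, each virtio-net device registers two eventfd-backed devices on the io bus, one kick per rx/tx queue, on top of whatever in-kernel devices already occupy slots, so the old limit of 6 was roughly exhausted by a second NIC; 200 leaves headroom for on the order of a hundred queue pairs.
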
diff --git a/queue-2.6.33/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch b/queue-2.6.33/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch
new file mode 100644
index 0000000..dd55218
--- /dev/null
@@ -0,0 +1,42 @@
+From 2ffd9161aedc6e4f2591d1978a936258b3f98528 Mon Sep 17 00:00:00 2001
+From: Jan Kiszka <jan.kiszka@siemens.com>
+Date: Wed, 14 Apr 2010 16:57:11 +0200
+Subject: KVM: x86: Fix TSS size check for 16-bit tasks
+
+From: Jan Kiszka <jan.kiszka@siemens.com>
+
+(Cherry-picked from commit e8861cfe2c75bdce36655b64d7ce02c2b31b604d)
+
+A 16-bit TSS is only 44 bytes long. So make sure to test for the correct
+size on task switch.
+
+Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kvm/x86.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4576,6 +4576,7 @@ int kvm_task_switch(struct kvm_vcpu *vcp
+       int ret = 0;
+       u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
+       u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
++      u32 desc_limit;
+       old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
+@@ -4598,7 +4599,10 @@ int kvm_task_switch(struct kvm_vcpu *vcp
+               }
+       }
+-      if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
++      desc_limit = get_desc_limit(&nseg_desc);
++      if (!nseg_desc.p ||
++          ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
++           desc_limit < 0x2b)) {
+               kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
+               return 1;
+       }
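
Worked sizes behind the check: a 32-bit TSS is 104 bytes, so its descriptor limit must be at least 0x67 (104 - 1); a 16-bit TSS is only 44 bytes, so 0x2b suffices; and bit 3 of the descriptor type distinguishes the two. The rule as a standalone helper (an illustrative sketch, not kernel code):

    static bool tss_too_small(u32 desc_limit, u8 desc_type)
    {
            if (desc_type & 8)                /* 32-bit TSS descriptor */
                    return desc_limit < 0x67; /* needs 104 bytes       */
            return desc_limit < 0x2b;         /* 16-bit: 44 bytes      */
    }
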
diff --git a/queue-2.6.33/nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch b/queue-2.6.33/nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch
deleted file mode 100644
index 5cc565c..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-From dc83d6e27fa80babe31c80aa8568f125f72edf57 Mon Sep 17 00:00:00 2001
-From: J. Bruce Fields <bfields@citi.umich.edu>
-Date: Tue, 20 Oct 2009 18:51:34 -0400
-Subject: nfsd4: don't try to map gid's in generic rpc code
-
-From: J. Bruce Fields <bfields@citi.umich.edu>
-
-commit dc83d6e27fa80babe31c80aa8568f125f72edf57 upstream.
-
-For nfsd we provide users the option of mapping uid's to server-side
-supplementary group lists.  That makes sense for nfsd, but not
-necessarily for other rpc users (such as the callback client).
-
-So move that lookup to svcauth_unix_set_client, which is a
-program-specific method.
-
-Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
-Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-
---- a/net/sunrpc/svcauth_unix.c
-+++ b/net/sunrpc/svcauth_unix.c
-@@ -655,23 +655,25 @@ static struct unix_gid *unix_gid_lookup(uid_t uid)
-               return NULL;
- }
--static int unix_gid_find(uid_t uid, struct group_info **gip,
--                       struct svc_rqst *rqstp)
-+static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
- {
--      struct unix_gid *ug = unix_gid_lookup(uid);
-+      struct unix_gid *ug;
-+      struct group_info *gi;
-+      int ret;
-+
-+      ug = unix_gid_lookup(uid);
-       if (!ug)
--              return -EAGAIN;
--      switch (cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle)) {
-+              return ERR_PTR(-EAGAIN);
-+      ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle);
-+      switch (ret) {
-       case -ENOENT:
--              *gip = NULL;
--              return 0;
-+              return ERR_PTR(-ENOENT);
-       case 0:
--              *gip = ug->gi;
--              get_group_info(*gip);
-+              gi = get_group_info(ug->gi);
-               cache_put(&ug->h, &unix_gid_cache);
--              return 0;
-+              return gi;
-       default:
--              return -EAGAIN;
-+              return ERR_PTR(-EAGAIN);
-       }
- }
-@@ -681,6 +683,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
-       struct sockaddr_in *sin;
-       struct sockaddr_in6 *sin6, sin6_storage;
-       struct ip_map *ipm;
-+      struct group_info *gi;
-+      struct svc_cred *cred = &rqstp->rq_cred;
-       switch (rqstp->rq_addr.ss_family) {
-       case AF_INET:
-@@ -722,6 +726,17 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
-                       ip_map_cached_put(rqstp, ipm);
-                       break;
-       }
-+
-+      gi = unix_gid_find(cred->cr_uid, rqstp);
-+      switch (PTR_ERR(gi)) {
-+      case -EAGAIN:
-+              return SVC_DROP;
-+      case -ENOENT:
-+              break;
-+      default:
-+              put_group_info(cred->cr_group_info);
-+              cred->cr_group_info = gi;
-+      }
-       return SVC_OK;
- }
-@@ -818,19 +833,11 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
-       slen = svc_getnl(argv);                 /* gids length */
-       if (slen > 16 || (len -= (slen + 2)*4) < 0)
-               goto badcred;
--      if (unix_gid_find(cred->cr_uid, &cred->cr_group_info, rqstp)
--          == -EAGAIN)
-+      cred->cr_group_info = groups_alloc(slen);
-+      if (cred->cr_group_info == NULL)
-               return SVC_DROP;
--      if (cred->cr_group_info == NULL) {
--              cred->cr_group_info = groups_alloc(slen);
--              if (cred->cr_group_info == NULL)
--                      return SVC_DROP;
--              for (i = 0; i < slen; i++)
--                      GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
--      } else {
--              for (i = 0; i < slen ; i++)
--                      svc_getnl(argv);
--      }
-+      for (i = 0; i < slen; i++)
-+              GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
-       if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
-               *authp = rpc_autherr_badverf;
-               return SVC_DENIED;
diff --git a/queue-2.6.33/sched-use-proper-type-in-sched_getaffinity.patch b/queue-2.6.33/sched-use-proper-type-in-sched_getaffinity.patch
new file mode 100644
index 0000000..8be8456
--- /dev/null
@@ -0,0 +1,40 @@
+From 8bc037fb89bb3104b9ae290d18c877624cd7d9cc Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Date: Wed, 17 Mar 2010 09:36:58 +0900
+Subject: sched: Use proper type in sched_getaffinity()
+
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+
+commit 8bc037fb89bb3104b9ae290d18c877624cd7d9cc upstream.
+
+Using the proper type fixes the following compiler warning:
+
+  kernel/sched.c:4850: warning: comparison of distinct pointer types lacks a cast
+
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: torvalds@linux-foundation.org
+Cc: travis@sgi.com
+Cc: peterz@infradead.org
+Cc: drepper@redhat.com
+Cc: rja@sgi.com
+Cc: sharyath@in.ibm.com
+Cc: steiner@sgi.com
+LKML-Reference: <20100317090046.4C79.A69D9226@jp.fujitsu.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6727,7 +6727,7 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t
+       ret = sched_getaffinity(pid, mask);
+       if (ret == 0) {
+-              int retlen = min(len, cpumask_size());
++              size_t retlen = min_t(size_t, len, cpumask_size());
+               if (copy_to_user(user_mask_ptr, mask, retlen))
+                       ret = -EFAULT;
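
Background on the warning (not from the patch): the kernel's min() macro type-checks its operands, and len arrives from the syscall ABI as unsigned int while cpumask_size() returns size_t; min_t() resolves the mismatch by casting both sides to the named type first. Sketch:

    unsigned int len = 128;  /* userspace-supplied buffer length */

    /* min(len, cpumask_size()) trips min()'s same-type check;
     * min_t() casts both operands to size_t before comparing.  */
    size_t retlen = min_t(size_t, len, cpumask_size());
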
diff --git a/queue-2.6.33/series b/queue-2.6.33/series
index 5fbb49d04e8623560891270f17bc8bf6e7bd4b2e..718b8cc2c2a3e62fe60c6b570e467191acd9955f 100644
@@ -125,4 +125,13 @@ module-fix-__module_ref_addr.patch
 md-deal-with-merge_bvec_fn-in-component-devices-better.patch
 powerpc-fix-smp-build-with-disabled-cpu-hotplugging.patch
 ext4-fix-async-i-o-writes-beyond-4gb-to-a-sparse-file.patch
-nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch
+sched-use-proper-type-in-sched_getaffinity.patch
+0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch
+0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
+0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
+0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
+0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
+0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
+0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
+0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
+0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch