From b8510c843c1d82afa10efbe2f73cc39b1e44d5f3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Thu, 22 Apr 2010 09:15:12 -0700
Subject: [PATCH] .33 patches

---
 ...instruction-length-on-intercepted-BP.patch |  52 ++++++
 ...ry-leaks-that-happen-when-svm_create.patch |  74 ++++++++
 ...ernel-log-when-injecting-exceptions-.patch | 158 ++++++++++++++++
 ...10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch |  44 +++++
 ...tore-rflags.vm-correctly-in-real-mod.patch |  99 +++++++++++
 ...vm_mmu_zap_page-and-its-calling-path.patch |  65 +++++++
 ...ling-of-dirty-bitmaps-to-avoid-overf.patch | 168 ++++++++++++++++++
 ...-Increase-NR_IOBUS_DEVS-limit-to-200.patch |  33 ++++
 ...-Fix-TSS-size-check-for-16-bit-tasks.patch |  42 +++++
 ...try-to-map-gid-s-in-generic-rpc-code.patch | 109 ------------
 ...use-proper-type-in-sched_getaffinity.patch |  40 +++++
 queue-2.6.33/series                           |  11 +-
 12 files changed, 785 insertions(+), 110 deletions(-)
 create mode 100644 queue-2.6.33/0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch
 create mode 100644 queue-2.6.33/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
 create mode 100644 queue-2.6.33/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
 create mode 100644 queue-2.6.33/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
 create mode 100644 queue-2.6.33/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
 create mode 100644 queue-2.6.33/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
 create mode 100644 queue-2.6.33/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
 create mode 100644 queue-2.6.33/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
 create mode 100644 queue-2.6.33/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch
 delete mode 100644 queue-2.6.33/nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch
 create mode 100644 queue-2.6.33/sched-use-proper-type-in-sched_getaffinity.patch

diff --git a/queue-2.6.33/0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch b/queue-2.6.33/0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch
new file mode 100644
index 00000000000..cbd21ae9a46
--- /dev/null
+++ b/queue-2.6.33/0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch
@@ -0,0 +1,52 @@
+From 2e7e0afbb8be1a21a592da38c85c7fe68763f933 Mon Sep 17 00:00:00 2001
+From: Jan Kiszka
+Date: Tue, 23 Feb 2010 17:47:53 +0100
+Subject: KVM: VMX: Update instruction length on intercepted BP
+
+From: Jan Kiszka
+
+(Cherry-picked from commit c573cd22939e54fc1b8e672054a505048987a7cb)
+
+We intercept #BP while in guest debugging mode. As VM exits due to
+intercepted exceptions do not necessarily come with valid
+idt_vectoring, we have to update event_exit_inst_len explicitly in such
+cases. At least in the absence of migration, this ensures that
+re-injections of #BP will find and use the correct instruction length.
+
+Signed-off-by: Jan Kiszka
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/vmx.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2719,6 +2719,12 @@ static int handle_rmode_exception(struct
+ 		kvm_queue_exception(vcpu, vec);
+ 		return 1;
+ 	case BP_VECTOR:
++		/*
++		 * Update instruction length as we may reinject the exception
++		 * from user space while in guest debugging mode.
++		 */
++		to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
++			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ 			return 0;
+ 		/* fall through */
+@@ -2841,6 +2847,13 @@ static int handle_exception(struct kvm_v
+ 		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+ 		/* fall through */
+ 	case BP_VECTOR:
++		/*
++		 * Update instruction length as we may reinject #BP from
++		 * user space while in guest debugging mode. Reading it for
++		 * #DB as well causes no harm, it is not used in that case.
++		 */
++		vmx->vcpu.arch.event_exit_inst_len =
++			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+ 		kvm_run->debug.arch.exception = ex_no;
diff --git a/queue-2.6.33/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch b/queue-2.6.33/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
new file mode 100644
index 00000000000..0b145c7e5e1
--- /dev/null
+++ b/queue-2.6.33/0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
@@ -0,0 +1,74 @@
+From c84211fe3f1ea88493688fe417639aa2a9b0edef Mon Sep 17 00:00:00 2001
+From: Takuya Yoshikawa
+Date: Tue, 9 Mar 2010 14:55:19 +0900
+Subject: KVM: SVM: Fix memory leaks that happen when svm_create_vcpu() fails
+
+From: Takuya Yoshikawa
+
+(Cherry-picked from commit b7af40433870aa0636932ad39b0c48a0cb319057)
+
+svm_create_vcpu() does not free the pages allocated during creation
+when it fails to complete the allocations. This patch fixes it.
+
+Signed-off-by: Takuya Yoshikawa
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/svm.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -698,29 +698,28 @@ static struct kvm_vcpu *svm_create_vcpu(
+ 	if (err)
+ 		goto free_svm;
+ 
++	err = -ENOMEM;
+ 	page = alloc_page(GFP_KERNEL);
+-	if (!page) {
+-		err = -ENOMEM;
++	if (!page)
+ 		goto uninit;
+-	}
+ 
+-	err = -ENOMEM;
+ 	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ 	if (!msrpm_pages)
+-		goto uninit;
++		goto free_page1;
+ 
+ 	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ 	if (!nested_msrpm_pages)
+-		goto uninit;
+-
+-	svm->msrpm = page_address(msrpm_pages);
+-	svm_vcpu_init_msrpm(svm->msrpm);
++		goto free_page2;
+ 
+ 	hsave_page = alloc_page(GFP_KERNEL);
+ 	if (!hsave_page)
+-		goto uninit;
++		goto free_page3;
++
+ 	svm->nested.hsave = page_address(hsave_page);
+ 
++	svm->msrpm = page_address(msrpm_pages);
++	svm_vcpu_init_msrpm(svm->msrpm);
++
+ 	svm->nested.msrpm = page_address(nested_msrpm_pages);
+ 
+ 	svm->vmcb = page_address(page);
+@@ -737,6 +736,12 @@ static struct kvm_vcpu *svm_create_vcpu(
+ 
+ 	return &svm->vcpu;
+ 
++free_page3:
++	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
++free_page2:
++	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
++free_page1:
++	__free_page(page);
+ uninit:
+ 	kvm_vcpu_uninit(&svm->vcpu);
+ free_svm:
diff --git a/queue-2.6.33/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch b/queue-2.6.33/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
new file mode 100644
index 00000000000..fc634865d74
--- /dev/null
+++ b/queue-2.6.33/0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
@@ -0,0 +1,158 @@
+From 654be416ec97b546e6c4f1cbe6de6a7571a6ad23 Mon Sep 17 00:00:00 2001
+From: Avi Kivity
+Date: Thu, 11 Mar 2010 12:20:03 +0200
+Subject: KVM: Don't spam kernel log when injecting exceptions due to bad cr writes
+
+From: Avi Kivity
+
+(Cherry-picked from commit d6a23895aa82353788a1cc5a1d9a1c963465463e)
+
+These are guest-triggerable.
+
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/x86.c |   27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -384,21 +384,16 @@ out:
+ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
+ 	if (cr0 & CR0_RESERVED_BITS) {
+-		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
+-			cr0, vcpu->arch.cr0);
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+ 
+ 	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
+-		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+ 
+ 	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
+-		printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
+-		       "and a clear PE flag\n");
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+@@ -409,15 +404,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu,
+ 			int cs_db, cs_l;
+ 
+ 			if (!is_pae(vcpu)) {
+-				printk(KERN_DEBUG "set_cr0: #GP, start paging "
+-				       "in long mode while PAE is disabled\n");
+ 				kvm_inject_gp(vcpu, 0);
+ 				return;
+ 			}
+ 			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ 			if (cs_l) {
+-				printk(KERN_DEBUG "set_cr0: #GP, start paging "
+-				       "in long mode while CS.L == 1\n");
+ 				kvm_inject_gp(vcpu, 0);
+ 				return;
+ 
+@@ -425,8 +416,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu,
+ 		} else
+ #endif
+ 		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
+-			printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
+-			       "reserved bits\n");
+ 			kvm_inject_gp(vcpu, 0);
+ 			return;
+ 		}
+@@ -453,28 +442,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu,
+ 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+ 
+ 	if (cr4 & CR4_RESERVED_BITS) {
+-		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+ 
+ 	if (is_long_mode(vcpu)) {
+ 		if (!(cr4 & X86_CR4_PAE)) {
+-			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
+-			       "in long mode\n");
+ 			kvm_inject_gp(vcpu, 0);
+ 			return;
+ 		}
+ 	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
+ 		   && ((cr4 ^ old_cr4) & pdptr_bits)
+ 		   && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
+-		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+ 
+ 	if (cr4 & X86_CR4_VMXE) {
+-		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+@@ -495,21 +479,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu,
+ 
+ 	if (is_long_mode(vcpu)) {
+ 		if (cr3 & CR3_L_MODE_RESERVED_BITS) {
+-			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
+ 			kvm_inject_gp(vcpu, 0);
+ 			return;
+ 		}
+ 	} else {
+ 		if (is_pae(vcpu)) {
+ 			if (cr3 & CR3_PAE_RESERVED_BITS) {
+-				printk(KERN_DEBUG
+-				       "set_cr3: #GP, reserved bits\n");
+ 				kvm_inject_gp(vcpu, 0);
+ 				return;
+ 			}
+ 			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
+-				printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
+-				       "reserved bits\n");
+ 				kvm_inject_gp(vcpu, 0);
+ 				return;
+ 			}
+@@ -541,7 +520,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
+ void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+ {
+ 	if (cr8 & CR8_RESERVED_BITS) {
+-		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+@@ -595,15 +573,12 @@ static u32 emulated_msrs[] = {
+ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ {
+ 	if (efer & efer_reserved_bits) {
+-		printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
+-			efer);
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+ 
+ 	if (is_paging(vcpu)
+ 	    && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+-		printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
+ 		kvm_inject_gp(vcpu, 0);
+ 		return;
+ 	}
+@@ -613,7 +588,6 @@ static void set_efer(struct kvm_vc
+ 
+ 		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ 		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+-			printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+ 			kvm_inject_gp(vcpu, 0);
+ 			return;
+ 		}
+@@ -624,7 +598,6 @@ static void set_efer(struct kvm_vc
+ 
+ 		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ 		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+-			printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+ 			kvm_inject_gp(vcpu, 0);
+ 			return;
+ 		}
diff --git a/queue-2.6.33/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch b/queue-2.6.33/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
new file mode 100644
index 00000000000..c9751aef91d
--- /dev/null
+++ b/queue-2.6.33/0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
@@ -0,0 +1,44 @@
+From a768dbcc203c867d86f8d296dc7a09a34c71e0d4 Mon Sep 17 00:00:00 2001
+From: Andre Przywara
+Date: Wed, 24 Mar 2010 17:46:42 +0100
+Subject: KVM: allow bit 10 to be cleared in MSR_IA32_MC4_CTL
+
+From: Andre Przywara
+
+(Cherry-picked from commit 114be429c8cd44e57f312af2bbd6734e5a185b0d)
+
+There is a quirk for AMD K8 CPUs in many Linux kernels (see
+arch/x86/kernel/cpu/mcheck/mce.c:__mcheck_cpu_apply_quirks()) that
+clears bit 10 in that MCE-related MSR. KVM can only cope with all
+zeros or all ones, so it will inject a #GP into the guest, which
+will let it panic.
+So let's add a quirk to the quirk and ignore this single cleared bit.
+This fixes -cpu kvm64 on all machines and -cpu host on K8 machines
+with some guest Linux kernels.
+
+Signed-off-by: Andre Przywara
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/x86.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -886,9 +886,13 @@ static int set_msr_mce(struct kvm_vcpu *
+ 	if (msr >= MSR_IA32_MC0_CTL &&
+ 	    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+ 		u32 offset = msr - MSR_IA32_MC0_CTL;
+-		/* only 0 or all 1s can be written to IA32_MCi_CTL */
++		/* only 0 or all 1s can be written to IA32_MCi_CTL
++		 * some Linux kernels though clear bit 10 in bank 4 to
++		 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
++		 * this to avoid an uncatched #GP in the guest
++		 */
+ 		if ((offset & 0x3) == 0 &&
+-		    data != 0 && data != ~(u64)0)
++		    data != 0 && (data | (1 << 10)) != ~(u64)0)
+ 			return -1;
+ 		vcpu->arch.mce_banks[offset] = data;
+ 		break;
diff --git a/queue-2.6.33/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch b/queue-2.6.33/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
new file mode 100644
index 00000000000..991381fb38a
--- /dev/null
+++ b/queue-2.6.33/0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
@@ -0,0 +1,99 @@
+From 7cd9dc94c7c44c635def04a9d89d2d2e8e3741bd Mon Sep 17 00:00:00 2001
+From: Avi Kivity
+Date: Thu, 8 Apr 2010 18:19:35 +0300
+Subject: KVM: VMX: Save/restore rflags.vm correctly in real mode
+
+From: Avi Kivity
+
+(Cherry-picked from commit 78ac8b47c566dd6177a3b9b291b756ccb70670b7)
+
+Currently we set eflags.vm unconditionally when entering real mode emulation
+through virtual-8086 mode, and clear it unconditionally when we enter protected
+mode. This means that the following sequence
+
+  KVM_SET_REGS  (rflags.vm=1)
+  KVM_SET_SREGS (cr0.pe=1)
+
+Ends up with rflags.vm clear due to KVM_SET_SREGS triggering enter_pmode().
+
+Fix by shadowing rflags.vm (and rflags.iopl) correctly while in real mode:
+reads and writes to those bits access a shadow register instead of the actual
+register.
+
+Signed-off-by: Avi Kivity
+Signed-off-by: Marcelo Tosatti
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/vmx.c |   24 +++++++++++++++---------
+ 1 file changed, 15 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -61,6 +61,8 @@ module_param_named(unrestricted_guest,
+ static int __read_mostly emulate_invalid_guest_state = 0;
+ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
+ 
++#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
++
+ /*
+  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
+  * ple_gap:    upper bound on the amount of time between two successive
+@@ -115,7 +117,7 @@ struct vcpu_vmx {
+ 	} host_state;
+ 	struct {
+ 		int vm86_active;
+-		u8 save_iopl;
++		ulong save_rflags;
+ 		struct kvm_save_segment {
+ 			u16 selector;
+ 			unsigned long base;
+@@ -787,18 +789,23 @@ static void vmx_fpu_deactivate(struct kv
+ 
+ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
+ {
+-	unsigned long rflags;
++	unsigned long rflags, save_rflags;
+ 
+ 	rflags = vmcs_readl(GUEST_RFLAGS);
+-	if (to_vmx(vcpu)->rmode.vm86_active)
+-		rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
++	if (to_vmx(vcpu)->rmode.vm86_active) {
++		rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
++		save_rflags = to_vmx(vcpu)->rmode.save_rflags;
++		rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
++	}
+ 	return rflags;
+ }
+ 
+ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ {
+-	if (to_vmx(vcpu)->rmode.vm86_active)
++	if (to_vmx(vcpu)->rmode.vm86_active) {
++		to_vmx(vcpu)->rmode.save_rflags = rflags;
+ 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
++	}
+ 	vmcs_writel(GUEST_RFLAGS, rflags);
+ }
+ 
+@@ -1431,8 +1438,8 @@ static void enter_pmode(struct kvm_vcpu
+ 	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
+ 
+ 	flags = vmcs_readl(GUEST_RFLAGS);
+-	flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+-	flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
++	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
++	flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+ 	vmcs_writel(GUEST_RFLAGS, flags);
+ 
+ 	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
+@@ -1501,8 +1508,7 @@ static void enter_rmode(struct kvm_vcpu
+ 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+ 
+ 	flags = vmcs_readl(GUEST_RFLAGS);
+-	vmx->rmode.save_iopl
+-		= (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
++	vmx->rmode.save_rflags = flags;
+ 
+ 	flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
+ 
diff --git a/queue-2.6.33/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch b/queue-2.6.33/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
new file mode 100644
index 00000000000..51bda6233ac
--- /dev/null
+++ b/queue-2.6.33/0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
@@ -0,0 +1,65 @@
+From b4bb883779169c8713ad93561e427556e9fde384 Mon Sep 17 00:00:00 2001
+From: Xiao Guangrong
+Date: Fri, 16 Apr 2010 16:34:42 +0800
+Subject: KVM: MMU: fix kvm_mmu_zap_page() and its calling path
+
+From: Xiao Guangrong
+
+(Cherry-picked from commit 77662e0028c7c63e34257fda03ff9625c59d939d)
+
+This patch fixes:
+
+- calculate zapped page number properly in mmu_zap_unsync_children()
+- calculate freed page number properly in kvm_mmu_change_mmu_pages()
+- if a child page was zapped, restart the hlist walk
+
+KVM-Stable-Tag.
+Signed-off-by: Xiao Guangrong
+Signed-off-by: Marcelo Tosatti
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/mmu.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1502,8 +1502,8 @@ static int mmu_zap_unsync_children(struc
+ 		for_each_sp(pages, sp, parents, i) {
+ 			kvm_mmu_zap_page(kvm, sp);
+ 			mmu_pages_clear_parents(&parents);
++			zapped++;
+ 		}
+-		zapped += pages.nr;
+ 		kvm_mmu_pages_init(parent, &parents, &pages);
+ 	}
+ 
+@@ -1554,14 +1554,16 @@ void kvm_mmu_change_mmu_pages(struct kvm
+ 	 */
+ 
+ 	if (used_pages > kvm_nr_mmu_pages) {
+-		while (used_pages > kvm_nr_mmu_pages) {
++		while (used_pages > kvm_nr_mmu_pages &&
++			!list_empty(&kvm->arch.active_mmu_pages)) {
+ 			struct kvm_mmu_page *page;
+ 
+ 			page = container_of(kvm->arch.active_mmu_pages.prev,
+ 					    struct kvm_mmu_page, link);
+-			kvm_mmu_zap_page(kvm, page);
++			used_pages -= kvm_mmu_zap_page(kvm, page);
+ 			used_pages--;
+ 		}
++		kvm_nr_mmu_pages = used_pages;
+ 		kvm->arch.n_free_mmu_pages = 0;
+ 	}
+ 	else
+@@ -1608,7 +1610,8 @@ static void mmu_unshadow(struct kvm *kvm
+ 		    && !sp->role.invalid) {
+ 			pgprintk("%s: zap %lx %x\n",
+ 				 __func__, gfn, sp->role.word);
+-			kvm_mmu_zap_page(kvm, sp);
++			if (kvm_mmu_zap_page(kvm, sp))
++				nn = bucket->first;
+ 		}
+ 	}
+ }
diff --git a/queue-2.6.33/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch b/queue-2.6.33/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
new file mode 100644
index 00000000000..74dd65d657f
--- /dev/null
+++ b/queue-2.6.33/0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
@@ -0,0 +1,168 @@
+From 6f695855d25cbb27c8306c5147a29cffeacbe7e4 Mon Sep 17 00:00:00 2001
+From: Takuya Yoshikawa
+Date: Mon, 12 Apr 2010 19:35:35 +0900
+Subject: KVM: fix the handling of dirty bitmaps to avoid overflows
+
+From: Takuya Yoshikawa
+
+(Cherry-picked from commit 87bf6e7de1134f48681fd2ce4b7c1ec45458cb6d)
+
+Int is not long enough to store the size of a dirty bitmap.
+
+This patch fixes this problem with the introduction of a wrapper
+function to calculate the sizes of dirty bitmaps.
+
+Note: in mark_page_dirty(), we have to consider the fact that
+  __set_bit() takes the offset as int, not long.
+
+Signed-off-by: Takuya Yoshikawa
+Signed-off-by: Marcelo Tosatti
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/ia64/kvm/kvm-ia64.c  |    9 +++++----
+ arch/powerpc/kvm/book3s.c |    5 +++--
+ arch/x86/kvm/x86.c        |    4 ++--
+ include/linux/kvm_host.h  |    5 +++++
+ virt/kvm/kvm_main.c       |   13 ++++++++-----
+ 5 files changed, 23 insertions(+), 13 deletions(-)
+
+--- a/arch/ia64/kvm/kvm-ia64.c
++++ b/arch/ia64/kvm/kvm-ia64.c
+@@ -1794,7 +1794,8 @@ static int kvm_ia64_sync_dirty_log(struc
+ {
+ 	struct kvm_memory_slot *memslot;
+ 	int r, i;
+-	long n, base;
++	long base;
++	unsigned long n;
+ 	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+ 			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+ 
+@@ -1807,7 +1808,7 @@ static int kvm_ia64_sync_dirty_log(struc
+ 	if (!memslot->dirty_bitmap)
+ 		goto out;
+ 
+-	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++	n = kvm_dirty_bitmap_bytes(memslot);
+ 	base = memslot->base_gfn / BITS_PER_LONG;
+ 
+ 	for (i = 0; i < n/sizeof(long); ++i) {
+@@ -1823,7 +1824,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+ 		struct kvm_dirty_log *log)
+ {
+ 	int r;
+-	int n;
++	unsigned long n;
+ 	struct kvm_memory_slot *memslot;
+ 	int is_dirty = 0;
+ 
+@@ -1841,7 +1842,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+ 	if (is_dirty) {
+ 		kvm_flush_remote_tlbs(kvm);
+ 		memslot = &kvm->memslots[log->slot];
+-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++		n = kvm_dirty_bitmap_bytes(memslot);
+ 		memset(memslot->dirty_bitmap, 0, n);
+ 	}
+ 	r = 0;
+--- a/arch/powerpc/kvm/book3s.c
++++ b/arch/powerpc/kvm/book3s.c
+@@ -848,7 +848,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+ 	struct kvm_vcpu *vcpu;
+ 	ulong ga, ga_end;
+ 	int is_dirty = 0;
+-	int r, n;
++	int r;
++	unsigned long n;
+ 
+ 	down_write(&kvm->slots_lock);
+ 
+@@ -866,7 +867,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+ 		kvm_for_each_vcpu(n, vcpu, kvm)
+ 			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
+ 
+-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++		n = kvm_dirty_bitmap_bytes(memslot);
+ 		memset(memslot->dirty_bitmap, 0, n);
+ 	}
+ 
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2343,7 +2343,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+ 		struct kvm_dirty_log *log)
+ {
+ 	int r;
+-	int n;
++	unsigned long n;
+ 	struct kvm_memory_slot *memslot;
+ 	int is_dirty = 0;
+ 
+@@ -2359,7 +2359,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+ 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
+ 		spin_unlock(&kvm->mmu_lock);
+ 		memslot = &kvm->memslots[log->slot];
+-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++		n = kvm_dirty_bitmap_bytes(memslot);
+ 		memset(memslot->dirty_bitmap, 0, n);
+ 	}
+ 	r = 0;
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -116,6 +116,11 @@ struct kvm_memory_slot {
+ 	int user_alloc;
+ };
+ 
++static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
++{
++	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++}
++
+ struct kvm_kernel_irq_routing_entry {
+ 	u32 gsi;
+ 	u32 type;
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -636,7 +636,7 @@ skip_lpage:
+ 
+ 	/* Allocate page dirty bitmap if needed */
+ 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
+-		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
++		unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
+ 
+ 		new.dirty_bitmap = vmalloc(dirty_bytes);
+ 		if (!new.dirty_bitmap)
+@@ -719,7 +719,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
+ {
+ 	struct kvm_memory_slot *memslot;
+ 	int r, i;
+-	int n;
++	unsigned long n;
+ 	unsigned long any = 0;
+ 
+ 	r = -EINVAL;
+@@ -731,7 +731,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
+ 	if (!memslot->dirty_bitmap)
+ 		goto out;
+ 
+-	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
++	n = kvm_dirty_bitmap_bytes(memslot);
+ 
+ 	for (i = 0; !any && i < n/sizeof(long); ++i)
+ 		any = memslot->dirty_bitmap[i];
+@@ -1073,10 +1073,13 @@ void mark_page_dirty(struct kvm *kvm, gf
+ 	memslot = gfn_to_memslot_unaliased(kvm, gfn);
+ 	if (memslot && memslot->dirty_bitmap) {
+ 		unsigned long rel_gfn = gfn - memslot->base_gfn;
++		unsigned long *p = memslot->dirty_bitmap +
++					rel_gfn / BITS_PER_LONG;
++		int offset = rel_gfn % BITS_PER_LONG;
+ 
+ 		/* avoid RMW */
+-		if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
+-			generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
++		if (!generic_test_le_bit(offset, p))
++			generic___set_le_bit(offset, p);
+ 	}
+ }
+ 
diff --git a/queue-2.6.33/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch b/queue-2.6.33/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
new file mode 100644
index 00000000000..c1a90ae3f53
--- /dev/null
+++ b/queue-2.6.33/0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
@@ -0,0 +1,33 @@
+From 96c6ce5c0567a909a5f0d96bdec7786c63dbb5c6 Mon Sep 17 00:00:00 2001
+From: Sridhar Samudrala
+Date: Tue, 30 Mar 2010 16:48:25 -0700
+Subject: KVM: Increase NR_IOBUS_DEVS limit to 200
+
+From: Sridhar Samudrala
+
+(Cherry-picked from commit e80e2a60ff7914dae691345a976c80bbbff3ec74)
+
+This patch increases the current hardcoded limit of NR_IOBUS_DEVS
+from 6 to 200. We are hitting this limit when creating a guest with more
+than 1 virtio-net device using vhost-net backend. Each virtio-net
+device requires 2 such devices to service notifications from rx/tx queues.
+
+Signed-off-by: Sridhar Samudrala
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/kvm_host.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -53,7 +53,7 @@ extern struct kmem_cache *kvm_vcpu_cache
+  */
+ struct kvm_io_bus {
+ 	int dev_count;
+-#define NR_IOBUS_DEVS 6
++#define NR_IOBUS_DEVS 200
+ 	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+ };
+ 
diff --git a/queue-2.6.33/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch b/queue-2.6.33/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch
new file mode 100644
index 00000000000..dd552187e7a
--- /dev/null
+++ b/queue-2.6.33/0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch
@@ -0,0 +1,42 @@
+From 2ffd9161aedc6e4f2591d1978a936258b3f98528 Mon Sep 17 00:00:00 2001
+From: Jan Kiszka
+Date: Wed, 14 Apr 2010 16:57:11 +0200
+Subject: KVM: x86: Fix TSS size check for 16-bit tasks
+
+From: Jan Kiszka
+
+(Cherry-picked from commit e8861cfe2c75bdce36655b64d7ce02c2b31b604d)
+
+A 16-bit TSS is only 44 bytes long. So make sure to test for the correct
+size on task switch.
+
+Signed-off-by: Jan Kiszka
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/x86.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4576,6 +4576,7 @@ int kvm_task_switch(struct kvm_vcp
+ 	int ret = 0;
+ 	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
+ 	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
++	u32 desc_limit;
+ 
+ 	old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
+ 
+@@ -4598,7 +4599,10 @@ int kvm_task_switch(struct kvm_vcp
+ 		}
+ 	}
+ 
+-	if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
++	desc_limit = get_desc_limit(&nseg_desc);
++	if (!nseg_desc.p ||
++	    ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
++	     desc_limit < 0x2b)) {
+ 		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
+ 		return 1;
+ 	}
diff --git a/queue-2.6.33/nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch b/queue-2.6.33/nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch
deleted file mode 100644
index 5cc565c49ab..00000000000
--- a/queue-2.6.33/nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch
+++ /dev/null
@@ -1,109 +0,0 @@
-From dc83d6e27fa80babe31c80aa8568f125f72edf57 Mon Sep 17 00:00:00 2001
-From: J. Bruce Fields
-Date: Tue, 20 Oct 2009 18:51:34 -0400
-Subject: nfsd4: don't try to map gid's in generic rpc code
-
-From: J. Bruce Fields
-
-commit dc83d6e27fa80babe31c80aa8568f125f72edf57 upstream.
-
-For nfsd we provide users the option of mapping uid's to server-side
-supplementary group lists.  That makes sense for nfsd, but not
-necessarily for other rpc users (such as the callback client).
-
-So move that lookup to svcauth_unix_set_client, which is a
-program-specific method.
-
-Signed-off-by: J. Bruce Fields
-Signed-off-by: Greg Kroah-Hartman
-
---- a/net/sunrpc/svcauth_unix.c
-+++ b/net/sunrpc/svcauth_unix.c
-@@ -655,23 +655,25 @@ static struct unix_gid *unix_gid_lookup(uid_t uid)
- 	return NULL;
- }
- 
--static int unix_gid_find(uid_t uid, struct group_info **gip,
--			 struct svc_rqst *rqstp)
-+static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
- {
--	struct unix_gid *ug = unix_gid_lookup(uid);
-+	struct unix_gid *ug;
-+	struct group_info *gi;
-+	int ret;
-+
-+	ug = unix_gid_lookup(uid);
- 	if (!ug)
--		return -EAGAIN;
--	switch (cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle)) {
-+		return ERR_PTR(-EAGAIN);
-+	ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle);
-+	switch (ret) {
- 	case -ENOENT:
--		*gip = NULL;
--		return 0;
-+		return ERR_PTR(-ENOENT);
- 	case 0:
--		*gip = ug->gi;
--		get_group_info(*gip);
-+		gi = get_group_info(ug->gi);
- 		cache_put(&ug->h, &unix_gid_cache);
--		return 0;
-+		return gi;
- 	default:
--		return -EAGAIN;
-+		return ERR_PTR(-EAGAIN);
- 	}
- }
- 
-@@ -681,6 +683,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
- 	struct sockaddr_in *sin;
- 	struct sockaddr_in6 *sin6, sin6_storage;
- 	struct ip_map *ipm;
-+	struct group_info *gi;
-+	struct svc_cred *cred = &rqstp->rq_cred;
- 
- 	switch (rqstp->rq_addr.ss_family) {
- 	case AF_INET:
-@@ -722,6 +726,17 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
- 		ip_map_cached_put(rqstp, ipm);
- 		break;
- 	}
-+
-+	gi = unix_gid_find(cred->cr_uid, rqstp);
-+	switch (PTR_ERR(gi)) {
-+	case -EAGAIN:
-+		return SVC_DROP;
-+	case -ENOENT:
-+		break;
-+	default:
-+		put_group_info(cred->cr_group_info);
-+		cred->cr_group_info = gi;
-+	}
- 	return SVC_OK;
- }
- 
-@@ -818,19 +833,11 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
- 	slen = svc_getnl(argv);			/* gids length */
- 	if (slen > 16 || (len -= (slen + 2)*4) < 0)
- 		goto badcred;
--	if (unix_gid_find(cred->cr_uid, &cred->cr_group_info, rqstp)
--	    == -EAGAIN)
-+	cred->cr_group_info = groups_alloc(slen);
-+	if (cred->cr_group_info == NULL)
- 		return SVC_DROP;
--	if (cred->cr_group_info == NULL) {
--		cred->cr_group_info = groups_alloc(slen);
--		if (cred->cr_group_info == NULL)
--			return SVC_DROP;
--		for (i = 0; i < slen; i++)
--			GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
--	} else {
--		for (i = 0; i < slen ; i++)
--			svc_getnl(argv);
--	}
-+	for (i = 0; i < slen; i++)
-+		GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
- 	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
- 		*authp = rpc_autherr_badverf;
- 		return SVC_DENIED;
diff --git a/queue-2.6.33/sched-use-proper-type-in-sched_getaffinity.patch b/queue-2.6.33/sched-use-proper-type-in-sched_getaffinity.patch
new file mode 100644
index 00000000000..8be8456a1eb
--- /dev/null
+++ b/queue-2.6.33/sched-use-proper-type-in-sched_getaffinity.patch
@@ -0,0 +1,40 @@
+From 8bc037fb89bb3104b9ae290d18c877624cd7d9cc Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro
+Date: Wed, 17 Mar 2010 09:36:58 +0900
+Subject: sched: Use proper type in sched_getaffinity()
+
+From: KOSAKI Motohiro
+
+commit 8bc037fb89bb3104b9ae290d18c877624cd7d9cc upstream.
+
+Using the proper type fixes the following compiler warning:
+
+  kernel/sched.c:4850: warning: comparison of distinct pointer types lacks a cast
+
+Signed-off-by: KOSAKI Motohiro
+Cc: torvalds@linux-foundation.org
+Cc: travis@sgi.com
+Cc: peterz@infradead.org
+Cc: drepper@redhat.com
+Cc: rja@sgi.com
+Cc: sharyath@in.ibm.com
+Cc: steiner@sgi.com
+LKML-Reference: <20100317090046.4C79.A69D9226@jp.fujitsu.com>
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/sched.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6727,7 +6727,7 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t
+ 
+ 	ret = sched_getaffinity(pid, mask);
+ 	if (ret == 0) {
+-		int retlen = min(len, cpumask_size());
++		size_t retlen = min_t(size_t, len, cpumask_size());
+ 
+ 		if (copy_to_user(user_mask_ptr, mask, retlen))
+ 			ret = -EFAULT;
diff --git a/queue-2.6.33/series b/queue-2.6.33/series
index 5fbb49d04e8..718b8cc2c2a 100644
--- a/queue-2.6.33/series
+++ b/queue-2.6.33/series
@@ -125,4 +125,13 @@ module-fix-__module_ref_addr.patch
 md-deal-with-merge_bvec_fn-in-component-devices-better.patch
 powerpc-fix-smp-build-with-disabled-cpu-hotplugging.patch
 ext4-fix-async-i-o-writes-beyond-4gb-to-a-sparse-file.patch
-nfsd4-don-t-try-to-map-gid-s-in-generic-rpc-code.patch
+sched-use-proper-type-in-sched_getaffinity.patch
+0001-KVM-VMX-Update-instruction-length-on-intercepted-BP.patch
+0002-KVM-SVM-Fix-memory-leaks-that-happen-when-svm_create.patch
+0003-KVM-Don-t-spam-kernel-log-when-injecting-exceptions-.patch
+0004-KVM-allow-bit-10-to-be-cleared-in-MSR_IA32_MC4_CTL.patch
+0005-KVM-VMX-Save-restore-rflags.vm-correctly-in-real-mod.patch
+0006-KVM-MMU-fix-kvm_mmu_zap_page-and-its-calling-path.patch
+0007-KVM-fix-the-handling-of-dirty-bitmaps-to-avoid-overf.patch
+0008-KVM-Increase-NR_IOBUS_DEVS-limit-to-200.patch
+0009-KVM-x86-Fix-TSS-size-check-for-16-bit-tasks.patch
-- 
2.47.3